C++ (Cpp) v_maxの例

コード例 #1

0

ファイルを表示

ファイル: OBox.cpp プロジェクト: jeppewalther/GEL

// Builds an oriented box around a single triangle.
//
// The first axis (X) is the normalized longest edge of the triangle, the
// second axis (Y) is another edge with its X component removed and then
// normalized, and the third axis (Z) is cross(X,Y). The three vertices are
// transformed by the matrix built from (X,Y,Z) and their component-wise
// min/max give the box extents. The interpolation point handed to the AABox
// is the transformed triangle centre clamped into [pmin, pmax].
OBox OBox::box_triangle(const Triangle& t)
{
	// Edges taken around the triangle: v0->v1, v1->v2, v2->v0.
	const Vec3f edge_a = t.get_v1()-t.get_v0();
	const Vec3f edge_b = t.get_v2()-t.get_v1();
	const Vec3f edge_c = t.get_v0()-t.get_v2();

	const float len_a = sqr_length(edge_a);
	const float len_b = sqr_length(edge_b);
	const float len_c = sqr_length(edge_c);

	// Default choice covers every case in which neither edge_a nor edge_b
	// is strictly the longest: X comes from edge_c, Y from edge_a.
	const Vec3f* longest = &edge_c;
	const Vec3f* helper = &edge_a;
	if(len_a > len_b && len_a > len_c)
		{
			longest = &edge_a;
			helper = &edge_b;
		}
	else if(!(len_a > len_b) && len_b > len_c)
		{
			longest = &edge_b;
			helper = &edge_c;
		}

	Vec3f X = normalize(*longest);
	// Remove the X component from the helper edge before normalizing.
	Vec3f Y = normalize(*helper - X * dot(X, *helper));
	Vec3f Z = cross(X,Y);

	const Mat3x3f Rot(X,Y,Z);

	// Triangle corners expressed in the box frame.
	Vec3f q0 = Rot * t.get_v0();
	Vec3f q1 = Rot * t.get_v1();
	Vec3f q2 = Rot * t.get_v2();

	Vec3f pmin = v_min(q0, v_min(q1, q2));
	Vec3f pmax = v_max(q0, v_max(q1, q2));

	// Clamp the transformed centre into the box extents.
	Vec3f centre_in_box = v_min(pmax, Rot * t.get_centre());
	Vec3f centre_close = v_max(pmin, centre_in_box);

	return OBox(Rot, AABox(pmin, pmax, centre_close));
}

コード例 #2

0

ファイルを表示

ファイル: AABox.cpp プロジェクト: janba/GEL

// Tests the line through p with direction dir against this box.
//
// For every axis the parameter values at which the line crosses the pmin and
// pmax planes are computed; the largest per-axis entry value is then compared
// with the smallest per-axis exit value, each widened by CGLA::TINY.
// No sign check is made on the parameters, so hits behind p also count.
// NOTE(review): a zero component in dir divides by zero here; presumably the
// code relies on floating-point infinity semantics - confirm.
bool AABox::intersect(const CGLA::Vec3f& p, const CGLA::Vec3f& dir) const
{
	Vec3f t_at_min, t_at_max;
	for(int axis=0; axis<3; ++axis)
		{
			t_at_min[axis] = (pmin[axis]-p[axis])/dir[axis];
			t_at_max[axis] = (pmax[axis]-p[axis])/dir[axis];
		}

	// Per axis: where the line enters and where it leaves the slab.
	Vec3f t_enter = v_min(t_at_min, t_at_max);
	Vec3f t_leave = v_max(t_at_min, t_at_max);

	float last_entry = max(t_enter[0], max(t_enter[1], t_enter[2]));
	float first_exit = min(t_leave[0], min(t_leave[1], t_leave[2]));

	return (last_entry-CGLA::TINY) < (first_exit+CGLA::TINY);
}

コード例 #3

0

ファイルを表示

ファイル: test_intrin_utils.hpp プロジェクト: gini/opencv

    // Checks v_min() and v_max() lane by lane against std::min / std::max.
    // Two Data<R> buffers are created, the second one is reversed (presumably
    // so that the operands differ per lane), both are converted to the
    // register type R, and every lane of the results is compared with the
    // scalar reference. Returns *this.
    TheTest & test_min_max()
    {
        Data<R> dataA, dataB;
        dataB.reverse();
        R a = dataA, b = dataB;

        // Results are converted back to Data<R> so that lanes can be indexed.
        Data<R> resC = v_min(a, b), resD = v_max(a, b);
        for (int i = 0; i < R::nlanes; ++i)
        {
            EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]);
            EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]);
        }

        return *this;
    }

コード例 #4

0

ファイルを表示

ファイル: Node.cpp プロジェクト: ssashir06/DieHardBucets

 // Enumerates the volume vectors reachable from this node's `volumes` in one
 // step.
 //
 // Two kinds of step are generated:
 //  - set a single entry to 0 or to its capacity (skipped when the entry
 //    already has that value);
 //  - transfer between two distinct entries d1 and d2 so that d2 ends up at
 //    capacities[d2] and d1 keeps the remainder, or, when the combined amount
 //    does not reach capacities[d2], d1 ends up at 0 and d2 holds the sum.
 //
 // \param capacities  per-entry capacity; indexed with the same indices as
 //                    `volumes`, so it must be at least as long.
 // \return            the resulting volume vectors, one per generated step.
 //
 // NOTE(review): the `vd1 < 0` test below assumes Volume is a signed type -
 // confirm against the Volume typedef.
 list<vector<Volume> > Node::GetActions(const vector<Volume>& capacities) const
 {
     const size_t dimension = volumes.size();
     list<vector<Volume> > result;

     // Single-entry steps: empty it or fill it.
     for (size_t d = 0; d < dimension; d++)
     {
         vector<Volume> v_zero(volumes);
         vector<Volume> v_max(volumes);
         v_zero[d] = 0;
         v_max[d] = capacities[d];
         if (volumes[d] != v_zero[d]) result.push_back(v_zero);
         if (volumes[d] != v_max[d]) result.push_back(v_max);
     }

     // Two-entry steps: move content from d1 into d2.
     for (size_t d1 = 0; d1 < dimension; d1++)
     {
         for (size_t d2 = 0; d2 < dimension; d2++)
         {
             if (d1 == d2) continue;

             // Assume d2 gets filled completely; vd1 is what stays in d1.
             auto vd1 = volumes[d1] + volumes[d2] - capacities[d2];
             auto vd2 = capacities[d2];
             if (vd1 < 0)
             {
                 // Not enough to fill d2: d1 is emptied, d2 holds the sum.
                 vd2 += vd1;
                 vd1 = 0;
             }

             // Only keep the step when both entries actually change.
             if (volumes[d1] != vd1 && volumes[d2] != vd2)
             {
                 vector<Volume> v_swap(volumes);
                 v_swap[d1] = vd1;
                 v_swap[d2] = vd2;
                 result.push_back(v_swap);
             }
         }
     }

     return result;
 }

コード例 #5

0

ファイルを表示

ファイル: vpSimulatorCamera.cpp プロジェクト: ILoveFree2/visp-deb

/*!
  Send to the controller a velocity.

  \param frame : Control frame type. Only articular (vpRobot::ARTICULAR_FRAME)
  and camera frame (vpRobot::CAMERA_FRAME) are implemented.

  \param v : Velocity to apply to the robot.

  - In the camera frame, this velocity is represented by a vector of dimension 6
  \f$ {\bf v} = [{\bf t}, {\bf \theta u }]^t \f$ where \f$ \bf t \f$ is a
  translation vector and \f$ {\bf \theta u} \f$ is a rotation vector (see
  vpThetaUVector): \f$ {\bf v} = [t_x, t_y, t_z, {\theta u}_x, {\theta u}_y,
  {\theta u}_z] \f$ (see vpTranslationVector and vpThetaUVector).

  - In articular, this velocity is represented by a 6 dimension vector \f$
  \dot{{\bf q}} = [{\bf t}, {\bf \theta u}]^t \f$ where \f$ \bf t \f$ is a
  translation vector and \f$ {\bf \theta u} \f$ is a rotation vector (see
  vpThetaUVector): \f$ \dot{{\bf q}} = [t_x, t_y, t_z, {\theta u}_x, {\theta
  u}_y, {\theta u}_z] \f$ (see vpTranslationVector and vpThetaUVector). The
  robot jacobian \f$ {^e}{\bf J}_e\f$ expressed in the end-effector frame is
  here set to identity.

  Before being applied, the velocity is passed through
  vpRobot::saturateVelocities() with the limits returned by
  getMaxTranslationVelocity() (first three components) and
  getMaxRotationVelocity() (last three components).

  We use the exponential map (vpExponentialMap) to update the camera location.
  Sampling time can be set using setSamplingTime().

  The robot state is switched to vpRobot::STATE_VELOCITY_CONTROL before the
  frame is examined, i.e. also when an exception is thrown afterwards.

  \exception vpRobotException::wrongStateError : If \e frame is
  vpRobot::REFERENCE_FRAME or vpRobot::MIXT_FRAME.

  \sa setSamplingTime()

*/
void
vpSimulatorCamera::setVelocity(const vpRobot::vpControlFrameType frame,
                               const vpColVector &v)
{
  if (vpRobot::STATE_VELOCITY_CONTROL != getRobotState ()) {
    setRobotState(vpRobot::STATE_VELOCITY_CONTROL);
  }

  switch (frame)
  {
  case vpRobot::ARTICULAR_FRAME:
  case vpRobot::CAMERA_FRAME: {
      // Per-component limits: translations first, then rotations.
      vpColVector v_max(6);

      for (unsigned int i=0; i<3; i++)
        v_max[i] = getMaxTranslationVelocity();
      for (unsigned int i=3; i<6; i++)
        v_max[i] = getMaxRotationVelocity();

      vpColVector v_sat = vpRobot::saturateVelocities(v, v_max, true);

      // Integrate the saturated velocity over one sampling period.
      wMc_ = wMc_ * vpExponentialMap::direct(v_sat, delta_t_);
      setRobotFrame(frame);
      break ;
    }
  case vpRobot::REFERENCE_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the reference frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the reference frame: "
                            "functionality not implemented");
  case vpRobot::MIXT_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the mixt frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the mixt frame: "
                            "functionality not implemented");
  }
}

コード例 #6

0

ファイルを表示

ファイル: vpSimulatorPioneer.cpp プロジェクト: 976717326/visp

/*!
  Send to the controller a velocity.

  \param frame : Control frame type. Only vpRobot::ARTICULAR_FRAME is implemented.

  \param v : Velocity vector \f$(v_x, w_z)\f$ to apply to the robot. It must
  have exactly 2 components.

  The velocity is first passed through vpRobot::saturateVelocities() with the
  limits returned by getMaxTranslationVelocity() and getMaxRotationVelocity().
  Depending on the velocity specified as input, the robot position is updated
  using the sampling time that can be modified using setSamplingTime().

  \exception vpRobotException::dimensionError : If \e v is not of dimension 2.

  \exception vpRobotException::wrongStateError : If \e frame is
  vpRobot::CAMERA_FRAME, vpRobot::REFERENCE_FRAME or vpRobot::MIXT_FRAME.

  \sa setSamplingTime()

*/
void
vpSimulatorPioneer::setVelocity(const vpRobot::vpControlFrameType frame,
                                const vpColVector &v)
{
  switch (frame)
  {
  case vpRobot::ARTICULAR_FRAME: {
      if (vpRobot::STATE_VELOCITY_CONTROL != getRobotState ()) {
        setRobotState(vpRobot::STATE_VELOCITY_CONTROL);
      }
      setRobotFrame(frame);

      // v is a 2 dimension vector that contains v,w
      if (v.size() != 2) {
        vpERROR_TRACE ("Bad dimension of the control vector");
        throw vpRobotException (vpRobotException::dimensionError,
                                "Bad dimension of the control vector");
      }

      vpColVector v_max(2);

      v_max[0] = getMaxTranslationVelocity();
      v_max[1] = getMaxRotationVelocity();

      vpColVector v_sat = vpRobot::saturateVelocities(v, v_max, true);

      // Integrate the planar pose over one sampling period. The position is
      // advanced with the heading from before this update.
      xm_ += delta_t_ * v_sat[0] * cos(theta_);
      ym_ += delta_t_ * v_sat[0] * sin(theta_);
      theta_ += delta_t_ * v_sat[1];

      // Rebuild the robot and camera poses from the updated planar pose.
      vpRotationMatrix wRe(0, 0, theta_);
      vpTranslationVector wte(xm_, ym_, 0);
      wMe_.buildFrom(wte, wRe);
      wMc_ = wMe_ * cMe_.inverse();

      break ;
    }
  case vpRobot::CAMERA_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the camera frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the camera frame: "
                            "functionality not implemented");
  case vpRobot::REFERENCE_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the reference frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the reference frame: "
                            "functionality not implemented");
  case vpRobot::MIXT_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the mixt frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the mixt frame: "
                            "functionality not implemented");
  }
}

コード例 #7

0

ファイルを表示

ファイル: mathfuncs_core.simd.hpp プロジェクト: mab0/opencv

// Element-wise exponential (per the function name) of n doubles: reads
// _x[0..n-1] and writes y[0..n-1].
//
// Each input is clamped to [minval, maxval], multiplied by exp_prescale and
// split into a rounded integer part xi and a remainder. The result is the
// product of three factors: a double whose exponent bits are built from
// (xi >> EXPTAB_SCALE), a table entry expTab[xi & EXPTAB_MASK], and a
// polynomial in the (post-scaled) remainder.
//
// When CV_SIMD_64F is enabled the bulk of the array is processed 2*VECSZ
// elements at a time; whatever is left is handled by the scalar loop at the
// end.
void exp64f( const double *_x, double *y, int n )
{
    CV_INSTRUMENT_REGION();

    const double* const expTab = cv::details::getExpTab64f();

    // Polynomial coefficients, each divided by EXPPOLY_32F_A0.
    const double
    A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
    A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
    A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
    A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
    A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
    A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;

    int i = 0;
    // The input is viewed through the Cv64suf union; only its .f member is read.
    const Cv64suf* x = (const Cv64suf*)_x;
    // Input clamp range, applied before prescaling.
    double minval = (-exp_max_val/exp_prescale);
    double maxval = (exp_max_val/exp_prescale);

#if CV_SIMD_64F
    const int VECSZ = v_float64::nlanes;
    const v_float64 vprescale = vx_setall_f64(exp_prescale);
    const v_float64 vpostscale = vx_setall_f64(exp_postscale);
    const v_float64 vminval = vx_setall_f64(minval);
    const v_float64 vmaxval = vx_setall_f64(maxval);

    // NOTE(review): there is no vA0. The vector polynomial below starts from
    // (x + A1), while the scalar tail uses (A0*x0 + A1). Confirm whether the
    // different leading coefficient is intentional.
    const v_float64 vA1 = vx_setall_f64(A1);
    const v_float64 vA2 = vx_setall_f64(A2);
    const v_float64 vA3 = vx_setall_f64(A3);
    const v_float64 vA4 = vx_setall_f64(A4);
    const v_float64 vA5 = vx_setall_f64(A5);

    const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
    // Aligned stores are used only while y + i keeps this 32-byte alignment.
    bool y_aligned = (size_t)(void*)y % 32 == 0;

    for( ; i < n; i += VECSZ*2 )
    {
        if( i + VECSZ*2 > n )
        {
            // Fewer than 2*VECSZ elements remain. If nothing has been
            // processed yet, or the operation is in place, leave the rest to
            // the scalar loop. Otherwise step back so that the final
            // iteration ends exactly at n; it recomputes some elements that
            // were already written (presumably why the in-place case is
            // excluded).
            if( i == 0 || _x == y )
                break;
            i = n - VECSZ*2;
            y_aligned = false;
        }

        v_float64 xf0 = vx_load(&x[i].f), xf1 = vx_load(&x[i + VECSZ].f);

        // Clamp the input, then prescale.
        xf0 = v_min(v_max(xf0, vminval), vmaxval);
        xf1 = v_min(v_max(xf1, vminval), vmaxval);

        xf0 *= vprescale;
        xf1 *= vprescale;

        // Split into rounded integer part (xi) and post-scaled remainder (xf).
        v_int32 xi0 = v_round(xf0);
        v_int32 xi1 = v_round(xf1);
        xf0 = (xf0 - v_cvt_f64(xi0))*vpostscale;
        xf1 = (xf1 - v_cvt_f64(xi1))*vpostscale;

        // Table factor selected by the low bits of xi.
        v_float64 yf0 = v_lut(expTab, xi0 & vidxmask);
        v_float64 yf1 = v_lut(expTab, xi1 & vidxmask);

        // Biased exponent, clamped to [0, 2047].
        v_int32 v0 = vx_setzero_s32(), v1023 = vx_setall_s32(1023), v2047 = vx_setall_s32(2047);
        xi0 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi0) + v1023, v0), v2047);
        xi1 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi1) + v1023, v0), v2047);

        // Widen to 64-bit lanes; only the first output of v_expand is used.
        v_int64 xq0, xq1, dummy;
        v_expand(xi0, xq0, dummy);
        v_expand(xi1, xq1, dummy);

        // Shift the exponent into bit position 52 and reinterpret the bits as
        // a double, giving the power-of-two factor.
        yf0 *= v_reinterpret_as_f64(v_shl<52>(xq0));
        yf1 *= v_reinterpret_as_f64(v_shl<52>(xq1));

        // Polynomial in the remainder, evaluated with fused multiply-adds.
        v_float64 zf0 = xf0 + vA1;
        v_float64 zf1 = xf1 + vA1;

        zf0 = v_fma(zf0, xf0, vA2);
        zf1 = v_fma(zf1, xf1, vA2);

        zf0 = v_fma(zf0, xf0, vA3);
        zf1 = v_fma(zf1, xf1, vA3);

        zf0 = v_fma(zf0, xf0, vA4);
        zf1 = v_fma(zf1, xf1, vA4);

        zf0 = v_fma(zf0, xf0, vA5);
        zf1 = v_fma(zf1, xf1, vA5);

        zf0 *= yf0;
        zf1 *= yf1;

        if( y_aligned )
        {
            v_store_aligned(y + i, zf0);
            v_store_aligned(y + i + VECSZ, zf1);
        }
        else
        {
            v_store(y + i, zf0);
            v_store(y + i + VECSZ, zf1);
        }
    }
    vx_cleanup();
#endif

    // Scalar path: handles everything when SIMD is disabled, otherwise only
    // the elements the vector loop left unprocessed.
    for( ; i < n; i++ )
    {
        double x0 = x[i].f;
        x0 = std::min(std::max(x0, minval), maxval);
        x0 *= exp_prescale;
        Cv64suf buf;

        int xi = saturate_cast<int>(x0);
        x0 = (x0 - xi)*exp_postscale;

        // Biased exponent, clamped to [0, 2047] when it falls outside that range.
        int t = (xi >> EXPTAB_SCALE) + 1023;
        t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
        buf.i = (int64)t << 52;

        y[i] = buf.f * expTab[xi & EXPTAB_MASK] * (((((A0*x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4)*x0 + A5);
    }
}

コード例 #8

0

ファイルを表示

ファイル: mathfuncs_core.simd.hpp プロジェクト: mab0/opencv

// Element-wise exponential (per the function name) of n floats: reads
// _x[0..n-1] and writes y[0..n-1].
//
// Each input is clamped to [minval, maxval], multiplied by exp_prescale and
// split into a rounded integer part xi and a remainder. The result is the
// product of three factors: a float whose exponent bits are built from
// (xi >> EXPTAB_SCALE), a table entry expTab_f[xi & EXPTAB_MASK], and a
// polynomial in the (post-scaled) remainder.
//
// When CV_SIMD is enabled the bulk of the array is processed 2*VECSZ
// elements at a time; whatever is left is handled by the scalar loop at the
// end.
void exp32f( const float *_x, float *y, int n )
{
    CV_INSTRUMENT_REGION();

    const float* const expTab_f = cv::details::getExpTab32f();

    // Polynomial coefficients, each divided by EXPPOLY_32F_A0 and narrowed to
    // float. The leading coefficient of the polynomial is 1 (see its use below).
    const float
    A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
    A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
    A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
    A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);

    int i = 0;
    // The input is viewed through the Cv32suf union; only its .f member is read.
    const Cv32suf* x = (const Cv32suf*)_x;
    // Input clamp range, applied before prescaling.
    float minval = (float)(-exp_max_val/exp_prescale);
    float maxval = (float)(exp_max_val/exp_prescale);
    float postscale = (float)exp_postscale;

#if CV_SIMD
    const int VECSZ = v_float32::nlanes;
    const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
    const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
    const v_float32 vminval = vx_setall_f32(minval);
    const v_float32 vmaxval = vx_setall_f32(maxval);

    const v_float32 vA1 = vx_setall_f32((float)A1);
    const v_float32 vA2 = vx_setall_f32((float)A2);
    const v_float32 vA3 = vx_setall_f32((float)A3);
    const v_float32 vA4 = vx_setall_f32((float)A4);

    const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
    // Aligned stores are used only while y + i keeps this 32-byte alignment.
    bool y_aligned = (size_t)(void*)y % 32 == 0;

    for( ; i < n; i += VECSZ*2 )
    {
        if( i + VECSZ*2 > n )
        {
            // Fewer than 2*VECSZ elements remain. If nothing has been
            // processed yet, or the operation is in place, leave the rest to
            // the scalar loop. Otherwise step back so that the final
            // iteration ends exactly at n; it recomputes some elements that
            // were already written (presumably why the in-place case is
            // excluded).
            if( i == 0 || _x == y )
                break;
            i = n - VECSZ*2;
            y_aligned = false;
        }

        v_float32 xf0 = vx_load(&x[i].f), xf1 = vx_load(&x[i + VECSZ].f);

        // Clamp the input, then prescale.
        xf0 = v_min(v_max(xf0, vminval), vmaxval);
        xf1 = v_min(v_max(xf1, vminval), vmaxval);

        xf0 *= vprescale;
        xf1 *= vprescale;

        // Split into rounded integer part (xi) and post-scaled remainder (xf).
        v_int32 xi0 = v_round(xf0);
        v_int32 xi1 = v_round(xf1);
        xf0 = (xf0 - v_cvt_f32(xi0))*vpostscale;
        xf1 = (xf1 - v_cvt_f32(xi1))*vpostscale;

        // Table factor selected by the low bits of xi.
        v_float32 yf0 = v_lut(expTab_f, xi0 & vidxmask);
        v_float32 yf1 = v_lut(expTab_f, xi1 & vidxmask);

        // Biased exponent, clamped to [0, 255].
        v_int32 v0 = vx_setzero_s32(), v127 = vx_setall_s32(127), v255 = vx_setall_s32(255);
        xi0 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi0) + v127, v0), v255);
        xi1 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi1) + v127, v0), v255);

        // Shift the exponent into bit position 23 and reinterpret the bits as
        // a float, giving the power-of-two factor.
        yf0 *= v_reinterpret_as_f32(v_shl<23>(xi0));
        yf1 *= v_reinterpret_as_f32(v_shl<23>(xi1));

        // Polynomial in the remainder, evaluated with fused multiply-adds;
        // same form as the scalar expression at the end of the function.
        v_float32 zf0 = xf0 + vA1;
        v_float32 zf1 = xf1 + vA1;

        zf0 = v_fma(zf0, xf0, vA2);
        zf1 = v_fma(zf1, xf1, vA2);

        zf0 = v_fma(zf0, xf0, vA3);
        zf1 = v_fma(zf1, xf1, vA3);

        zf0 = v_fma(zf0, xf0, vA4);
        zf1 = v_fma(zf1, xf1, vA4);

        zf0 *= yf0;
        zf1 *= yf1;

        if( y_aligned )
        {
            v_store_aligned(y + i, zf0);
            v_store_aligned(y + i + VECSZ, zf1);
        }
        else
        {
            v_store(y + i, zf0);
            v_store(y + i + VECSZ, zf1);
        }
    }
    vx_cleanup();
#endif

    // Scalar path: handles everything when SIMD is disabled, otherwise only
    // the elements the vector loop left unprocessed.
    for( ; i < n; i++ )
    {
        float x0 = x[i].f;
        x0 = std::min(std::max(x0, minval), maxval);
        x0 *= (float)exp_prescale;
        Cv32suf buf;

        int xi = saturate_cast<int>(x0);
        x0 = (x0 - xi)*postscale;

        // Biased exponent, clamped to [0, 255] when it falls outside that range.
        int t = (xi >> EXPTAB_SCALE) + 127;
        t = !(t & ~255) ? t : t < 0 ? 0 : 255;
        buf.i = t << 23;

        y[i] = buf.f * expTab_f[xi & EXPTAB_MASK] * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4);
    }
}

コード例 #9

0

ファイルを表示

ファイル: AABox.cpp プロジェクト: janba/GEL

// Computes the axis-aligned bounding box of a set of triangles and splits
// the set in two.
//
// invec : triangles to enclose; must not be empty (invec[0] is read).
// lvec  : receives the triangles whose centre lies at or below the split
//         threshold along the longest box axis.
// rvec  : receives the remaining triangles.
//
// Triangles are appended to lvec/rvec. If the threshold split leaves one side
// empty, both vectors are cleared and invec is simply cut in half by index.
// The returned box carries, as its third constructor argument, the triangle
// vertex closest to the centre of the box.
//
// Note: with a single input triangle the fallback still leaves lvec empty,
// so the assertions at the end fire in builds where assert is active.
AABox AABox::box_and_split(const std::vector<Triangle>& invec,
													 std::vector<Triangle>& lvec,
													 std::vector<Triangle>& rvec)
{
	// invec[0] is dereferenced below; catch an empty input up front.
	assert(!invec.empty());

	const size_t N = invec.size();
	Vec3f tri_pmin(FLT_MAX), tri_pmax(-FLT_MAX);

	for(size_t i=0;i<N;++i)
		{
			tri_pmin = v_min(invec[i].get_pmin(), tri_pmin);
			tri_pmax = v_max(invec[i].get_pmax(), tri_pmax);
		}
	Vec3f diff = tri_pmax - tri_pmin;

	// Find the point closest to the centre.
	// The centre is half-way between the extremes; adding the full extent
	// would yield tri_pmax instead.
	Vec3f centre = tri_pmin + 0.5f*diff;
	Vec3f centre_close = invec[0].get_v0();
	float min_dist = FLT_MAX;
	for(size_t i=0;i<N;++i)
		{
			Vec3f v0 = invec[i].get_v0();
			Vec3f v1 = invec[i].get_v1();
			Vec3f v2 = invec[i].get_v2();
			float sl0 = sqr_length(centre-v0);
			if(sl0 < min_dist)
				{
					min_dist = sl0;
					centre_close = v0;
				}
			float sl1 = sqr_length(centre-v1);
			if(sl1 < min_dist)
				{
					min_dist = sl1;
					centre_close = v1;
				}
			float sl2 = sqr_length(centre-v2);
			if(sl2 < min_dist)
				{
					min_dist = sl2;
					centre_close = v2;
				}
		}

	// Pick the axis along which the box is longest.
	int k;
	if(diff[0]>diff[1])
		{
			if(diff[0]>diff[2])
				k = 0;
			else
				k = 2;
		}
	else
		{
			if(diff[1]>diff[2])
				k = 1;
			else
				k = 2;
		}

	// Split at the middle of the box along that axis.
	float thresh = diff[k]/2.0f + tri_pmin[k];

	for(size_t i=0;i<N;++i)
		{
			if(invec[i].get_centre()[k] > thresh)
				rvec.push_back(invec[i]);
			else
				lvec.push_back(invec[i]);
		}

	// If all triangles landed on one side, split them naively by index.
	if(lvec.empty() || rvec.empty())
		{
			lvec.clear();
			lvec.insert(lvec.end(),
									invec.begin(),
									invec.begin()+N/2);
			rvec.clear();
			rvec.insert(rvec.end(),
									invec.begin()+N/2,
									invec.end());
		}
	assert(!lvec.empty());
	assert(!rvec.empty());
	assert(lvec.size()+rvec.size() == invec.size());
	return AABox(tri_pmin, tri_pmax, centre_close);
}

コード例 #10

0

ファイルを表示

ファイル: OBox.cpp プロジェクト: jeppewalther/GEL

// Builds an oriented box around a set of triangles and splits the set in two.
//
// The orientation comes from compute_rotation(invec). All triangle vertices
// are transformed by that matrix; their component-wise extremes give the box
// extents, and the transformed vertex nearest to the middle of those extents
// is passed on as the third AABox constructor argument.
//
// Triangles whose transformed centre has a first coordinate above the middle
// of the box are appended to rvec, all others to lvec. If that leaves one
// side empty, both vectors are replaced by the two index halves of invec.
OBox OBox::box_and_split(const std::vector<Triangle>& invec,
													 std::vector<Triangle>& lvec,
													 std::vector<Triangle>& rvec)
{
	const Mat3x3f Rot = compute_rotation(invec);
	const int tri_count = invec.size();
	const int pt_count = 3*tri_count;

	// Transform every vertex and grow the extents as we go.
	vector<Vec3f> rotated(pt_count);
	Vec3f tri_pmin(FLT_MAX), tri_pmax(-FLT_MAX);
	for(int t=0; t<tri_count; ++t)
		{
			const Triangle& tri = invec[t];
			const Vec3f corners[3] = { tri.get_v0(), tri.get_v1(), tri.get_v2() };

			const int base = 3*t;
			for(int c=0; c<3; ++c)
				rotated[base+c] = Rot*corners[c];

			for(int c=0; c<3; ++c)
				{
					tri_pmin = v_min(rotated[base+c], tri_pmin);
					tri_pmax = v_max(rotated[base+c], tri_pmax);
				}
		}

	// Locate the transformed vertex nearest to the middle of the extents.
	const Vec3f centre = tri_pmin + 0.5f*(tri_pmax-tri_pmin);
	Vec3f centre_close;
	float best_sqr_dist = FLT_MAX;
	for(int n=0; n<pt_count; ++n)
		{
			const float sqr_dist = sqr_length(centre-rotated[n]);
			if(!(sqr_dist < best_sqr_dist))
				continue;
			best_sqr_dist = sqr_dist;
			centre_close = rotated[n];
		}

	// Distribute the triangles around the middle of the first box axis.
	const float thresh = centre[0];
	for(int t=0; t<tri_count; ++t)
		{
			Vec3f c = Rot * invec[t].get_centre();
			std::vector<Triangle>& side = (c[0] > thresh) ? rvec : lvec;
			side.push_back(invec[t]);
		}

	// One side stayed empty: fall back to cutting the input in half by index.
	if(lvec.empty() || rvec.empty())
		{
			const int half = tri_count/2;
			lvec.assign(invec.begin(), invec.begin()+half);
			rvec.assign(invec.begin()+half, invec.end());
		}

	return OBox(Rot, AABox(tri_pmin, tri_pmax, centre_close));
}

コード例 #11

0

ファイルを表示

ファイル: fast.cpp プロジェクト: AliMiraftab/opencv

void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
{
    Mat img = _img.getMat();
    const int K = patternSize/2, N = patternSize + K + 1;
    int i, j, k, pixel[25];
    makeOffsets(pixel, (int)img.step, patternSize);

#if CV_SIMD128
    const int quarterPatternSize = patternSize/4;
    v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K);
    bool hasSimd = hasSIMD128();
#if CV_TRY_AVX2
    Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2;
    if(CV_CPU_HAS_SUPPORT_AVX2)
        fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel);
#endif

#endif

    keypoints.clear();

    threshold = std::min(std::max(threshold, 0), 255);

    uchar threshold_tab[512];
    for( i = -255; i <= 255; i++ )
        threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0);

    AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128);
    uchar* buf[3];
    buf[0] = _buf.data(); buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
    int* cpbuf[3];
    cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1;
    cpbuf[1] = cpbuf[0] + img.cols + 1;
    cpbuf[2] = cpbuf[1] + img.cols + 1;
    memset(buf[0], 0, img.cols*3);

    for(i = 3; i < img.rows-2; i++)
    {
        const uchar* ptr = img.ptr<uchar>(i) + 3;
        uchar* curr = buf[(i - 3)%3];
        int* cornerpos = cpbuf[(i - 3)%3];
        memset(curr, 0, img.cols);
        int ncorners = 0;

        if( i < img.rows - 3 )
        {
            j = 3;
#if CV_SIMD128
            if( hasSimd )
            {
                if( patternSize == 16 )
                {
#if CV_TRY_AVX2
                    if (fast_t_impl_avx2)
                        fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners);
#endif
                    //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements
                    {
                        for (; j < img.cols - 16 - 3; j += 16, ptr += 16)
                        {
                            v_uint8x16 v = v_load(ptr);
                            v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta);
                            v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta);

                            v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta));
                            v_int8x16 x1 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta));
                            v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2*quarterPatternSize]), delta));
                            v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3*quarterPatternSize]), delta));

                            v_int8x16 m0, m1;
                            m0 = (v0 < x0) & (v0 < x1);
                            m1 = (x0 < v1) & (x1 < v1);
                            m0 = m0 | ((v0 < x1) & (v0 < x2));
                            m1 = m1 | ((x1 < v1) & (x2 < v1));
                            m0 = m0 | ((v0 < x2) & (v0 < x3));
                            m1 = m1 | ((x2 < v1) & (x3 < v1));
                            m0 = m0 | ((v0 < x3) & (v0 < x0));
                            m1 = m1 | ((x3 < v1) & (x0 < v1));
                            m0 = m0 | m1;

                            int mask = v_signmask(m0);
                            if( mask == 0 )
                                continue;
                            if( (mask & 255) == 0 )
                            {
                                j -= 8;
                                ptr -= 8;
                                continue;
                            }

                            v_int8x16 c0 = v_setzero_s8();
                            v_int8x16 c1 = v_setzero_s8();
                            v_uint8x16 max0 = v_setzero_u8();
                            v_uint8x16 max1 = v_setzero_u8();
                            for( k = 0; k < N; k++ )
                            {
                                v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta);
                                m0 = v0 < x;
                                m1 = x < v1;

                                c0 = v_sub_wrap(c0, m0) & m0;
                                c1 = v_sub_wrap(c1, m1) & m1;

                                max0 = v_max(max0, v_reinterpret_as_u8(c0));
                                max1 = v_max(max1, v_reinterpret_as_u8(c1));
                            }

                            max0 = v_max(max0, max1);
                            int m = v_signmask(K16 < max0);

                            for( k = 0; m > 0 && k < 16; k++, m >>= 1 )
                            {
                                if(m & 1)
                                {
                                    cornerpos[ncorners++] = j+k;
                                    if(nonmax_suppression)
                                        curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold);
                                }
                            }
                        }
                    }
                }
            }
#endif
            for( ; j < img.cols - 3; j++, ptr++ )
            {
                int v = ptr[0];
                const uchar* tab = &threshold_tab[0] - v + 255;
                int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];

                if( d == 0 )
                    continue;

                d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
                d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
                d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];

                if( d == 0 )
                    continue;

                d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
                d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
                d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
                d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];

                if( d & 1 )
                {
                    int vt = v - threshold, count = 0;

                    for( k = 0; k < N; k++ )
                    {
                        int x = ptr[pixel[k]];
                        if(x < vt)
                        {
                            if( ++count > K )
                            {
                                cornerpos[ncorners++] = j;
                                if(nonmax_suppression)
                                    curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
                                break;
                            }
                        }
                        else
                            count = 0;
                    }
                }

                if( d & 2 )
                {
                    int vt = v + threshold, count = 0;

                    for( k = 0; k < N; k++ )
                    {
                        int x = ptr[pixel[k]];
                        if(x > vt)
                        {
                            if( ++count > K )
                            {
                                cornerpos[ncorners++] = j;
                                if(nonmax_suppression)
                                    curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
                                break;
                            }
                        }
                        else
                            count = 0;
                    }
                }
            }
        }

        cornerpos[-1] = ncorners;

        if( i == 3 )
            continue;

        const uchar* prev = buf[(i - 4 + 3)%3];
        const uchar* pprev = buf[(i - 5 + 3)%3];
        cornerpos = cpbuf[(i - 4 + 3)%3];
        ncorners = cornerpos[-1];

        for( k = 0; k < ncorners; k++ )
        {
            j = cornerpos[k];
            int score = prev[j];
            if( !nonmax_suppression ||
               (score > prev[j+1] && score > prev[j-1] &&
                score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
                score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
            {
                keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score));
            }
        }
    }