Esempio n. 1
0
    // Checks the mask-oriented intrinsics for register type R:
    // v_signmask, v_check_all, v_check_any and v_select.
    // Returns *this so that test calls can be chained.
    TheTest & test_mask()
    {
        typedef V_TypeTraits<LaneType> Traits;
        typedef typename Traits::int_type int_type;

        // dataA gets only element 1 multiplied by -1, dataC is multiplied
        // by -1 as a whole, dataB is left as constructed.
        Data<R> dataA;
        Data<R> dataB;
        Data<R> dataC;
        Data<R> dataD(1);
        Data<R> dataE(2);
        dataA[1] *= (LaneType)-1;
        dataC *= (LaneType)-1;

        R a = dataA;
        R b = dataB;
        R c = dataC;
        R d = dataD;
        R e = dataE;

        // Only element 1 of `a` was negated, so the expected mask is 2 (bit 1).
        const int m = v_signmask(a);
        EXPECT_EQ(2, m);

        EXPECT_EQ(false, v_check_all(a));
        EXPECT_EQ(false, v_check_all(b));
        EXPECT_EQ(true, v_check_all(c));

        EXPECT_EQ(true, v_check_any(a));
        EXPECT_EQ(false, v_check_any(b));
        EXPECT_EQ(true, v_check_any(c));

        // v_select(b, d, e) is compared lane by lane against a bitwise blend
        // computed on the integer reinterpretation: (d & mask) | (e & ~mask),
        // where the mask is the bit pattern of the corresponding lane of b.
        R f = v_select(b, d, e);
        Data<R> resF = f;
        for (int i = 0; i < R::nlanes; ++i)
        {
            const int_type m2 = Traits::reinterpret_int(dataB[i]);
            EXPECT_EQ((Traits::reinterpret_int(dataD[i]) & m2)
                    | (Traits::reinterpret_int(dataE[i]) & ~m2),
                      Traits::reinterpret_int(resF[i]));
        }

        return *this;
    }
Esempio n. 2
0
// v_signmask for a v_float32x4 register: the lanes are reinterpreted via
// v_reinterpret_as_u32 and the result is forwarded to the v_signmask overload
// that accepts that type.
inline int v_signmask(const v_float32x4& a)
{
    return v_signmask(v_reinterpret_as_u32(a));
}
Esempio n. 3
0
// FAST corner detection over `_img`. `keypoints` is cleared and then filled with
// the detected corners. `threshold` is clamped to [0, 255] before the scalar
// lookup table is built. With `nonmax_suppression` enabled a corner is reported
// only when its score is strictly greater than the scores of its 8 neighbours
// in the three-row score buffer.
//
// Rows are processed with a one-row delay: while row i is being scanned, the
// corners found on row i-1 are emitted, because their suppression test needs
// the scores of rows i-2, i-1 and i.
//
// NOTE(review): `patternSize` is not declared in this excerpt — presumably a
// template parameter declared just above; confirm.
// NOTE(review): the SIMD vector `t` is built from `threshold` BEFORE the clamp
// to [0, 255] further down, so the vector path sees (char)threshold of the
// unclamped value — confirm whether that is intended.
// NOTE(review): this excerpt ends with the closing brace of the row loop; the
// end of the function body is not visible here.
void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
{
    Mat img = _img.getMat();
    // K is half the pattern size; the run-length loops below scan N = patternSize + K + 1 offsets.
    const int K = patternSize/2, N = patternSize + K + 1;
    int i, j, k, pixel[25];
    // Fills pixel[] from the row step and pattern size — presumably the offsets of
    // the circle samples relative to the centre pixel; confirm against makeOffsets.
    makeOffsets(pixel, (int)img.step, patternSize);

#if CV_SIMD128
    const int quarterPatternSize = patternSize/4;
    // delta = 0x80 in every lane, t = threshold in every lane, K16 = K in every lane.
    v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K);
    bool hasSimd = hasSIMD128();
#if CV_TRY_AVX2
    // Optional AVX2 implementation; left empty when the CPU lacks AVX2 support.
    Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2;
    if(CV_CPU_HAS_SUPPORT_AVX2)
        fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel);
#endif

#endif

    keypoints.clear();

    // Clamp the threshold to the valid 8-bit range.
    threshold = std::min(std::max(threshold, 0), 255);

    // Lookup table over the difference i = neighbour - centre, i in [-255, 255]:
    //   1 - neighbour is darker than the centre by more than threshold,
    //   2 - neighbour is brighter than the centre by more than threshold,
    //   0 - otherwise.
    uchar threshold_tab[512];
    for( i = -255; i <= 255; i++ )
        threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0);

    // One allocation holding three score rows (buf[0..2], img.cols bytes each)
    // followed by three corner-position arrays (cpbuf[0..2]). Each cpbuf[r] has a
    // slot at index -1 that stores the number of corners found on that row.
    AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128);
    uchar* buf[3];
    buf[0] = _buf.data(); buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
    int* cpbuf[3];
    cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1;
    cpbuf[1] = cpbuf[0] + img.cols + 1;
    cpbuf[2] = cpbuf[1] + img.cols + 1;
    // Zero all three score rows.
    memset(buf[0], 0, img.cols*3);

    // The last iteration (i == img.rows - 3) performs no detection; it only
    // emits the corners collected for the previous row.
    for(i = 3; i < img.rows-2; i++)
    {
        // Scanning starts at column 3.
        const uchar* ptr = img.ptr<uchar>(i) + 3;
        // Score row and corner-position array for the current row, chosen cyclically.
        uchar* curr = buf[(i - 3)%3];
        int* cornerpos = cpbuf[(i - 3)%3];
        memset(curr, 0, img.cols);
        int ncorners = 0;

        if( i < img.rows - 3 )
        {
            j = 3;
#if CV_SIMD128
            if( hasSimd )
            {
                if( patternSize == 16 )
                {
#if CV_TRY_AVX2
                    // NOTE(review): presumably advances j/ptr and fills curr/cornerpos/ncorners
                    // for the columns it handles — confirm against the AVX2 implementation.
                    if (fast_t_impl_avx2)
                        fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners);
#endif
                    //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements
                    {
                        // Process 16 centre pixels per iteration.
                        for (; j < img.cols - 16 - 3; j += 16, ptr += 16)
                        {
                            v_uint8x16 v = v_load(ptr);
                            // Upper (v + t) and lower (v - t) bounds, XORed with 0x80 and
                            // reinterpreted as signed bytes for the signed comparisons below.
                            v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta);
                            v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta);

                            // Four samples a quarter of the pattern apart, biased by 0x80 likewise.
                            v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta));
                            v_int8x16 x1 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta));
                            v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2*quarterPatternSize]), delta));
                            v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3*quarterPatternSize]), delta));

                            // Quick rejection: a lane stays a candidate only if two adjacent
                            // quarter samples are both above v0 (m0) or both below v1 (m1).
                            v_int8x16 m0, m1;
                            m0 = (v0 < x0) & (v0 < x1);
                            m1 = (x0 < v1) & (x1 < v1);
                            m0 = m0 | ((v0 < x1) & (v0 < x2));
                            m1 = m1 | ((x1 < v1) & (x2 < v1));
                            m0 = m0 | ((v0 < x2) & (v0 < x3));
                            m1 = m1 | ((x2 < v1) & (x3 < v1));
                            m0 = m0 | ((v0 < x3) & (v0 < x0));
                            m1 = m1 | ((x3 < v1) & (x0 < v1));
                            m0 = m0 | m1;

                            int mask = v_signmask(m0);
                            // No candidate among these 16 pixels.
                            if( mask == 0 )
                                continue;
                            // No candidate in the low 8 bits of the mask: step back 8 so that,
                            // together with the loop increment of 16, the net advance is 8.
                            if( (mask & 255) == 0 )
                            {
                                j -= 8;
                                ptr -= 8;
                                continue;
                            }

                            // Full test: c0/c1 count the current run of consecutive samples
                            // above v0 / below v1; max0/max1 keep the longest run seen.
                            v_int8x16 c0 = v_setzero_s8();
                            v_int8x16 c1 = v_setzero_s8();
                            v_uint8x16 max0 = v_setzero_u8();
                            v_uint8x16 max1 = v_setzero_u8();
                            for( k = 0; k < N; k++ )
                            {
                                v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta);
                                m0 = v0 < x;
                                m1 = x < v1;

                                // Assuming true lanes are all-ones (-1): subtracting the mask
                                // increments the counter, and the AND resets it to 0 where the
                                // comparison failed.
                                c0 = v_sub_wrap(c0, m0) & m0;
                                c1 = v_sub_wrap(c1, m1) & m1;

                                max0 = v_max(max0, v_reinterpret_as_u8(c0));
                                max1 = v_max(max1, v_reinterpret_as_u8(c1));
                            }

                            // Lanes whose longest run exceeds K are corners.
                            max0 = v_max(max0, max1);
                            int m = v_signmask(K16 < max0);

                            // Walk the set bits of m; bit k corresponds to column j + k.
                            for( k = 0; m > 0 && k < 16; k++, m >>= 1 )
                            {
                                if(m & 1)
                                {
                                    cornerpos[ncorners++] = j+k;
                                    if(nonmax_suppression)
                                        curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold);
                                }
                            }
                        }
                    }
                }
            }
#endif
            // Scalar path: handles the remaining columns (or all of them without SIMD).
            for( ; j < img.cols - 3; j++, ptr++ )
            {
                int v = ptr[0];
                // tab[x] == threshold_tab[x - v + 255], i.e. the class of neighbour value x
                // relative to the centre value v.
                const uchar* tab = &threshold_tab[0] - v + 255;
                int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];

                // Neither sample of the first pair differs from the centre by more than threshold.
                if( d == 0 )
                    continue;

                // Narrow d using further sample pairs (indices k and k + 8).
                d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
                d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
                d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];

                if( d == 0 )
                    continue;

                d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
                d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
                d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
                d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];

                // Bit 0 survived: look for a run of more than K consecutive samples
                // darker than v - threshold.
                if( d & 1 )
                {
                    int vt = v - threshold, count = 0;

                    for( k = 0; k < N; k++ )
                    {
                        int x = ptr[pixel[k]];
                        if(x < vt)
                        {
                            if( ++count > K )
                            {
                                cornerpos[ncorners++] = j;
                                if(nonmax_suppression)
                                    curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
                                break;
                            }
                        }
                        else
                            count = 0;
                    }
                }

                // Bit 1 survived: look for a run of more than K consecutive samples
                // brighter than v + threshold.
                if( d & 2 )
                {
                    int vt = v + threshold, count = 0;

                    for( k = 0; k < N; k++ )
                    {
                        int x = ptr[pixel[k]];
                        if(x > vt)
                        {
                            if( ++count > K )
                            {
                                cornerpos[ncorners++] = j;
                                if(nonmax_suppression)
                                    curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
                                break;
                            }
                        }
                        else
                            count = 0;
                    }
                }
            }
        }

        // Store the corner count of this row in the slot just before its position array.
        cornerpos[-1] = ncorners;

        // First processed row: there is no previous row to emit yet.
        if( i == 3 )
            continue;

        // Switch to the buffers of row i-1 (prev) and row i-2 (pprev) and emit
        // the corners recorded for row i-1.
        const uchar* prev = buf[(i - 4 + 3)%3];
        const uchar* pprev = buf[(i - 5 + 3)%3];
        cornerpos = cpbuf[(i - 4 + 3)%3];
        ncorners = cornerpos[-1];

        for( k = 0; k < ncorners; k++ )
        {
            j = cornerpos[k];
            int score = prev[j];
            // Without suppression every recorded corner is kept; otherwise the score
            // must be strictly greater than all 8 neighbouring scores.
            if( !nonmax_suppression ||
               (score > prev[j+1] && score > prev[j-1] &&
                score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
                score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
            {
                keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score));
            }
        }
    }
Esempio n. 4
0
// v_signmask for a v_int16x8 register: the lanes are reinterpreted via
// v_reinterpret_as_u16 and the result is forwarded to the v_signmask overload
// that accepts that type.
inline int v_signmask(const v_int16x8& a)
{
    return v_signmask(v_reinterpret_as_u16(a));
}
Esempio n. 5
0
// v_signmask for a v_int8x16 register: the lanes are reinterpreted via
// v_reinterpret_as_u8 and the result is forwarded to the v_signmask overload
// that accepts that type.
inline int v_signmask(const v_int8x16& a)
{
    return v_signmask(v_reinterpret_as_u8(a));
}
Esempio n. 6
0
// v_signmask for a v_float64x2 register: the lanes are reinterpreted via
// v_reinterpret_as_s64 and the result is forwarded to the v_signmask overload
// that accepts that type.
inline int v_signmask(const v_float64x2& a)
{
    return v_signmask(v_reinterpret_as_s64(a));
}
Esempio n. 7
0
// v_signmask for a v_uint32x4 register: the lanes are reinterpreted via
// v_reinterpret_as_s32 and the result is forwarded to the v_signmask overload
// that accepts that type.
inline int v_signmask(const v_uint32x4& a)
{
    return v_signmask(v_reinterpret_as_s32(a));
}
Esempio n. 8
0
    void operator()(const Range &boundaries) const
    {
        CV_TRACE_FUNCTION();

        Mat dx, dy;
        AutoBuffer<short> dxMax(0), dyMax(0);
        std::deque<uchar*> stack, borderPeaksLocal;
        const int rowStart = max(0, boundaries.start - 1), rowEnd = min(src.rows, boundaries.end + 1);
        int *_mag_p, *_mag_a, *_mag_n;
        short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL;
        uchar *_pmap;
        double scale = 1.0;

        CV_TRACE_REGION("gradient")
        if(needGradient)
        {
            if (aperture_size == 7)
            {
                scale = 1 / 16.0;
            }
            Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
            Sobel(src.rowRange(rowStart, rowEnd), dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
        }
        else
        {
            dx = src.rowRange(rowStart, rowEnd);
            dy = src2.rowRange(rowStart, rowEnd);
        }

        CV_TRACE_REGION_NEXT("magnitude");
        if(cn > 1)
        {
            dxMax.allocate(2 * dx.cols);
            dyMax.allocate(2 * dy.cols);
            _dx_a = (short*)dxMax;
            _dx_n = _dx_a + dx.cols;
            _dy_a = (short*)dyMax;
            _dy_n = _dy_a + dy.cols;
        }

        // _mag_p: previous row, _mag_a: actual row, _mag_n: next row
#if CV_SIMD128
        AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128));
        _mag_p = alignPtr((int*)buffer + 1, CV_MALLOC_SIMD128);
        _mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128);
        _mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128);
#else
        AutoBuffer<int> buffer(3 * (mapstep * cn));
        _mag_p = (int*)buffer + 1;
        _mag_a = _mag_p + mapstep * cn;
        _mag_n = _mag_a + mapstep * cn;
#endif

        // For the first time when just 2 rows are filled and for left and right borders
        if(rowStart == boundaries.start)
            memset(_mag_n - 1, 0, mapstep * sizeof(int));
        else
            _mag_n[src.cols] = _mag_n[-1] = 0;

        _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0;

        // calculate magnitude and angle of gradient, perform non-maxima suppression.
        // fill the map with one of the following values:
        //   0 - the pixel might belong to an edge
        //   1 - the pixel can not belong to an edge
        //   2 - the pixel does belong to an edge
        for (int i = rowStart; i <= boundaries.end; ++i)
        {
            // Scroll the ring buffer
            std::swap(_mag_n, _mag_a);
            std::swap(_mag_n, _mag_p);

            if(i < rowEnd)
            {
                // Next row calculation
                _dx = dx.ptr<short>(i - rowStart);
                _dy = dy.ptr<short>(i - rowStart);

                if (L2gradient)
                {
                    int j = 0, width = src.cols * cn;
#if CV_SIMD128
                    if (haveSIMD)
                    {
                       for ( ; j <= width - 8; j += 8)
                        {
                            v_int16x8 v_dx = v_load((const short*)(_dx + j));
                            v_int16x8 v_dy = v_load((const short*)(_dy + j));

                            v_int32x4 v_dxp_low, v_dxp_high;
                            v_int32x4 v_dyp_low, v_dyp_high;
                            v_expand(v_dx, v_dxp_low, v_dxp_high);
                            v_expand(v_dy, v_dyp_low, v_dyp_high);

                            v_store_aligned((int *)(_mag_n + j), v_dxp_low*v_dxp_low+v_dyp_low*v_dyp_low);
                            v_store_aligned((int *)(_mag_n + j + 4), v_dxp_high*v_dxp_high+v_dyp_high*v_dyp_high);
                        }
                    }
#endif
                    for ( ; j < width; ++j)
                        _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
                }
                else
                {
                    int j = 0, width = src.cols * cn;
#if CV_SIMD128
                    if (haveSIMD)
                    {
                        for(; j <= width - 8; j += 8)
                        {
                            v_int16x8 v_dx = v_load((const short *)(_dx + j));
                            v_int16x8 v_dy = v_load((const short *)(_dy + j));

                            v_dx = v_reinterpret_as_s16(v_abs(v_dx));
                            v_dy = v_reinterpret_as_s16(v_abs(v_dy));

                            v_int32x4 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh;
                            v_expand(v_dx, v_dx_ml, v_dx_mh);
                            v_expand(v_dy, v_dy_ml, v_dy_mh);

                            v_store_aligned((int *)(_mag_n + j), v_dx_ml + v_dy_ml);
                            v_store_aligned((int *)(_mag_n + j + 4), v_dx_mh + v_dy_mh);
                        }
                    }
#endif
                    for ( ; j < width; ++j)
                        _mag_n[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
                }

                if(cn > 1)
                {
                    std::swap(_dx_n, _dx_a);
                    std::swap(_dy_n, _dy_a);

                    for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
                    {
                        int maxIdx = jn;
                        for(int k = 1; k < cn; ++k)
                            if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k;

                        _mag_n[j] = _mag_n[maxIdx];
                        _dx_n[j] = _dx[maxIdx];
                        _dy_n[j] = _dy[maxIdx];
                    }

                    _mag_n[src.cols] = 0;
                }

                // at the very beginning we do not have a complete ring
                // buffer of 3 magnitude rows for non-maxima suppression
                if (i <= boundaries.start)
                    continue;
            }
            else
            {
                memset(_mag_n - 1, 0, mapstep * sizeof(int));

                if(cn > 1)
                {
                    std::swap(_dx_n, _dx_a);
                    std::swap(_dy_n, _dy_a);
                }
            }

            // From here actual src row is (i - 1)
            // Set left and right border to 1
#if CV_SIMD128
            if(haveSIMD)
                _pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128;
            else
#endif
                _pmap = map.ptr<uchar>(i) + 1;

            _pmap[src.cols] =_pmap[-1] = 1;

            if(cn == 1)
            {
                _dx = dx.ptr<short>(i - rowStart - 1);
                _dy = dy.ptr<short>(i - rowStart - 1);
            }
            else
            {
                _dx = _dx_a;
                _dy = _dy_a;
            }

            const int TG22 = 13573;
            int j = 0;
#if CV_SIMD128
            if (haveSIMD)
            {
                const v_int32x4 v_low = v_setall_s32(low);
                const v_int8x16 v_one = v_setall_s8(1);

                for (; j <= src.cols - 32; j += 32)
                {
                    v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
                    v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
                    v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
                    v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));

                    v_int32x4 v_cmp1 = v_m1 > v_low;
                    v_int32x4 v_cmp2 = v_m2 > v_low;
                    v_int32x4 v_cmp3 = v_m3 > v_low;
                    v_int32x4 v_cmp4 = v_m4 > v_low;

                    v_m1 = v_load_aligned((const int*)(_mag_a + j + 16));
                    v_m2 = v_load_aligned((const int*)(_mag_a + j + 20));
                    v_m3 = v_load_aligned((const int*)(_mag_a + j + 24));
                    v_m4 = v_load_aligned((const int*)(_mag_a + j + 28));

                    v_store_aligned((signed char*)(_pmap + j), v_one);
                    v_store_aligned((signed char*)(_pmap + j + 16), v_one);

                    v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
                    v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);

                    v_cmp1 = v_m1 > v_low;
                    v_cmp2 = v_m2 > v_low;
                    v_cmp3 = v_m3 > v_low;
                    v_cmp4 = v_m4 > v_low;

                    v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);

                    v_cmp80 = v_pack(v_cmp1, v_cmp2);
                    v_cmp81 = v_pack(v_cmp3, v_cmp4);

                    unsigned int mask = v_signmask(v_cmp);

                    v_cmp = v_pack(v_cmp80, v_cmp81);
                    mask |= v_signmask(v_cmp) << 16;

                    if (mask)
                    {
                        int k = j;

                        do
                        {
                            int l = trailingZeros32(mask);
                            k += l;
                            mask >>= l;

                            int m = _mag_a[k];
                            short xs = _dx[k];
                            short ys = _dy[k];
                            int x = (int)std::abs(xs);
                            int y = (int)std::abs(ys) << 15;

                            int tg22x = x * TG22;

                            if (y < tg22x)
                            {
                                if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
                                {
                                    CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                }
                            }
                            else
                            {
                                int tg67x = tg22x + (x << 16);
                                if (y > tg67x)
                                {
                                    if (m > _mag_p[k] && m >= _mag_n[k])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                                else
                                {
                                    int s = (xs ^ ys) < 0 ? -1 : 1;
                                    if(m > _mag_p[k - s] && m > _mag_n[k + s])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                            }
                            ++k;
                        } while((mask >>= 1));
                    }
                }

                if (j <= src.cols - 16)
                {
                    v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
                    v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
                    v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
                    v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));

                    v_store_aligned((signed char*)(_pmap + j), v_one);

                    v_int32x4 v_cmp1 = v_m1 > v_low;
                    v_int32x4 v_cmp2 = v_m2 > v_low;
                    v_int32x4 v_cmp3 = v_m3 > v_low;
                    v_int32x4 v_cmp4 = v_m4 > v_low;

                    v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
                    v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);

                    v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
                    unsigned int mask = v_signmask(v_cmp);

                    if (mask)
                    {
                        int k = j;

                        do
                        {
                            int l = trailingZeros32(mask);
                            k += l;
                            mask >>= l;

                            int m = _mag_a[k];
                            short xs = _dx[k];
                            short ys = _dy[k];
                            int x = (int)std::abs(xs);
                            int y = (int)std::abs(ys) << 15;

                            int tg22x = x * TG22;

                            if (y < tg22x)
                            {
                                if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
                                {
                                    CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                }
                            }
                            else
                            {
                                int tg67x = tg22x + (x << 16);
                                if (y > tg67x)
                                {
                                    if (m > _mag_p[k] && m >= _mag_n[k])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                                else
                                {
                                    int s = (xs ^ ys) < 0 ? -1 : 1;
                                    if(m > _mag_p[k - s] && m > _mag_n[k + s])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                            }
                            ++k;
                        } while((mask >>= 1));
                    }
                    j += 16;
                }
            }
#endif
            for (; j < src.cols; j++)
            {
                int m = _mag_a[j];

                if (m > low)
                {
                    short xs = _dx[j];
                    short ys = _dy[j];
                    int x = (int)std::abs(xs);
                    int y = (int)std::abs(ys) << 15;

                    int tg22x = x * TG22;

                    if (y < tg22x)
                    {
                        if (m > _mag_a[j - 1] && m >= _mag_a[j + 1])
                        {
                            CANNY_CHECK(m, high, (_pmap+j), stack);
                        }
                    }
                    else
                    {
                        int tg67x = tg22x + (x << 16);
                        if (y > tg67x)
                        {
                            if (m > _mag_p[j] && m >= _mag_n[j])
                            {
                                CANNY_CHECK(m, high, (_pmap+j), stack);
                            }
                        }
                        else
                        {
                            int s = (xs ^ ys) < 0 ? -1 : 1;
                            if(m > _mag_p[j - s] && m > _mag_n[j + s])
                            {
                                CANNY_CHECK(m, high, (_pmap+j), stack);
                            }
                        }
                    }
                }
                _pmap[j] = 1;
            }
        }