    TheTest & test_loadstore()
    {
        AlignedData<R> data;
        AlignedData<R> out;

        // check if addresses are aligned and unaligned respectively
        EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
        EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
        EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
        EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);

        // check some initialization methods
        R r1 = data.a;
        R r2 = v_load(data.u.d);
        R r3 = v_load_aligned(data.a.d);
        R r4(r2);
        EXPECT_EQ(data.a[0], r1.get0());
        EXPECT_EQ(data.u[0], r2.get0());
        EXPECT_EQ(data.a[0], r3.get0());
        EXPECT_EQ(data.u[0], r4.get0());

        R r_low = v_load_low((LaneType*)data.u.d);
        EXPECT_EQ(data.u[0], r_low.get0());
        v_store(out.u.d, r_low);
        for (int i = 0; i < R::nlanes/2; ++i)
        {
            EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
        }

        R r_low_align8byte = v_load_low((LaneType*)((char*)data.u.d + 8));
        EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0());
        v_store(out.u.d, r_low_align8byte);
        for (int i = 0; i < R::nlanes/2; ++i)
        {
            EXPECT_EQ((LaneType)data.u[i + R::nlanes/2], (LaneType)out.u[i]);
        }

        // check some store methods
        out.u.clear();
        out.a.clear();
        v_store(out.u.d, r1);
        v_store_aligned(out.a.d, r2);
        EXPECT_EQ(data.a, out.a);
        EXPECT_EQ(data.u, out.u);

        // check more store methods
        Data<R> d, res(0);
        R r5 = d;
        v_store_high(res.mid(), r5);
        v_store_low(res.d, r5);
        EXPECT_EQ(d, res);

        // check halves load correctness
        res.clear();
        R r6 = v_load_halves(d.d, d.mid());
        v_store(res.d, r6);
        EXPECT_EQ(d, res);

        // zero, all
        Data<R> resZ = V_RegTrait128<LaneType>::zero();
        Data<R> resV = V_RegTrait128<LaneType>::all(8);
        for (int i = 0; i < R::nlanes; ++i)
        {
            EXPECT_EQ((LaneType)0, resZ[i]);
            EXPECT_EQ((LaneType)8, resV[i]);
        }

        // reinterpret_as
        v_uint8x16 vu8 = v_reinterpret_as_u8(r1);    out.a.clear(); v_store((uchar*)out.a.d, vu8);     EXPECT_EQ(data.a, out.a);
        v_int8x16 vs8 = v_reinterpret_as_s8(r1);     out.a.clear(); v_store((schar*)out.a.d, vs8);     EXPECT_EQ(data.a, out.a);
        v_uint16x8 vu16 = v_reinterpret_as_u16(r1);  out.a.clear(); v_store((ushort*)out.a.d, vu16);   EXPECT_EQ(data.a, out.a);
        v_int16x8 vs16 = v_reinterpret_as_s16(r1);   out.a.clear(); v_store((short*)out.a.d, vs16);    EXPECT_EQ(data.a, out.a);
        v_uint32x4 vu32 = v_reinterpret_as_u32(r1);  out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
        v_int32x4 vs32 = v_reinterpret_as_s32(r1);   out.a.clear(); v_store((int*)out.a.d, vs32);      EXPECT_EQ(data.a, out.a);
        v_uint64x2 vu64 = v_reinterpret_as_u64(r1);  out.a.clear(); v_store((uint64*)out.a.d, vu64);   EXPECT_EQ(data.a, out.a);
        v_int64x2 vs64 = v_reinterpret_as_s64(r1);   out.a.clear(); v_store((int64*)out.a.d, vs64);    EXPECT_EQ(data.a, out.a);
        v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32);    EXPECT_EQ(data.a, out.a);
#if CV_SIMD128_64F
        v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64);   EXPECT_EQ(data.a, out.a);
#endif

        return *this;
    }
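// A minimal usage sketch (not part of the original sources) of the universal
// intrinsics load/store API exercised by test_loadstore() above: unaligned
// v_load/v_store over a float buffer with a scalar tail. It assumes CV_SIMD128
// and the OpenCV "intrin.hpp" header; the helper name scaleBuffer and its
// parameters are hypothetical, chosen only for illustration.
#include <opencv2/core/hal/intrin.hpp>

static void scaleBuffer(const float* src, float* dst, int n, float k)
{
#if CV_SIMD128
    const cv::v_float32x4 v_k = cv::v_setall_f32(k);
    int i = 0;
    // process 4 lanes per iteration with unaligned loads/stores
    for (; i <= n - 4; i += 4)
        cv::v_store(dst + i, cv::v_load(src + i) * v_k);
    // scalar tail for the remaining elements
    for (; i < n; ++i)
        dst[i] = src[i] * k;
#else
    for (int i = 0; i < n; ++i)
        dst[i] = src[i] * k;
#endif
}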
    void operator()(const Range &boundaries) const
    {
        CV_TRACE_FUNCTION();

        Mat dx, dy;
        AutoBuffer<short> dxMax(0), dyMax(0);
        std::deque<uchar*> stack, borderPeaksLocal;
        const int rowStart = max(0, boundaries.start - 1), rowEnd = min(src.rows, boundaries.end + 1);
        int *_mag_p, *_mag_a, *_mag_n;
        short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL;
        uchar *_pmap;
        double scale = 1.0;

        CV_TRACE_REGION("gradient")
        if(needGradient)
        {
            if (aperture_size == 7)
            {
                scale = 1 / 16.0;
            }
            Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
            Sobel(src.rowRange(rowStart, rowEnd), dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
        }
        else
        {
            dx = src.rowRange(rowStart, rowEnd);
            dy = src2.rowRange(rowStart, rowEnd);
        }

        CV_TRACE_REGION_NEXT("magnitude");
        if(cn > 1)
        {
            dxMax.allocate(2 * dx.cols);
            dyMax.allocate(2 * dy.cols);
            _dx_a = (short*)dxMax;
            _dx_n = _dx_a + dx.cols;
            _dy_a = (short*)dyMax;
            _dy_n = _dy_a + dy.cols;
        }

        // _mag_p: previous row, _mag_a: actual row, _mag_n: next row
#if CV_SIMD128
        AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128));
        _mag_p = alignPtr((int*)buffer + 1, CV_MALLOC_SIMD128);
        _mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128);
        _mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128);
#else
        AutoBuffer<int> buffer(3 * (mapstep * cn));
        _mag_p = (int*)buffer + 1;
        _mag_a = _mag_p + mapstep * cn;
        _mag_n = _mag_a + mapstep * cn;
#endif

        // For the first time when just 2 rows are filled and for left and right borders
        if(rowStart == boundaries.start)
            memset(_mag_n - 1, 0, mapstep * sizeof(int));
        else
            _mag_n[src.cols] = _mag_n[-1] = 0;

        _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0;

        // calculate magnitude and angle of gradient, perform non-maxima suppression.
        // fill the map with one of the following values:
        //   0 - the pixel might belong to an edge
        //   1 - the pixel can not belong to an edge
        //   2 - the pixel does belong to an edge
        for (int i = rowStart; i <= boundaries.end; ++i)
        {
            // Scroll the ring buffer
            std::swap(_mag_n, _mag_a);
            std::swap(_mag_n, _mag_p);

            if(i < rowEnd)
            {
                // Next row calculation
                _dx = dx.ptr<short>(i - rowStart);
                _dy = dy.ptr<short>(i - rowStart);

                if (L2gradient)
                {
                    int j = 0, width = src.cols * cn;
#if CV_SIMD128
                    if (haveSIMD)
                    {
                        for ( ; j <= width - 8; j += 8)
                        {
                            v_int16x8 v_dx = v_load((const short*)(_dx + j));
                            v_int16x8 v_dy = v_load((const short*)(_dy + j));

                            v_int32x4 v_dxp_low, v_dxp_high;
                            v_int32x4 v_dyp_low, v_dyp_high;
                            v_expand(v_dx, v_dxp_low, v_dxp_high);
                            v_expand(v_dy, v_dyp_low, v_dyp_high);

                            v_store_aligned((int *)(_mag_n + j), v_dxp_low*v_dxp_low + v_dyp_low*v_dyp_low);
                            v_store_aligned((int *)(_mag_n + j + 4), v_dxp_high*v_dxp_high + v_dyp_high*v_dyp_high);
                        }
                    }
#endif
                    for ( ; j < width; ++j)
                        _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
                }
                else
                {
                    int j = 0, width = src.cols * cn;
#if CV_SIMD128
                    if (haveSIMD)
                    {
                        for(; j <= width - 8; j += 8)
                        {
                            v_int16x8 v_dx = v_load((const short *)(_dx + j));
                            v_int16x8 v_dy = v_load((const short *)(_dy + j));

                            v_dx = v_reinterpret_as_s16(v_abs(v_dx));
                            v_dy = v_reinterpret_as_s16(v_abs(v_dy));

                            v_int32x4 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh;
                            v_expand(v_dx, v_dx_ml, v_dx_mh);
                            v_expand(v_dy, v_dy_ml, v_dy_mh);

                            v_store_aligned((int *)(_mag_n + j), v_dx_ml + v_dy_ml);
                            v_store_aligned((int *)(_mag_n + j + 4), v_dx_mh + v_dy_mh);
                        }
                    }
#endif
                    for ( ; j < width; ++j)
                        _mag_n[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
                }

                if(cn > 1)
                {
                    std::swap(_dx_n, _dx_a);
                    std::swap(_dy_n, _dy_a);

                    for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
                    {
                        int maxIdx = jn;
                        for(int k = 1; k < cn; ++k)
                            if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k;
                        _mag_n[j] = _mag_n[maxIdx];
                        _dx_n[j] = _dx[maxIdx];
                        _dy_n[j] = _dy[maxIdx];
                    }
                    _mag_n[src.cols] = 0;
                }

                // at the very beginning we do not have a complete ring
                // buffer of 3 magnitude rows for non-maxima suppression
                if (i <= boundaries.start)
                    continue;
            }
            else
            {
                memset(_mag_n - 1, 0, mapstep * sizeof(int));

                if(cn > 1)
                {
                    std::swap(_dx_n, _dx_a);
                    std::swap(_dy_n, _dy_a);
                }
            }

            // From here actual src row is (i - 1)
            // Set left and right border to 1
#if CV_SIMD128
            if(haveSIMD)
                _pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128;
            else
#endif
                _pmap = map.ptr<uchar>(i) + 1;

            _pmap[src.cols] = _pmap[-1] = 1;

            if(cn == 1)
            {
                _dx = dx.ptr<short>(i - rowStart - 1);
                _dy = dy.ptr<short>(i - rowStart - 1);
            }
            else
            {
                _dx = _dx_a;
                _dy = _dy_a;
            }

            const int TG22 = 13573;
            int j = 0;
#if CV_SIMD128
            if (haveSIMD)
            {
                const v_int32x4 v_low = v_setall_s32(low);
                const v_int8x16 v_one = v_setall_s8(1);

                for (; j <= src.cols - 32; j += 32)
                {
                    v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
                    v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
                    v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
                    v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));

                    v_int32x4 v_cmp1 = v_m1 > v_low;
                    v_int32x4 v_cmp2 = v_m2 > v_low;
                    v_int32x4 v_cmp3 = v_m3 > v_low;
                    v_int32x4 v_cmp4 = v_m4 > v_low;

                    v_m1 = v_load_aligned((const int*)(_mag_a + j + 16));
                    v_m2 = v_load_aligned((const int*)(_mag_a + j + 20));
                    v_m3 = v_load_aligned((const int*)(_mag_a + j + 24));
                    v_m4 = v_load_aligned((const int*)(_mag_a + j + 28));

                    v_store_aligned((signed char*)(_pmap + j), v_one);
                    v_store_aligned((signed char*)(_pmap + j + 16), v_one);

                    v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
                    v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);

                    v_cmp1 = v_m1 > v_low;
                    v_cmp2 = v_m2 > v_low;
                    v_cmp3 = v_m3 > v_low;
                    v_cmp4 = v_m4 > v_low;

                    v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
                    v_cmp80 = v_pack(v_cmp1, v_cmp2);
                    v_cmp81 = v_pack(v_cmp3, v_cmp4);

                    unsigned int mask = v_signmask(v_cmp);
                    v_cmp = v_pack(v_cmp80, v_cmp81);
                    mask |= v_signmask(v_cmp) << 16;

                    if (mask)
                    {
                        int k = j;

                        do
                        {
                            int l = trailingZeros32(mask);
                            k += l; mask >>= l;

                            int m = _mag_a[k];
                            short xs = _dx[k];
                            short ys = _dy[k];
                            int x = (int)std::abs(xs);
                            int y = (int)std::abs(ys) << 15;

                            int tg22x = x * TG22;

                            if (y < tg22x)
                            {
                                if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
                                {
                                    CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                }
                            }
                            else
                            {
                                int tg67x = tg22x + (x << 16);
                                if (y > tg67x)
                                {
                                    if (m > _mag_p[k] && m >= _mag_n[k])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                                else
                                {
                                    int s = (xs ^ ys) < 0 ? -1 : 1;
                                    if(m > _mag_p[k - s] && m > _mag_n[k + s])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                            }
                            ++k;
                        } while((mask >>= 1));
                    }
                }

                if (j <= src.cols - 16)
                {
                    v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
                    v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
                    v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
                    v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));

                    v_store_aligned((signed char*)(_pmap + j), v_one);

                    v_int32x4 v_cmp1 = v_m1 > v_low;
                    v_int32x4 v_cmp2 = v_m2 > v_low;
                    v_int32x4 v_cmp3 = v_m3 > v_low;
                    v_int32x4 v_cmp4 = v_m4 > v_low;

                    v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
                    v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);

                    v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
                    unsigned int mask = v_signmask(v_cmp);

                    if (mask)
                    {
                        int k = j;

                        do
                        {
                            int l = trailingZeros32(mask);
                            k += l; mask >>= l;

                            int m = _mag_a[k];
                            short xs = _dx[k];
                            short ys = _dy[k];
                            int x = (int)std::abs(xs);
                            int y = (int)std::abs(ys) << 15;

                            int tg22x = x * TG22;

                            if (y < tg22x)
                            {
                                if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
                                {
                                    CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                }
                            }
                            else
                            {
                                int tg67x = tg22x + (x << 16);
                                if (y > tg67x)
                                {
                                    if (m > _mag_p[k] && m >= _mag_n[k])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                                else
                                {
                                    int s = (xs ^ ys) < 0 ? -1 : 1;
                                    if(m > _mag_p[k - s] && m > _mag_n[k + s])
                                    {
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
                                    }
                                }
                            }
                            ++k;
                        } while((mask >>= 1));
                    }
                    j += 16;
                }
            }
#endif
            for (; j < src.cols; j++)
            {
                int m = _mag_a[j];

                if (m > low)
                {
                    short xs = _dx[j];
                    short ys = _dy[j];
                    int x = (int)std::abs(xs);
                    int y = (int)std::abs(ys) << 15;

                    int tg22x = x * TG22;

                    if (y < tg22x)
                    {
                        if (m > _mag_a[j - 1] && m >= _mag_a[j + 1])
                        {
                            CANNY_CHECK(m, high, (_pmap+j), stack);
                        }
                    }
                    else
                    {
                        int tg67x = tg22x + (x << 16);
                        if (y > tg67x)
                        {
                            if (m > _mag_p[j] && m >= _mag_n[j])
                            {
                                CANNY_CHECK(m, high, (_pmap+j), stack);
                            }
                        }
                        else
                        {
                            int s = (xs ^ ys) < 0 ? -1 : 1;
                            if(m > _mag_p[j - s] && m > _mag_n[j + s])
                            {
                                CANNY_CHECK(m, high, (_pmap+j), stack);
                            }
                        }
                    }
                }
                _pmap[j] = 1;
            }
        }
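// A worked sketch (not part of the original file) of the fixed-point sector
// test used in the non-maxima suppression loops above. TG22 = 13573 is
// round(tan(22.5 deg) * 2^15); shifting |dy| left by 15 puts it on the same
// Q15 scale, and tg67x = tg22x + (x << 16) equals x * tan(67.5 deg) in that
// scale because tan(67.5 deg) = tan(22.5 deg) + 2. The enum and function
// names below are hypothetical, chosen only for illustration.
#include <cstdlib>

enum GradSector { SECTOR_HORIZONTAL, SECTOR_VERTICAL, SECTOR_DIAGONAL };

static GradSector classifySector(short xs, short ys)
{
    const int TG22 = 13573;            // tan(22.5 deg) in Q15
    int x = (int)std::abs(xs);
    int y = (int)std::abs(ys) << 15;   // |dy| scaled to Q15, comparable with x * TG22
    int tg22x = x * TG22;              // x * tan(22.5 deg) in Q15

    if (y < tg22x)
        return SECTOR_HORIZONTAL;      // gradient nearly horizontal: compare left/right neighbours

    int tg67x = tg22x + (x << 16);     // x * tan(67.5 deg) in Q15
    if (y > tg67x)
        return SECTOR_VERTICAL;        // gradient nearly vertical: compare rows above/below

    return SECTOR_DIAGONAL;            // otherwise compare along the diagonal picked by sign(xs*ys)
}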