Beispiel #1
0
 // IPP attempt for a masked 8-bit single-channel copy. The enclosing function is not
 // visible in this excerpt; CV_IPP_CHECK is a macro defined elsewhere (presumably it
 // guards on IPP being usable at run time - confirm against its definition).
 CV_IPP_CHECK()
 {
     // A non-negative status is treated as success: record the IPP usage and leave
     // the enclosing function.
     if (ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0)
     {
         CV_IMPL_ADD(CV_IMPL_IPP);
         return;
     }
     // Negative status: flag the IPP error and continue with whatever follows this block.
     setIppErrorStatus();
 }
// Runs ippiNot_8u_C1R on a width x height block of 8-bit data.
// Returns 1 when the IPP call reported a non-negative status, and 0 when the
// CV_IPP_CHECK_COND gate is false or the IPP call failed (in the latter case
// setIppErrorStatus() is called first).
inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
{
    if (CV_IPP_CHECK_COND)
    {
        // A single row is handed to IPP with both strides set to exactly one row of bytes.
        if (height == 1)
        {
            step = width * sizeof(dst[0]);
            step1 = step;
        }

        const bool succeeded =
            CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)) >= 0;
        if (succeeded)
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return 1;
        }

        setIppErrorStatus();
    }
    return 0;
}
Beispiel #3
0
// Tries to compute a Sobel derivative of `src` into `dst` through Intel IPP.
//
//   src, dst : input and output matrices; only the (CV_8U -> CV_16S) and
//              (CV_32F -> CV_32F) type pairs are handled here.
//   ddepth   : requested output depth; a negative value is replaced by src.depth().
//              In the ksize 3/5 path it is not read again; it is forwarded to
//              IPPDerivScharr in the ksize <= 0 path.
//   dx, dy   : derivative orders; each branch below handles exactly one (dx, dy) pair.
//   ksize    : 3 or 5 selects the Sobel paths; ksize <= 0 delegates to IPPDerivScharr.
//   scale    : must be 1 for the 8u->16s path; for 32f it is applied afterwards with
//              ippiMulC_32f_C1R.
//
// Returns true when an IPP call produced the result, false when the combination is not
// handled here or an IPP call reported a negative status.
static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int ksize, double scale)
{
    int bufSize = 0;
    cv::AutoBuffer<char> buffer;
    if (ksize == 3 || ksize == 5)
    {
        if ( ddepth < 0 )
            ddepth = src.depth();

        // 8-bit input, 16-bit signed output; no scaling is available on this path.
        // (IppiMaskSize)(ksize*10+ksize) evaluates to 33 or 55 - presumably the values of
        // IPP's 3x3 / 5x5 mask-size enumerators; confirm against the IPP headers.
        if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1)
        {
            // First derivative in x.
            if ((dx == 1) && (dy == 0))
            {
                if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                return (0 <= ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
            }

            // First derivative in y.
            if ((dx == 0) && (dy == 1))
            {
                if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                return (0 <= ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
            }

            // Second derivative in x.
            if ((dx == 2) && (dy == 0))
            {
                if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                return (0 <= ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
            }

            // Second derivative in y.
            if ((dx == 0) && (dy == 2))
            {
                if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                return (0 <= ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
                                    (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                    ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
            }
        }

        // 32-bit float input and output; a scale other than 1 is applied in place after filtering.
        // NOTE(review): the status returned by ippiMulC_32f_C1R is ignored in every branch
        // below, so a failed scaling step would still be reported as success.
        if (src.type() == CV_32F && dst.type() == CV_32F)
        {
#if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiMulC_32f_C1R
            return false;
#else
            // The first-order 32f branches are compiled out; the reason is not stated in this file.
#if 0
            if ((dx == 1) && (dy == 0))
            {
                if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize))
                    return false;
                buffer.allocate(bufSize);

                if (0 > ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                {
                    return false;
                }
                if(scale != 1)
                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                return true;
            }

            if ((dx == 0) && (dy == 1))
            {
                if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                if (0 > ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                {
                    return false;
                }
                if(scale != 1)
                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                return true;
            }
#endif

            // Second derivative in x.
            if((dx == 2) && (dy == 0))
            {
                if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                if (0 > ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                {
                    return false;
                }
                if(scale != 1)
                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                return true;
            }

            // Second derivative in y.
            if((dx == 0) && (dy == 2))
            {
                if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                    return false;
                buffer.allocate(bufSize);

                if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
                                (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                {
                    return false;
                }

                if(scale != 1)
                    ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                return true;
            }
#endif
        }
    }

    // A non-positive kernel size is routed to the Scharr implementation.
    if(ksize <= 0)
        return IPPDerivScharr(src, dst, ddepth, dx, dy, scale);
    return false;
}
Beispiel #4
0
static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, double scale)
{
    int bufSize = 0;
    cv::AutoBuffer<char> buffer;
    IppiSize roi = ippiSize(src.cols, src.rows);

    if( ddepth < 0 )
        ddepth = src.depth();

    dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) );

    switch(src.type())
    {
    case CV_8U:
        {
            if(scale != 1)
                return false;

            switch(dst.type())
            {
            case CV_16S:
                {
                    if ((dx == 1) && (dy == 0))
                    {
                        if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize))
                            return false;
                        buffer.allocate(bufSize);
                        return (0 <= ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
                                        (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
                    }
                    if ((dx == 0) && (dy == 1))
                    {
                        if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize))
                            return false;
                        buffer.allocate(bufSize);
                        return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step,
                                            (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
                    }
                    return false;
                }
            default:
                return false;
            }
        }
    case CV_32F:
#if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiMulC_32f_C1R
        return false;
#else
        {
            switch(dst.type())
            {
            case CV_32F:
                {
                    if ((dx == 1) && (dy == 0))
                    {
                        if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
                            return false;
                        buffer.allocate(bufSize);

                        if (0 > ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
                                        (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
                                        ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                        {
                            return false;
                        }

                        if (scale != 1)
                            /* IPP is fast, so MulC produce very little perf degradation.*/
                            //ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                            ippiMulC_32f_C1R((Ipp32f*)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                        return true;
                    }
                    if ((dx == 0) && (dy == 1))
                    {
                        if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
                            return false;
                        buffer.allocate(bufSize);

                        if (0 > ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step,
                                        (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows),
                                        ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                            return false;

                        if (scale != 1)
                            ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                        return true;
                    }
                }
            default:
                return false;
            }
        }
#endif
    default:
        return false;
    }
}
// IPP wrapper for 32-bit float subtraction (ippiSub_32f_C1R) over a width x height region.
// The operands are passed to IPP in swapped order (src2 before src1). Intel documents
// ippiSub as computing pSrc2 - pSrc1, so this presumably yields src1 - src2 - confirm
// against the IPP reference.
// Control flow and the return value come from ARITHM_IPP_BIN, a macro defined outside this view.
inline int arithm_ipp_sub32f(const float* src1, size_t step1, const float* src2, size_t step2,
                            float* dst, size_t step, int width, int height)
{
    ARITHM_IPP_BIN(ippiSub_32f_C1R, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height));
}
// IPP wrapper that applies ippiXor_8u_C1R to two 8-bit inputs over a width x height region,
// writing into dst. Steps are narrowed to int for the IPP call.
// Control flow and the return value come from ARITHM_IPP_BIN, a macro defined outside this view.
inline int arithm_ipp_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                            uchar* dst, size_t step, int width, int height)
{
    ARITHM_IPP_BIN(ippiXor_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
// IPP wrapper for 8-bit subtraction (ippiSub_8u_C1RSfs) over a width x height region.
// The operands are passed to IPP in swapped order (src2 before src1). Intel documents
// ippiSub as computing pSrc2 - pSrc1, so this presumably yields src1 - src2 - confirm
// against the IPP reference.
// The trailing 0 is the extra argument of the Sfs variant (presumably the scale factor,
// i.e. no scaling - verify).
// Control flow and the return value come from ARITHM_IPP_BIN, a macro defined outside this view.
inline int arithm_ipp_sub8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                            uchar* dst, size_t step, int width, int height)
{
    ARITHM_IPP_BIN(ippiSub_8u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
// IPP wrapper for 16-bit unsigned addition (ippiAdd_16u_C1RSfs) over a width x height region.
// The trailing 0 is the extra argument of the Sfs variant (presumably the scale factor,
// i.e. no scaling - verify).
// Control flow and the return value come from ARITHM_IPP_BIN, a macro defined outside this view.
inline int arithm_ipp_add16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                             ushort* dst, size_t step, int width, int height)
{
    ARITHM_IPP_BIN(ippiAdd_16u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
// IPP wrapper for 32-bit float multiplication (ippiMul_32f_C1R) over a width x height region.
// `scale` is not referenced directly in this body; presumably it is consumed inside
// ARITHM_IPP_MUL - verify against the macro definition.
// Control flow and the return value come from ARITHM_IPP_MUL, a macro defined outside this view.
inline int arithm_ipp_mul32f(const float *src1, size_t step1, const float *src2, size_t step2,
                            float *dst, size_t step, int width, int height, double scale)
{
    ARITHM_IPP_MUL(ippiMul_32f_C1R, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height));
}
// IPP wrapper for 16-bit signed multiplication (ippiMul_16s_C1RSfs) over a width x height region.
// The trailing 0 is the extra argument of the Sfs variant (presumably the IPP scale factor,
// i.e. no scaling - verify).
// `scale` is not referenced directly in this body; presumably it is consumed inside
// ARITHM_IPP_MUL - verify against the macro definition.
// Control flow and the return value come from ARITHM_IPP_MUL, a macro defined outside this view.
inline int arithm_ipp_mul16s(const short *src1, size_t step1, const short *src2, size_t step2,
                            short *dst, size_t step, int width, int height, double scale)
{
    ARITHM_IPP_MUL(ippiMul_16s_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
// IPP wrapper for 8-bit unsigned multiplication (ippiMul_8u_C1RSfs) over a width x height region.
// The trailing 0 is the extra argument of the Sfs variant (presumably the IPP scale factor,
// i.e. no scaling - verify).
// `scale` is not referenced directly in this body; presumably it is consumed inside
// ARITHM_IPP_MUL - verify against the macro definition.
// Control flow and the return value come from ARITHM_IPP_MUL, a macro defined outside this view.
inline int arithm_ipp_mul8u(const uchar *src1, size_t step1, const uchar *src2, size_t step2,
                            uchar *dst, size_t step, int width, int height, double scale)
{
    ARITHM_IPP_MUL(ippiMul_8u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
// IPP wrapper for comparing two 32-bit float inputs (ippiCompare_32f_C1R) over a
// width x height region, writing an 8-bit result into dst.
// `cmpop` is not referenced directly in this body; presumably ARITHM_IPP_CMP uses it to
// select the IPP comparison mode - verify against the macro definition.
// Control flow and the return value come from ARITHM_IPP_CMP, a macro defined outside this view.
inline int arithm_ipp_cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
                             uchar* dst, size_t step, int width, int height, int cmpop)
{
    ARITHM_IPP_CMP(ippiCompare_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
Beispiel #13
0
            if( mask[x+2] )
                dst[x+2] = src[x+2];
            if( mask[x+3] )
                dst[x+3] = src[x+3];
        }
        #endif
        for( ; x < size.width; x++ )
            if( mask[x] )
                dst[x] = src[x];
    }
}

template<> void
copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
{
    CV_IPP_RUN(true, ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0)

    for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep )
    {
        const uchar* src = (const uchar*)_src;
        uchar* dst = (uchar*)_dst;
        int x = 0;
        #if CV_SSE4_2
        if(USE_SSE4_2)//
        {
            __m128i zero = _mm_setzero_si128 ();

             for( ; x <= size.width - 16; x += 16 )
             {
                 const __m128i rSrc = _mm_lddqu_si128((const __m128i*)(src+x));
                 __m128i _mask = _mm_lddqu_si128((const __m128i*)(mask+x));
// IPP wrapper for 16-bit signed subtraction (ippiSub_16s_C1RSfs) over a width x height region.
// The operands are passed to IPP in swapped order (src2 before src1). Intel documents
// ippiSub as computing pSrc2 - pSrc1, so this presumably yields src1 - src2 - confirm
// against the IPP reference.
// The trailing 0 is the extra argument of the Sfs variant (presumably the scale factor,
// i.e. no scaling - verify).
// Control flow and the return value come from ARITHM_IPP_BIN, a macro defined outside this view.
inline int arithm_ipp_sub16s(const short* src1, size_t step1, const short* src2, size_t step2,
                            short* dst, size_t step, int width, int height)
{
    ARITHM_IPP_BIN(ippiSub_16s_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
Beispiel #15
0
// Updates a motion-history image in place.
//
// For every pixel: where the silhouette is non-zero the history value becomes the
// (float) timestamp; otherwise a history value older than (timestamp - duration) is
// reset to 0 and anything else is left as it was.
//
//   _silhouette : CV_8UC1 mask, same size as _mhi (asserted).
//   _mhi        : CV_32FC1 motion-history image, modified in place (asserted).
//   timestamp   : value written where the silhouette is set.
//   duration    : history entries below timestamp - duration are cleared.
//
// An OpenCL path is tried first via CV_OCL_RUN, then (when built with HAVE_IPP) an IPP
// call, and finally the SSE2 / scalar loops below.
void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,
                              double timestamp, double duration )
{
    CV_Assert( _silhouette.type() == CV_8UC1 && _mhi.type() == CV_32FC1 );
    CV_Assert( _silhouette.sameSize(_mhi) );

    float stamp = (float)timestamp;
    float expiry = (float)(timestamp - duration);

    CV_OCL_RUN(_mhi.isUMat() && _mhi.dims() <= 2,
               ocl_updateMotionHistory(_silhouette, _mhi, stamp, expiry))

    Mat silh = _silhouette.getMat();
    Mat mhi = _mhi.getMat();
    Size size = silh.size();
#ifdef HAVE_IPP
    int silhstep = (int)silh.step;
    int mhistep = (int)mhi.step;
#endif

    // Continuous storage lets both images be processed as one long row.
    const bool flatten = silh.isContinuous() && mhi.isContinuous();
    if( flatten )
    {
        size.width *= size.height;
        size.height = 1;
#ifdef HAVE_IPP
        silhstep = (int)silh.total();
        mhistep = (int)mhi.total() * sizeof(Ipp32f);
#endif
    }

#ifdef HAVE_IPP
    // A non-negative IPP status means the whole update has been done.
    if (ippiUpdateMotionHistory_8u32f_C1IR((const Ipp8u *)silh.data, silhstep, (Ipp32f *)mhi.data, mhistep,
                                           ippiSize(size.width, size.height), (Ipp32f)timestamp, (Ipp32f)duration) >= 0)
        return;
#endif

#if CV_SSE2
    volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
#endif

    for( int row = 0; row < size.height; row++ )
    {
        const uchar* silhRow = silh.ptr<uchar>(row);
        float* mhiRow = mhi.ptr<float>(row);
        int x = 0;

#if CV_SSE2
        if( haveSSE2 )
        {
            const __m128 stamp4 = _mm_set1_ps(stamp);
            const __m128 expiry4 = _mm_set1_ps(expiry);
            const __m128i zeroI = _mm_setzero_si128();
            const __m128 zeroF = _mm_setzero_ps();

            for( ; x <= size.width - 8; x += 8 )
            {
                // Widen eight silhouette bytes into two quads of floats.
                const __m128i silh16 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(silhRow + x)), zeroI);
                const __m128 silhLo = _mm_cvtepi32_ps(_mm_unpacklo_epi16(silh16, zeroI));
                const __m128 silhHi = _mm_cvtepi32_ps(_mm_unpackhi_epi16(silh16, zeroI));

                __m128 histLo = _mm_loadu_ps(mhiRow + x);
                __m128 histHi = _mm_loadu_ps(mhiRow + x + 4);

                // Keep only entries that are >= expiry; the rest become 0.
                histLo = _mm_and_ps(histLo, _mm_cmpge_ps(histLo, expiry4));
                histHi = _mm_and_ps(histHi, _mm_cmpge_ps(histHi, expiry4));

                // Branchless select: XOR-ing a lane with (lane ^ stamp) turns it into
                // stamp, and the mask limits that to lanes with a non-zero silhouette.
                const __m128 flipLo = _mm_and_ps(_mm_xor_ps(histLo, stamp4), _mm_cmpneq_ps(silhLo, zeroF));
                const __m128 flipHi = _mm_and_ps(_mm_xor_ps(histHi, stamp4), _mm_cmpneq_ps(silhHi, zeroF));

                histLo = _mm_xor_ps(histLo, flipLo);
                histHi = _mm_xor_ps(histHi, flipHi);

                _mm_storeu_ps(mhiRow + x, histLo);
                _mm_storeu_ps(mhiRow + x + 4, histHi);
            }
        }
#endif

        // Scalar tail (or the whole row when SSE2 is unavailable).
        for( ; x < size.width; x++ )
        {
            const float previous = mhiRow[x];
            float updated;
            if( silhRow[x] )
                updated = stamp;
            else if( previous < expiry )
                updated = 0;
            else
                updated = previous;
            mhiRow[x] = updated;
        }
    }
}
Beispiel #16
0
            if( mask[x+2] )
                dst[x+2] = src[x+2];
            if( mask[x+3] )
                dst[x+3] = src[x+3];
        }
        #endif
        for( ; x < size.width; x++ )
            if( mask[x] )
                dst[x] = src[x];
    }
}

template<> void
copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
{
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C1MR, _src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0)

    for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep )
    {
        const uchar* src = (const uchar*)_src;
        uchar* dst = (uchar*)_dst;
        int x = 0;
        #if CV_SIMD128
        {
            v_uint8x16 v_zero = v_setzero_u8();

            for( ; x <= size.width - 16; x += 16 )
            {
                v_uint8x16 v_src   = v_load(src  + x),
                           v_dst   = v_load(dst  + x),
                           v_nmask = v_load(mask + x) == v_zero;
// IPP wrapper that applies ippiAbsDiff_16u_C1R to two 16-bit unsigned inputs over a
// width x height region, writing into dst. Steps are narrowed to int for the IPP call.
// Control flow and the return value come from ARITHM_IPP_BIN, a macro defined outside this view.
inline int arithm_ipp_absdiff16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                                ushort* dst, size_t step, int width, int height)
{
    ARITHM_IPP_BIN(ippiAbsDiff_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
Beispiel #18
-20
// Initialises the IwiImage `dst` to refer to the pixel data of `src`.
// When `src` is a sub-matrix, the rows and columns of the parent matrix that surround
// it are reported to IPP as an in-memory border; otherwise the border object is left
// as default-constructed.
static inline void ippiGetImage(const cv::Mat &src, ::ipp::IwiImage &dst)
{
    ::ipp::IwiBorderSize inMemBorder;

    if(src.isSubmatrix())
    {
        // Ask where this view sits inside its parent matrix.
        cv::Size  parentSize;
        cv::Point roiOrigin;
        src.locateROI(parentSize, roiOrigin);

        // Whatever the parent holds beyond the view on each side is usable border.
        const int spareRight  = parentSize.width - src.cols - roiOrigin.x;
        const int spareBottom = parentSize.height - src.rows - roiOrigin.y;

        inMemBorder.left   = (IwSize)roiOrigin.x;
        inMemBorder.top    = (IwSize)roiOrigin.y;
        inMemBorder.right  = (IwSize)spareRight;
        inMemBorder.bottom = (IwSize)spareBottom;
    }

    dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step);
}