// IPP fast path for a masked 8-bit single-channel copy.
// Calls ippiCopy_8u_C1MR with the source, destination and mask pointers and their strides
// (each narrowed to int). A non-negative status registers IPP usage through CV_IMPL_ADD and
// returns from the enclosing function; any other status calls setIppErrorStatus() and lets
// execution continue after this block.
// NOTE(review): CV_IPP_CHECK() is presumably a conditional guard macro and the enclosing
// function is not visible in this chunk — confirm both against the full file.
CV_IPP_CHECK() { if (ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); }
// Bitwise NOT of an 8-bit single-channel image through IPP (ippiNot_8u_C1R).
//
// Returns 1 when the IPP call reported a non-negative status (IPP usage is then
// registered via CV_IMPL_ADD), and 0 when CV_IPP_CHECK_COND is false or the call
// failed (setIppErrorStatus() is invoked in the failure case).
inline int arithm_ipp_not8u(const uchar* src1, size_t step1,
                            uchar* dst, size_t step,
                            int width, int height)
{
    if (!CV_IPP_CHECK_COND)
        return 0;

    // For a single row both strides are overwritten with the row size in bytes.
    if (height == 1)
    {
        step = width * sizeof(dst[0]);
        step1 = step;
    }

    const bool succeeded =
        CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)) >= 0;
    if (!succeeded)
    {
        setIppErrorStatus();
        return 0;
    }

    CV_IMPL_ADD(CV_IMPL_IPP);
    return 1;
}
static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int ksize, double scale) { int bufSize = 0; cv::AutoBuffer<char> buffer; if (ksize == 3 || ksize == 5) { if ( ddepth < 0 ) ddepth = src.depth(); if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1) { if ((dx == 1) && (dy == 0)) { if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); return (0 <= ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); } if ((dx == 0) && (dy == 1)) { if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); return (0 <= ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); } if ((dx == 2) && (dy == 0)) { if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); return (0 <= ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); } if ((dx == 0) && (dy == 2)) { if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); return (0 <= ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, 
(Ipp8u*)(char*)buffer)); } } if (src.type() == CV_32F && dst.type() == CV_32F) { #if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiMulC_32f_C1R return false; #else #if 0 if ((dx == 1) && (dy == 0)) { if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize)) return false; buffer.allocate(bufSize); if (0 > ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) { return false; } if(scale != 1) ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } if ((dx == 0) && (dy == 1)) { if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); if (0 > ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) { return false; } if(scale != 1) ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } #endif if((dx == 2) && (dy == 0)) { if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); if (0 > ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) { return false; } if(scale != 1) ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 
return true; } if((dx == 0) && (dy == 2)) { if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) return false; buffer.allocate(bufSize); if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) { return false; } if(scale != 1) ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } #endif } } if(ksize <= 0) return IPPDerivScharr(src, dst, ddepth, dx, dy, scale); return false; }
static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, double scale) { int bufSize = 0; cv::AutoBuffer<char> buffer; IppiSize roi = ippiSize(src.cols, src.rows); if( ddepth < 0 ) ddepth = src.depth(); dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); switch(src.type()) { case CV_8U: { if(scale != 1) return false; switch(dst.type()) { case CV_16S: { if ((dx == 1) && (dy == 0)) { if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize)) return false; buffer.allocate(bufSize); return (0 <= ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); } if ((dx == 0) && (dy == 1)) { if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize)) return false; buffer.allocate(bufSize); return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); } return false; } default: return false; } } case CV_32F: #if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiMulC_32f_C1R return false; #else { switch(dst.type()) { case CV_32F: { if ((dx == 1) && (dy == 0)) { if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize)) return false; buffer.allocate(bufSize); if (0 > ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) { return false; } if (scale != 1) /* IPP is fast, so MulC produce very little perf degradation.*/ //ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); ippiMulC_32f_C1R((Ipp32f*)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } if ((dx == 0) && (dy == 1)) { if (0 > 
ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize)) return false; buffer.allocate(bufSize); if (0 > ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) return false; if (scale != 1) ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } } default: return false; } } #endif default: return false; } }
// IPP-backed element-wise subtraction of two 32-bit float images (ippiSub_32f_C1R).
// The operands are forwarded in swapped order: src2 first, then src1.
// NOTE(review): presumably because ippiSub subtracts its first source from its second — confirm
// against the IPP reference. The return statement is expected to come from ARITHM_IPP_BIN,
// whose definition is outside this chunk.
inline int arithm_ipp_sub32f(const float* src1, size_t step1,
                             const float* src2, size_t step2,
                             float* dst, size_t step,
                             int width, int height)
{
    ARITHM_IPP_BIN(ippiSub_32f_C1R,
                   src2, (int)step2,
                   src1, (int)step1,
                   dst, (int)step,
                   ippiSize(width, height));
}
// IPP-backed element-wise bitwise XOR of two 8-bit images (ippiXor_8u_C1R).
// Strides are narrowed to int and the region is ippiSize(width, height).
// The return statement is expected to come from ARITHM_IPP_BIN, whose definition is
// outside this chunk.
inline int arithm_ipp_xor8u(const uchar* src1, size_t step1,
                            const uchar* src2, size_t step2,
                            uchar* dst, size_t step,
                            int width, int height)
{
    ARITHM_IPP_BIN(ippiXor_8u_C1R,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height));
}
// IPP-backed element-wise subtraction of two 8-bit images (ippiSub_8u_C1RSfs).
// The operands are forwarded in swapped order: src2 first, then src1.
// NOTE(review): presumably because ippiSub subtracts its first source from its second, and the
// trailing 0 is presumably the IPP integer scale factor (no scaling) — confirm against the IPP
// reference. The return statement is expected to come from ARITHM_IPP_BIN (defined elsewhere).
inline int arithm_ipp_sub8u(const uchar* src1, size_t step1,
                            const uchar* src2, size_t step2,
                            uchar* dst, size_t step,
                            int width, int height)
{
    ARITHM_IPP_BIN(ippiSub_8u_C1RSfs,
                   src2, (int)step2,
                   src1, (int)step1,
                   dst, (int)step,
                   ippiSize(width, height),
                   0);
}
// IPP-backed element-wise addition of two 16-bit unsigned images (ippiAdd_16u_C1RSfs).
// NOTE(review): the trailing 0 is presumably the IPP integer scale factor (no scaling) — confirm.
// The return statement is expected to come from ARITHM_IPP_BIN, whose definition is
// outside this chunk.
inline int arithm_ipp_add16u(const ushort* src1, size_t step1,
                             const ushort* src2, size_t step2,
                             ushort* dst, size_t step,
                             int width, int height)
{
    ARITHM_IPP_BIN(ippiAdd_16u_C1RSfs,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height),
                   0);
}
// IPP-backed element-wise multiplication of two 32-bit float images (ippiMul_32f_C1R).
// NOTE(review): `scale` is not referenced directly in this body; presumably ARITHM_IPP_MUL
// (defined elsewhere) reads it and also supplies the return statement — confirm.
inline int arithm_ipp_mul32f(const float *src1, size_t step1,
                             const float *src2, size_t step2,
                             float *dst, size_t step,
                             int width, int height, double scale)
{
    ARITHM_IPP_MUL(ippiMul_32f_C1R,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height));
}
// IPP-backed element-wise multiplication of two 16-bit signed images (ippiMul_16s_C1RSfs).
// NOTE(review): `scale` is not referenced directly in this body; presumably ARITHM_IPP_MUL
// (defined elsewhere) reads it and also supplies the return statement. The trailing 0 is
// presumably the IPP integer scale factor (no scaling) — confirm both.
inline int arithm_ipp_mul16s(const short *src1, size_t step1,
                             const short *src2, size_t step2,
                             short *dst, size_t step,
                             int width, int height, double scale)
{
    ARITHM_IPP_MUL(ippiMul_16s_C1RSfs,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height),
                   0);
}
// IPP-backed element-wise multiplication of two 8-bit images (ippiMul_8u_C1RSfs).
// NOTE(review): `scale` is not referenced directly in this body; presumably ARITHM_IPP_MUL
// (defined elsewhere) reads it and also supplies the return statement. The trailing 0 is
// presumably the IPP integer scale factor (no scaling) — confirm both.
inline int arithm_ipp_mul8u(const uchar *src1, size_t step1,
                            const uchar *src2, size_t step2,
                            uchar *dst, size_t step,
                            int width, int height, double scale)
{
    ARITHM_IPP_MUL(ippiMul_8u_C1RSfs,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height),
                   0);
}
// IPP-backed element-wise comparison of two 32-bit float images into an 8-bit result
// (ippiCompare_32f_C1R).
// NOTE(review): `cmpop` is not referenced directly in this body; presumably ARITHM_IPP_CMP
// (defined elsewhere) translates it for IPP and also supplies the return statement — confirm.
inline int arithm_ipp_cmp32f(const float* src1, size_t step1,
                             const float* src2, size_t step2,
                             uchar* dst, size_t step,
                             int width, int height, int cmpop)
{
    ARITHM_IPP_CMP(ippiCompare_32f_C1R,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height));
}
if( mask[x+2] ) dst[x+2] = src[x+2]; if( mask[x+3] ) dst[x+3] = src[x+3]; } #endif for( ; x < size.width; x++ ) if( mask[x] ) dst[x] = src[x]; } } template<> void copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size) { CV_IPP_RUN(true, ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0) for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep ) { const uchar* src = (const uchar*)_src; uchar* dst = (uchar*)_dst; int x = 0; #if CV_SSE4_2 if(USE_SSE4_2)// { __m128i zero = _mm_setzero_si128 (); for( ; x <= size.width - 16; x += 16 ) { const __m128i rSrc = _mm_lddqu_si128((const __m128i*)(src+x)); __m128i _mask = _mm_lddqu_si128((const __m128i*)(mask+x));
// IPP-backed element-wise subtraction of two 16-bit signed images (ippiSub_16s_C1RSfs).
// The operands are forwarded in swapped order: src2 first, then src1.
// NOTE(review): presumably because ippiSub subtracts its first source from its second, and the
// trailing 0 is presumably the IPP integer scale factor (no scaling) — confirm against the IPP
// reference. The return statement is expected to come from ARITHM_IPP_BIN (defined elsewhere).
inline int arithm_ipp_sub16s(const short* src1, size_t step1,
                             const short* src2, size_t step2,
                             short* dst, size_t step,
                             int width, int height)
{
    ARITHM_IPP_BIN(ippiSub_16s_C1RSfs,
                   src2, (int)step2,
                   src1, (int)step1,
                   dst, (int)step,
                   ippiSize(width, height),
                   0);
}
void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi, double timestamp, double duration ) { CV_Assert( _silhouette.type() == CV_8UC1 && _mhi.type() == CV_32FC1 ); CV_Assert( _silhouette.sameSize(_mhi) ); float ts = (float)timestamp; float delbound = (float)(timestamp - duration); CV_OCL_RUN(_mhi.isUMat() && _mhi.dims() <= 2, ocl_updateMotionHistory(_silhouette, _mhi, ts, delbound)) Mat silh = _silhouette.getMat(), mhi = _mhi.getMat(); Size size = silh.size(); #ifdef HAVE_IPP int silhstep = (int)silh.step, mhistep = (int)mhi.step; #endif if( silh.isContinuous() && mhi.isContinuous() ) { size.width *= size.height; size.height = 1; #ifdef HAVE_IPP silhstep = (int)silh.total(); mhistep = (int)mhi.total() * sizeof(Ipp32f); #endif } #ifdef HAVE_IPP IppStatus status = ippiUpdateMotionHistory_8u32f_C1IR((const Ipp8u *)silh.data, silhstep, (Ipp32f *)mhi.data, mhistep, ippiSize(size.width, size.height), (Ipp32f)timestamp, (Ipp32f)duration); if (status >= 0) return; #endif #if CV_SSE2 volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2); #endif for(int y = 0; y < size.height; y++ ) { const uchar* silhData = silh.ptr<uchar>(y); float* mhiData = mhi.ptr<float>(y); int x = 0; #if CV_SSE2 if( useSIMD ) { __m128 ts4 = _mm_set1_ps(ts), db4 = _mm_set1_ps(delbound); for( ; x <= size.width - 8; x += 8 ) { __m128i z = _mm_setzero_si128(); __m128i s = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(silhData + x)), z); __m128 s0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(s, z)), s1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(s, z)); __m128 v0 = _mm_loadu_ps(mhiData + x), v1 = _mm_loadu_ps(mhiData + x + 4); __m128 fz = _mm_setzero_ps(); v0 = _mm_and_ps(v0, _mm_cmpge_ps(v0, db4)); v1 = _mm_and_ps(v1, _mm_cmpge_ps(v1, db4)); __m128 m0 = _mm_and_ps(_mm_xor_ps(v0, ts4), _mm_cmpneq_ps(s0, fz)); __m128 m1 = _mm_and_ps(_mm_xor_ps(v1, ts4), _mm_cmpneq_ps(s1, fz)); v0 = _mm_xor_ps(v0, m0); v1 = _mm_xor_ps(v1, m1); _mm_storeu_ps(mhiData + x, v0); _mm_storeu_ps(mhiData + x + 4, 
v1); } } #endif for( ; x < size.width; x++ ) { float val = mhiData[x]; val = silhData[x] ? ts : val < delbound ? 0 : val; mhiData[x] = val; } } }
if( mask[x+2] ) dst[x+2] = src[x+2]; if( mask[x+3] ) dst[x+3] = src[x+3]; } #endif for( ; x < size.width; x++ ) if( mask[x] ) dst[x] = src[x]; } } template<> void copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size) { CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C1MR, _src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0) for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep ) { const uchar* src = (const uchar*)_src; uchar* dst = (uchar*)_dst; int x = 0; #if CV_SIMD128 { v_uint8x16 v_zero = v_setzero_u8(); for( ; x <= size.width - 16; x += 16 ) { v_uint8x16 v_src = v_load(src + x), v_dst = v_load(dst + x), v_nmask = v_load(mask + x) == v_zero;
// IPP-backed element-wise absolute difference of two 16-bit unsigned images
// (ippiAbsDiff_16u_C1R). Strides are narrowed to int and the region is ippiSize(width, height).
// The return statement is expected to come from ARITHM_IPP_BIN, whose definition is
// outside this chunk.
inline int arithm_ipp_absdiff16u(const ushort* src1, size_t step1,
                                 const ushort* src2, size_t step2,
                                 ushort* dst, size_t step,
                                 int width, int height)
{
    ARITHM_IPP_BIN(ippiAbsDiff_16u_C1R,
                   src1, (int)step1,
                   src2, (int)step2,
                   dst, (int)step,
                   ippiSize(width, height));
}
// Initializes an IwiImage descriptor `dst` over the pixel data of `src`; the data pointer
// is handed to dst.Init() as-is.
//
// When `src` is a submatrix, the pixels of the parent matrix that surround the ROI are
// reported to IW as an in-memory border (left/top/right/bottom, derived from locateROI()).
// NOTE(review): for a non-submatrix the border object is passed default-constructed —
// presumably all zeros; confirm against the IwiBorderSize definition.
static inline void ippiGetImage(const cv::Mat &src, ::ipp::IwiImage &dst)
{
    ::ipp::IwiBorderSize border;

    if (src.isSubmatrix()) // the parent matrix already provides a physical border
    {
        cv::Size wholeSize;
        cv::Point roiOrigin;
        src.locateROI(wholeSize, roiOrigin);

        const int padRight  = wholeSize.width  - src.cols - roiOrigin.x;
        const int padBottom = wholeSize.height - src.rows - roiOrigin.y;

        border.left   = (IwSize)roiOrigin.x;
        border.top    = (IwSize)roiOrigin.y;
        border.right  = (IwSize)padRight;
        border.bottom = (IwSize)padBottom;
    }

    dst.Init(ippiSize(src.size()),
             ippiGetDataType(src.depth()),
             src.channels(),
             border,
             (void*)src.ptr(),
             src.step);
}