CV_IPP_CHECK()
{
    if (ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0)
    {
        CV_IMPL_ADD(CV_IMPL_IPP);
        return;
    }
    setIppErrorStatus();
}
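// Illustrative usage sketch (not part of the library source): the IPP block above
// backs the masked-copy path, returning early when ippiCopy_8u_C1MR succeeds and
// falling through to the scalar loop otherwise. A minimal example of the public
// call that reaches it is shown below; the matrix names and sizes are assumptions.
#include <opencv2/core.hpp>
using namespace cv;

void maskedCopyExample()
{
    Mat src(480, 640, CV_8UC1, Scalar(200));
    Mat mask = Mat::zeros(src.size(), CV_8UC1);
    mask(Rect(100, 80, 200, 150)).setTo(255);   // only this region is copied
    Mat dst = Mat::zeros(src.size(), CV_8UC1);
    src.copyTo(dst, mask);                      // may hit the ippiCopy_8u_C1MR fast path internally
}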
inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
{
    if (!CV_IPP_CHECK_COND)
        return 0;
    if (height == 1)
        step1 = step = width * sizeof(dst[0]);
    if (0 <= CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)))
    {
        CV_IMPL_ADD(CV_IMPL_IPP);
        return 1;
    }
    setIppErrorStatus();
    return 0;
}
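// Illustrative sketch (assumption, not the actual OpenCV dispatcher): the wrapper
// above returns 1 when IPP processed the whole ROI and 0 when the caller should run
// its own loop. A hypothetical caller with a plain scalar fallback would look like
// this; the function name not8u is invented for the example.
static void not8u(const uchar* src, size_t sstep, uchar* dst, size_t dstep,
                  int width, int height)
{
    if (arithm_ipp_not8u(src, sstep, dst, dstep, width, height))
        return;                                   // IPP path succeeded
    for (int y = 0; y < height; y++, src += sstep, dst += dstep)
        for (int x = 0; x < width; x++)
            dst[x] = (uchar)~src[x];              // scalar bitwise NOT fallback
}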
static double getThreshVal_Otsu_8u( const Mat& _src )
{
    Size size = _src.size();
    int step = (int) _src.step;
    if( _src.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
        step = size.width;
    }

#if IPP_VERSION_X100 >= 801 && !defined(HAVE_IPP_ICV_ONLY)
    IppiSize srcSize = { size.width, size.height };
    Ipp8u thresh;
    CV_SUPPRESS_DEPRECATED_START
    IppStatus ok = ippiComputeThreshold_Otsu_8u_C1R(_src.data, step, srcSize, &thresh);
    CV_SUPPRESS_DEPRECATED_END
    if (ok >= 0)
        return thresh;
    setIppErrorStatus();
#endif

    const int N = 256;
    int i, j, h[N] = {0};
    for( i = 0; i < size.height; i++ )
    {
        const uchar* src = _src.data + step*i;
        j = 0;
#if CV_ENABLE_UNROLLED
        for( ; j <= size.width - 4; j += 4 )
        {
            int v0 = src[j], v1 = src[j+1];
            h[v0]++; h[v1]++;
            v0 = src[j+2]; v1 = src[j+3];
            h[v0]++; h[v1]++;
        }
#endif
        for( ; j < size.width; j++ )
            h[src[j]]++;
    }

    double mu = 0, scale = 1./(size.width*size.height);
    for( i = 0; i < N; i++ )
        mu += i*(double)h[i];

    mu *= scale;
    double mu1 = 0, q1 = 0;
    double max_sigma = 0, max_val = 0;

    for( i = 0; i < N; i++ )
    {
        double p_i, q2, mu2, sigma;

        p_i = h[i]*scale;
        mu1 *= q1;
        q1 += p_i;
        q2 = 1. - q1;

        if( std::min(q1,q2) < FLT_EPSILON || std::max(q1,q2) > 1. - FLT_EPSILON )
            continue;

        mu1 = (mu1 + i*p_i)/q1;
        mu2 = (mu - q1*mu1)/q2;
        sigma = q1*q2*(mu1 - mu2)*(mu1 - mu2);
        if( sigma > max_sigma )
        {
            max_sigma = sigma;
            max_val = i;
        }
    }

    return max_val;
}
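// Illustrative usage sketch (not part of the original source): callers reach the
// Otsu helper above through the public cv::threshold entry point with the
// THRESH_OTSU flag; in that mode the user-supplied threshold is ignored and the
// computed Otsu value is returned. The matrix names are assumptions.
#include <opencv2/imgproc.hpp>
using namespace cv;

double otsuExample(const Mat& gray /* CV_8UC1 */, Mat& binary)
{
    // third argument (0) is ignored because THRESH_OTSU picks the threshold itself
    double t = threshold(gray, binary, 0, 255, THRESH_BINARY | THRESH_OTSU);
    return t;   // value computed by getThreshVal_Otsu_8u
}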
static void thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const float* src = (const float*)_src.data;
    float* dst = (float*)_dst.data;
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::thresh_32f(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    IppiSize sz = { roi.width, roi.height };
    switch( type )
    {
    case THRESH_TRUNC:
        if (0 <= ippiThreshold_GT_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh))
            return;
        setIppErrorStatus();
        break;
    case THRESH_TOZERO:
        if (0 <= ippiThreshold_LTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+FLT_EPSILON, 0))
            return;
        setIppErrorStatus();
        break;
    case THRESH_TOZERO_INV:
        if (0 <= ippiThreshold_GTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0))
            return;
        setIppErrorStatus();
        break;
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_cmpgt_ps( v0, thresh4 );
                    v1 = _mm_cmpgt_ps( v1, thresh4 );
                    v0 = _mm_and_ps( v0, maxval4 );
                    v1 = _mm_and_ps( v1, maxval4 );
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = src[j] > thresh ? maxval : 0;
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_cmple_ps( v0, thresh4 );
                    v1 = _mm_cmple_ps( v1, thresh4 );
                    v0 = _mm_and_ps( v0, maxval4 );
                    v1 = _mm_and_ps( v1, maxval4 );
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = src[j] <= thresh ? maxval : 0;
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_min_ps( v0, thresh4 );
                    v1 = _mm_min_ps( v1, thresh4 );
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = std::min(src[j], thresh);
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4));
                    v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4));
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
            {
                float v = src[j];
                dst[j] = v > thresh ? v : 0;
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4));
                    v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4));
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
            {
                float v = src[j];
                dst[j] = v <= thresh ? v : 0;
            }
        }
        break;

    default:
        return CV_Error( CV_StsBadArg, "" );
    }
}
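// Illustrative usage sketch (not part of the original source): cv::threshold
// dispatches to thresh_32f above for CV_32F input; for float data the TRUNC and
// TOZERO variants keep the original values rather than writing maxval. The input
// name and threshold value below are assumptions.
#include <opencv2/imgproc.hpp>
using namespace cv;

void floatThresholdExample(const Mat& response /* CV_32FC1 */)
{
    Mat truncated, strongOnly;
    threshold(response, truncated, 0.5, 0 /* maxval unused for TRUNC */, THRESH_TRUNC);
    threshold(response, strongOnly, 0.5, 0 /* maxval unused for TOZERO */, THRESH_TOZERO);
}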
static void thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const short* src = (const short*)_src.data;
    short* dst = (short*)_dst.data;
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    // the loops below use SSE2 integer intrinsics, so check for SSE2 support
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
        src_step = dst_step = roi.width;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    IppiSize sz = { roi.width, roi.height };
    switch( type )
    {
    case THRESH_TRUNC:
        if (0 <= ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh))
            return;
        setIppErrorStatus();
        break;
    case THRESH_TOZERO:
        if (0 <= ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0))
            return;
        setIppErrorStatus();
        break;
    case THRESH_TOZERO_INV:
        if (0 <= ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0))
            return;
        setIppErrorStatus();
        break;
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_and_si128( v0, maxval8 );
                    v1 = _mm_and_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = src[j] > thresh ? maxval : 0;
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_andnot_si128( v0, maxval8 );
                    v1 = _mm_andnot_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = src[j] <= thresh ? maxval : 0;
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_min_epi16( v0, thresh8 );
                    v1 = _mm_min_epi16( v1, thresh8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = std::min(src[j], thresh);
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8));
                    v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8));
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v > thresh ? v : 0;
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0);
                    v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1);
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#endif
            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v <= thresh ? v : 0;
            }
        }
        break;

    default:
        return CV_Error( CV_StsBadArg, "" );
    }
}
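// Illustrative usage sketch (not part of the original source): cv::threshold also
// accepts CV_16S input and routes it through thresh_16s above. The input name and
// the threshold/maxval pair below are assumptions.
#include <opencv2/imgproc.hpp>
using namespace cv;

void thresh16sExample(const Mat& response /* CV_16SC1, e.g. a filter response */)
{
    Mat binary;
    // values above 100 become 1000, everything else becomes 0
    threshold(response, binary, 100, 1000, THRESH_BINARY);
}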
static void HoughLinesProbabilistic( Mat& image,
                                     float rho, float theta, int threshold,
                                     int lineLength, int lineGap,
                                     std::vector<Vec4i>& lines, int linesMax )
{
    Point pt;
    float irho = 1 / rho;
    RNG rng((uint64)-1);

    CV_Assert( image.type() == CV_8UC1 );

    int width = image.cols;
    int height = image.rows;

    int numangle = cvRound(CV_PI / theta);
    int numrho = cvRound(((width + height) * 2 + 1) / rho);

#if defined HAVE_IPP && !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
    CV_IPP_CHECK()
    {
        IppiSize srcSize = { width, height };
        IppPointPolar delta = { rho, theta };
        IppiHoughProbSpec* pSpec;
        int bufferSize, specSize;
        int ipp_linesMax = std::min(linesMax, numangle*numrho);
        int linesCount = 0;
        lines.resize(ipp_linesMax);
        IppStatus ok = ippiHoughProbLineGetSize_8u_C1R(srcSize, delta, &specSize, &bufferSize);
        Ipp8u* buffer = ippsMalloc_8u(bufferSize);
        pSpec = (IppiHoughProbSpec*) malloc(specSize);
        if (ok >= 0)
            ok = ippiHoughProbLineInit_8u32f_C1R(srcSize, delta, ippAlgHintNone, pSpec);
        if (ok >= 0)
            ok = ippiHoughProbLine_8u32f_C1R(image.data, image.step, srcSize, threshold, lineLength, lineGap, (IppiPoint*) &lines[0], ipp_linesMax, &linesCount, buffer, pSpec);
        free(pSpec);
        ippsFree(buffer);
        if (ok >= 0)
        {
            lines.resize(linesCount);
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
        lines.clear();
        setIppErrorStatus();
    }
#endif

    Mat accum = Mat::zeros( numangle, numrho, CV_32SC1 );
    Mat mask( height, width, CV_8UC1 );
    std::vector<float> trigtab(numangle*2);

    for( int n = 0; n < numangle; n++ )
    {
        trigtab[n*2] = (float)(cos((double)n*theta) * irho);
        trigtab[n*2+1] = (float)(sin((double)n*theta) * irho);
    }
    const float* ttab = &trigtab[0];
    uchar* mdata0 = mask.ptr();
    std::vector<Point> nzloc;

    // stage 1. collect non-zero image points
    for( pt.y = 0; pt.y < height; pt.y++ )
    {
        const uchar* data = image.ptr(pt.y);
        uchar* mdata = mask.ptr(pt.y);
        for( pt.x = 0; pt.x < width; pt.x++ )
        {
            if( data[pt.x] )
            {
                mdata[pt.x] = (uchar)1;
                nzloc.push_back(pt);
            }
            else
                mdata[pt.x] = 0;
        }
    }

    int count = (int)nzloc.size();

    // stage 2. process all the points in random order
    for( ; count > 0; count-- )
    {
        // choose random point out of the remaining ones
        int idx = rng.uniform(0, count);
        int max_val = threshold-1, max_n = 0;
        Point point = nzloc[idx];
        Point line_end[2];
        float a, b;
        int* adata = accum.ptr<int>();
        int i = point.y, j = point.x, k, x0, y0, dx0, dy0, xflag;
        int good_line;
        const int shift = 16;

        // "remove" it by overriding it with the last element
        nzloc[idx] = nzloc[count-1];

        // check if it has been excluded already (i.e. belongs to some other line)
        if( !mdata0[i*width + j] )
            continue;

        // update accumulator, find the most probable line
        for( int n = 0; n < numangle; n++, adata += numrho )
        {
            int r = cvRound( j * ttab[n*2] + i * ttab[n*2+1] );
            r += (numrho - 1) / 2;
            int val = ++adata[r];
            if( max_val < val )
            {
                max_val = val;
                max_n = n;
            }
        }

        // if it is too "weak" candidate, continue with another point
        if( max_val < threshold )
            continue;

        // from the current point walk in each direction
        // along the found line and extract the line segment
        a = -ttab[max_n*2+1];
        b = ttab[max_n*2];
        x0 = j;
        y0 = i;
        if( fabs(a) > fabs(b) )
        {
            xflag = 1;
            dx0 = a > 0 ? 1 : -1;
            dy0 = cvRound( b*(1 << shift)/fabs(a) );
            y0 = (y0 << shift) + (1 << (shift-1));
        }
        else
        {
            xflag = 0;
            dy0 = b > 0 ? 1 : -1;
            dx0 = cvRound( a*(1 << shift)/fabs(b) );
            x0 = (x0 << shift) + (1 << (shift-1));
        }

        for( k = 0; k < 2; k++ )
        {
            int gap = 0, x = x0, y = y0, dx = dx0, dy = dy0;

            if( k > 0 )
                dx = -dx, dy = -dy;

            // walk along the line using fixed-point arithmetics,
            // stop at the image border or in case of too big gap
            for( ;; x += dx, y += dy )
            {
                uchar* mdata;
                int i1, j1;

                if( xflag )
                {
                    j1 = x;
                    i1 = y >> shift;
                }
                else
                {
                    j1 = x >> shift;
                    i1 = y;
                }

                if( j1 < 0 || j1 >= width || i1 < 0 || i1 >= height )
                    break;

                mdata = mdata0 + i1*width + j1;

                // for each non-zero point:
                //    update line end,
                //    clear the mask element,
                //    reset the gap
                if( *mdata )
                {
                    gap = 0;
                    line_end[k].y = i1;
                    line_end[k].x = j1;
                }
                else if( ++gap > lineGap )
                    break;
            }
        }

        good_line = std::abs(line_end[1].x - line_end[0].x) >= lineLength ||
                    std::abs(line_end[1].y - line_end[0].y) >= lineLength;

        for( k = 0; k < 2; k++ )
        {
            int x = x0, y = y0, dx = dx0, dy = dy0;

            if( k > 0 )
                dx = -dx, dy = -dy;

            // walk along the line again and clear the processed points,
            // removing their votes from the accumulator if the segment is good
            for( ;; x += dx, y += dy )
            {
                uchar* mdata;
                int i1, j1;

                if( xflag )
                {
                    j1 = x;
                    i1 = y >> shift;
                }
                else
                {
                    j1 = x >> shift;
                    i1 = y;
                }

                mdata = mdata0 + i1*width + j1;

                if( *mdata )
                {
                    if( good_line )
                    {
                        adata = accum.ptr<int>();
                        for( int n = 0; n < numangle; n++, adata += numrho )
                        {
                            int r = cvRound( j1 * ttab[n*2] + i1 * ttab[n*2+1] );
                            r += (numrho - 1) / 2;
                            adata[r]--;
                        }
                    }
                    *mdata = 0;
                }

                if( i1 == line_end[k].y && j1 == line_end[k].x )
                    break;
            }
        }

        // store the segment if it is long enough, stopping once linesMax is reached
        if( good_line )
        {
            Vec4i lr( line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y );
            lines.push_back(lr);
            if( (int)lines.size() >= linesMax )
                return;
        }
    }
}
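// Illustrative usage sketch (not part of the original source): the usual route to
// the probabilistic Hough routine above is the public cv::HoughLinesP wrapper,
// typically fed with a binary edge map. The parameter values below are assumptions.
#include <opencv2/imgproc.hpp>
#include <vector>
using namespace cv;

void houghSegmentsExample(const Mat& gray /* CV_8UC1 */)
{
    Mat edges;
    Canny(gray, edges, 50, 150);                    // binary edge map expected by the detector
    std::vector<Vec4i> segments;
    HoughLinesP(edges, segments, 1, CV_PI/180,      // 1 px and 1 degree resolution
                80  /* accumulator threshold */,
                30  /* minimum segment length */,
                10  /* maximum allowed gap */);
    // each Vec4i holds (x1, y1, x2, y2) of a detected segment
}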
static void thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const short* src = _src.ptr<short>();
    short* dst = _dst.ptr<short>();
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    // the loops below use SSE2 integer intrinsics, so check for SSE2 support
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
        src_step = dst_step = roi.width;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        CV_SUPPRESS_DEPRECATED_START
        switch( type )
        {
        case THRESH_TRUNC:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GT_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_LTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh + 1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_and_si128( v0, maxval8 );
                    v1 = _mm_and_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

            for( ; j <= roi.width - 8; j += 8 )
            {
                uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = src[j] > thresh ? maxval : 0;
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_andnot_si128( v0, maxval8 );
                    v1 = _mm_andnot_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

            for( ; j <= roi.width - 8; j += 8 )
            {
                uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = src[j] <= thresh ? maxval : 0;
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_min_epi16( v0, thresh8 );
                    v1 = _mm_min_epi16( v1, thresh8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
                vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh));
#endif
            for( ; j < roi.width; j++ )
                dst[j] = std::min(src[j], thresh);
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8));
                    v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8));
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
            }
#endif
            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v > thresh ? v : 0;
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0);
                    v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1);
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcleq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
            }
#endif
            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v <= thresh ? v : 0;
            }
        }
        break;

    default:
        return CV_Error( CV_StsBadArg, "" );
    }
}
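// Illustrative self-check sketch (an assumption, not part of OpenCV's test suite):
// because the SIMD loops above only cover multiples of 8 or 16 elements, the scalar
// tail must produce identical results. The check below compares cv::threshold on
// CV_16S data against a direct scalar evaluation of the THRESH_BINARY rule; the
// matrix size and threshold values are arbitrary.
#include <opencv2/imgproc.hpp>
#include <cassert>
using namespace cv;

void checkThresh16s()
{
    Mat src(16, 37, CV_16SC1);                 // odd width forces the scalar tail to run
    randu(src, Scalar(-1000), Scalar(1000));
    Mat dst;
    threshold(src, dst, 100, 500, THRESH_BINARY);
    for (int y = 0; y < src.rows; y++)
        for (int x = 0; x < src.cols; x++)
        {
            short expected = src.at<short>(y, x) > 100 ? (short)500 : (short)0;
            assert(dst.at<short>(y, x) == expected);
        }
}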
void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
                    double scale, double delta, int borderType )
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    if (ddepth < 0)
        ddepth = sdepth;
    _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) );

#ifdef HAVE_IPP
    if ((ksize == 3 || ksize == 5) && ((borderType & BORDER_ISOLATED) != 0 || !_src.isSubmatrix()) &&
        ((stype == CV_8UC1 && ddepth == CV_16S) || (ddepth == CV_32F && stype == CV_32FC1)) && !ocl::useOpenCL())
    {
        int iscale = saturate_cast<int>(scale), idelta = saturate_cast<int>(delta);
        bool floatScale = std::fabs(scale - iscale) > DBL_EPSILON, needScale = iscale != 1;
        bool floatDelta = std::fabs(delta - idelta) > DBL_EPSILON, needDelta = delta != 0;
        int borderTypeNI = borderType & ~BORDER_ISOLATED;
        Mat src = _src.getMat(), dst = _dst.getMat();

        if (src.data != dst.data)
        {
            Ipp32s bufsize;
            IppStatus status = (IppStatus)-1;
            IppiSize roisize = { src.cols, src.rows };
            IppiMaskSize masksize = ksize == 3 ? ippMskSize3x3 : ippMskSize5x5;
            IppiBorderType borderTypeIpp = ippiGetBorderType(borderTypeNI);

#define IPP_FILTER_LAPLACIAN(ippsrctype, ippdsttype, ippfavor) \
    do \
    { \
        if (borderTypeIpp >= 0 && ippiFilterLaplacianGetBufferSize_##ippfavor##_C1R(roisize, masksize, &bufsize) >= 0) \
        { \
            Ipp8u * buffer = ippsMalloc_8u(bufsize); \
            status = ippiFilterLaplacianBorder_##ippfavor##_C1R((const ippsrctype *)src.data, (int)src.step, (ippdsttype *)dst.data, \
                                                                (int)dst.step, roisize, masksize, borderTypeIpp, 0, buffer); \
            ippsFree(buffer); \
        } \
    } while ((void)0, 0)

            CV_SUPPRESS_DEPRECATED_START
            if (sdepth == CV_8U && ddepth == CV_16S && !floatScale && !floatDelta)
            {
                IPP_FILTER_LAPLACIAN(Ipp8u, Ipp16s, 8u16s);

                if (needScale && status >= 0)
                    status = ippiMulC_16s_C1IRSfs((Ipp16s)iscale, (Ipp16s *)dst.data, (int)dst.step, roisize, 0);
                if (needDelta && status >= 0)
                    status = ippiAddC_16s_C1IRSfs((Ipp16s)idelta, (Ipp16s *)dst.data, (int)dst.step, roisize, 0);
            }
            else if (sdepth == CV_32F && ddepth == CV_32F)
            {
                IPP_FILTER_LAPLACIAN(Ipp32f, Ipp32f, 32f);

                if (needScale && status >= 0)
                    status = ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, roisize);
                if (needDelta && status >= 0)
                    status = ippiAddC_32f_C1IR((Ipp32f)delta, (Ipp32f *)dst.data, (int)dst.step, roisize);
            }
            CV_SUPPRESS_DEPRECATED_END

            if (status >= 0)
                return;
            setIppErrorStatus();
        }
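// Illustrative usage sketch (not part of the original source): a typical call that
// satisfies the IPP eligibility conditions above, i.e. an 8-bit single-channel
// input, CV_16S output depth and a 3x3 aperture. The variable names are assumptions.
#include <opencv2/imgproc.hpp>
using namespace cv;

void laplacianExample(const Mat& gray /* CV_8UC1 */)
{
    Mat lap16s, lap8u;
    Laplacian(gray, lap16s, CV_16S, 3);   // 8u input, 16s output, 3x3 aperture
    convertScaleAbs(lap16s, lap8u);       // back to 8-bit for display
}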