void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType ) { int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); if (ddepth < 0) ddepth = sdepth; int dtype = CV_MAKE_TYPE(ddepth, cn); _dst.create( _src.size(), dtype ); #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::useTegra() && scale == 1.0 && delta == 0) { Mat src = _src.getMat(), dst = _dst.getMat(); if (ksize == 3 && tegra::sobel3x3(src, dst, dx, dy, borderType)) return; if (ksize == -1 && tegra::scharr(src, dst, dx, dy, borderType)) return; } #endif #ifdef HAVE_IPP CV_IPP_CHECK() { if (ksize < 0) { if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } } else if (0 < ksize) { if (IPPDerivSobel(_src, _dst, ddepth, dx, dy, ksize, scale, delta, borderType)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } } } #endif int ktype = std::max(CV_32F, std::max(ddepth, sdepth)); Mat kx, ky; getDerivKernels( kx, ky, dx, dy, ksize, false, ktype ); if( scale != 1 ) { // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part if( dx == 0 ) kx *= scale; else ky *= scale; } sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType ); }
// Builds the x/y lookup maps for a cylindrical warp.
//
//   src_size   - size of the source image
//   K, R       - camera parameters forwarded to the projector
//   xmap, ymap - output maps; on the OpenCL path they are created as CV_32FC1
//                with the size of the detected destination ROI
//
// Returns Rect(dst_tl, dst_br) when the OpenCL kernel ran successfully; otherwise
// the result of the generic RotationWarperBase implementation.
Rect CylindricalWarper::buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
{
    if (ocl::useOpenCL())
    {
        ocl::Kernel k("buildWarpCylindricalMaps", ocl::stitching::warpers_oclsrc);
        if (!k.empty())
        {
            // Each work item processes rowsPerWI rows (4 on Intel devices, 1 otherwise).
            int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
            projector_.setCameraParams(K, R);

            Point dst_tl, dst_br;
            detectResultRoi(src_size, dst_tl, dst_br);

            Size dsize(dst_br.x - dst_tl.x + 1, dst_br.y - dst_tl.y + 1);
            xmap.create(dsize, CV_32FC1);
            ymap.create(dsize, CV_32FC1);

            Mat k_rinv(1, 9, CV_32FC1, projector_.k_rinv);
            UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);

            k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
                   ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);

            // Explicit casts: brace-initialising size_t from a non-constant int is a
            // narrowing conversion, which list-initialisation does not allow.
            size_t globalsize[2] = { static_cast<size_t>(dsize.width),
                                     static_cast<size_t>((dsize.height + rowsPerWI - 1) / rowsPerWI) };
            if (k.run(2, globalsize, NULL, true))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return Rect(dst_tl, dst_br);
            }
        }
    }

    // OpenCL unavailable or the kernel failed: use the generic implementation.
    return RotationWarperBase<CylindricalProjector>::buildMaps(src_size, K, R, xmap, ymap);
}
void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const { bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON; int stype = type(), cn = CV_MAT_CN(stype); if( _type < 0 ) _type = _dst.fixedType() ? _dst.type() : stype; else _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn); int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type); if( sdepth == ddepth && noScale ) { copyTo(_dst); return; } #ifdef HAVE_OPENCL bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool needDouble = sdepth == CV_64F || ddepth == CV_64F; if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() && ((needDouble && doubleSupport) || !needDouble) ) { int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4; char cvt[2][40]; ocl::Kernel k("convertTo", ocl::core::convert_oclsrc, format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s%s", ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth), ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]), ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "", noScale ? " -D NO_SCALE" : "")); if (!k.empty()) { UMat src = *this; _dst.create( size(), _type ); UMat dst = _dst.getUMat(); float alphaf = (float)alpha, betaf = (float)beta; ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), dstarg = ocl::KernelArg::WriteOnly(dst, cn); if (noScale) k.args(srcarg, dstarg, rowsPerWI); else if (wdepth == CV_32F) k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI); else k.args(srcarg, dstarg, alpha, beta, rowsPerWI); size_t globalsize[2] = { (size_t)dst.cols * cn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI }; if (k.run(2, globalsize, NULL, false)) { CV_IMPL_ADD(CV_IMPL_OCL); return; } } } #endif Mat m = getMat(ACCESS_READ); m.convertTo(_dst, _type, alpha, beta); }
// IPP fast path (fragment; the enclosing function is not visible here).
// Calls ippiCopy_8u_C1MR with the source/destination/mask pointers and steps; a non-negative
// status is treated as success (the IPP implementation is recorded and the function returns).
// Otherwise the IPP error status is recorded and control falls through to the code that follows.
// NOTE(review): presumably this is the masked 8-bit single-channel copy - confirm against the caller.
CV_IPP_CHECK() { if (ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); }
// Sets all (or, with a mask, the selected) elements of this matrix to _value.
//
//   _value - scalar to assign; validated with checkScalar() on the OpenCL path
//   _mask  - optional mask; on the OpenCL path it must be CV_8UC1 and match size()
//
// Returns *this. The OpenCL "set"/"setMask" kernel is tried for matrices with at
// most 2 dimensions, at most 4 channels and a depth below CV_64F; otherwise, or
// if the kernel cannot be built or run, Mat::setTo is used on a mapped view.
UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
    bool haveMask = !_mask.empty();
#ifdef HAVE_OPENCL
    int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp);

    if( dims <= 2 && cn <= 4 && d < CV_64F && ocl::useOpenCL() )
    {
        Mat value = _value.getMat();
        CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );

        // With a mask or 3 channels the kernel works on whole pixels; otherwise it
        // may use a wider vector predicted for this matrix.
        int kercn = (haveMask || cn == 3) ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)),
            kertp = CV_MAKE_TYPE(d, kercn);

        double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
        convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn);

        // 3-channel scalars are passed as 4-component values.
        int scalarcn = kercn == 3 ? 4 : kercn,
            rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;

        String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
                             ocl::memopTypeToStr(kertp), rowsPerWI,
                             ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)),
                             ocl::memopTypeToStr(d), kercn);

        ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
        if( !setK.empty() )
        {
            ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn);
            UMat mask;

            if( haveMask )
            {
                mask = _mask.getUMat();
                CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
                ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
                               dstarg = ocl::KernelArg::ReadWrite(*this);
                setK.args(maskarg, dstarg, scalararg);
            }
            else
            {
                ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn);
                setK.args(dstarg, scalararg);
            }

            // Do the arithmetic in size_t: brace-initialising size_t from an int
            // expression is a narrowing conversion, and cols * cn is no longer
            // multiplied in int.
            size_t globalsize[] = { static_cast<size_t>(cols) * cn / kercn,
                                    (static_cast<size_t>(rows) + rowsPerWI - 1) / rowsPerWI };
            if( setK.run(2, globalsize, NULL, false) )
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return *this;
            }
        }
    }
#endif
    // CPU fallback: a masked set must read the untouched elements, so map read-write.
    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    m.setTo(_value, _mask);
    return *this;
}
// Tries to compute the bitwise NOT of an 8-bit single-channel image with IPP.
//
//   src1/step1 - source pointer and row step in bytes
//   dst/step   - destination pointer and row step in bytes
//   width, height - ROI size
//
// Returns 1 when the IPP call reported a non-negative status, 0 when IPP is not
// enabled or the call failed (in the latter case the IPP error status is set).
inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
{
    if (!CV_IPP_CHECK_COND)
        return 0;

    // For a single row the steps are replaced by the row length in bytes.
    if (height == 1)
    {
        step = width * sizeof(dst[0]);
        step1 = step;
    }

    const int status = CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height));
    if (status < 0)
    {
        setIppErrorStatus();
        return 0;
    }

    {
        CV_IMPL_ADD(CV_IMPL_IPP);
    }
    return 1;
}
// Builds the x/y lookup maps for a planar warp.
//
//   src_size     - size of the source image
//   K, R, T      - camera parameters forwarded to the projector
//   _xmap, _ymap - output maps, created as CV_32FC1 with the size of the detected
//                  destination ROI
//
// Returns Rect(dst_tl, dst_br). The OpenCL kernel "buildWarpPlaneMaps" is tried
// first; if it is unavailable or fails, every destination pixel is mapped back
// through projector_.mapBackward on the CPU.
Rect PlaneWarper::buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, OutputArray _xmap, OutputArray _ymap)
{
    projector_.setCameraParams(K, R, T);

    Point dst_tl, dst_br;
    detectResultRoi(src_size, dst_tl, dst_br);

    Size dsize(dst_br.x - dst_tl.x + 1, dst_br.y - dst_tl.y + 1);
    _xmap.create(dsize, CV_32FC1);
    _ymap.create(dsize, CV_32FC1);

    if (ocl::useOpenCL())
    {
        ocl::Kernel k("buildWarpPlaneMaps", ocl::stitching::warpers_oclsrc);
        if (!k.empty())
        {
            // Each work item processes rowsPerWI rows (4 on Intel devices, 1 otherwise).
            int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;

            Mat k_rinv(1, 9, CV_32FC1, projector_.k_rinv), t(1, 3, CV_32FC1, projector_.t);
            UMat uxmap = _xmap.getUMat(), uymap = _ymap.getUMat(),
                 uk_rinv = k_rinv.getUMat(ACCESS_READ), ut = t.getUMat(ACCESS_READ);

            k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
                   ocl::KernelArg::PtrReadOnly(uk_rinv), ocl::KernelArg::PtrReadOnly(ut),
                   dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);

            // Explicit casts: brace-initialising size_t from a non-constant int is a
            // narrowing conversion, which list-initialisation does not allow.
            size_t globalsize[2] = { static_cast<size_t>(dsize.width),
                                     static_cast<size_t>((dsize.height + rowsPerWI - 1) / rowsPerWI) };
            if (k.run(2, globalsize, NULL, true))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return Rect(dst_tl, dst_br);
            }
        }
    }

    // CPU fallback: map every destination pixel back to source coordinates.
    Mat xmap = _xmap.getMat(), ymap = _ymap.getMat();

    float x, y;
    for (int v = dst_tl.y; v <= dst_br.y; ++v)
    {
        for (int u = dst_tl.x; u <= dst_br.x; ++u)
        {
            projector_.mapBackward(static_cast<float>(u), static_cast<float>(v), x, y);
            xmap.at<float>(v - dst_tl.y, u - dst_tl.x) = x;
            ymap.at<float>(v - dst_tl.y, u - dst_tl.x) = y;
        }
    }

    return Rect(dst_tl, dst_br);
}
// Copies the elements of this matrix selected by _mask into _dst.
//
//   _dst  - destination array; (re)created with this matrix's size and type on
//           the OpenCL path
//   _mask - 8-bit mask with either 1 channel or as many channels as the source
//           (asserted when built with OpenCL); an empty mask forwards to the
//           unmasked copyTo()
//
// With OpenCL available, a UMat destination and at most 2 dimensions, the
// "copyToMask" kernel is tried; otherwise, or on failure, Mat::copyTo is used on
// a mapped view.
void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
    if( _mask.empty() )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    {
        // Remember the destination buffer before create() so a reallocation can be detected.
        UMatData * prevu = _dst.getUMat().u;
        _dst.create( dims, size, type() );

        UMat dst = _dst.getUMat();

        // do not leave dst uninitialized: if create() produced a new buffer the
        // kernel is built with HAVE_DST_UNINIT and dst is bound write-only
        bool haveDstUninit = prevu != dst.u;

        String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
                             ocl::memopTypeToStr(depth()), cn, mcn,
                             haveDstUninit ? " -D HAVE_DST_UNINIT" : "");

        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
        if (!k.empty())
        {
            k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
                   ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
                                   ocl::KernelArg::ReadWrite(dst));

            // Explicit casts: brace-initialising size_t from a non-constant int is a
            // narrowing conversion, which list-initialisation does not allow.
            size_t globalsize[2] = { static_cast<size_t>(cols), static_cast<size_t>(rows) };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    // CPU fallback on a read-only mapped view of this UMat.
    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
}
// HoughLinesProbabilistic: probabilistic Hough segment detection on a CV_8UC1 image (asserted below).
// Visible flow:
//   - numangle = round(pi / theta), numrho = round(((width + height) * 2 + 1) / rho);
//   - optional IPP path (compiled only when the listed IPP macros, including IPP_DISABLE_BLOCK, allow it);
//   - a table of cos/sin(n*theta) pre-multiplied by 1/rho is built;
//   - stage 1 collects the non-zero pixels into nzloc and mirrors them as 0/1 in `mask`;
//   - stage 2 picks the remaining points in random order (RNG seeded with (uint64)-1), votes into `accum`,
//     and when the best bin reaches `threshold` walks the mask in both directions along that line using
//     16-bit fixed-point steps, stopping at the image border or after more than lineGap empty pixels;
//   - a line is "good" when its end points differ by at least lineLength in x or in y; the second walk
//     clears the mask along the segment and, for a good line, decrements the votes of its pixels.
// NOTE(review): the definition is cut off in this view (the loop over `count` and the function body are
// not closed), so how accepted segments are appended to `lines` / limited by `linesMax` is not visible here.
// NOTE(review): in the IPP branch pSpec is malloc'ed with specSize before the status of
// ippiHoughProbLineGetSize_8u_C1R is checked - confirm specSize/bufferSize are valid when that call fails.
static void HoughLinesProbabilistic( Mat& image, float rho, float theta, int threshold, int lineLength, int lineGap, std::vector<Vec4i>& lines, int linesMax ) { Point pt; float irho = 1 / rho; RNG rng((uint64)-1); CV_Assert( image.type() == CV_8UC1 ); int width = image.cols; int height = image.rows; int numangle = cvRound(CV_PI / theta); int numrho = cvRound(((width + height) * 2 + 1) / rho); #if defined HAVE_IPP && !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK CV_IPP_CHECK() { IppiSize srcSize = { width, height }; IppPointPolar delta = { rho, theta }; IppiHoughProbSpec* pSpec; int bufferSize, specSize; int ipp_linesMax = std::min(linesMax, numangle*numrho); int linesCount = 0; lines.resize(ipp_linesMax); IppStatus ok = ippiHoughProbLineGetSize_8u_C1R(srcSize, delta, &specSize, &bufferSize); Ipp8u* buffer = ippsMalloc_8u(bufferSize); pSpec = (IppiHoughProbSpec*) malloc(specSize); if (ok >= 0) ok = ippiHoughProbLineInit_8u32f_C1R(srcSize, delta, ippAlgHintNone, pSpec); if (ok >= 0) ok = ippiHoughProbLine_8u32f_C1R(image.data, image.step, srcSize, threshold, lineLength, lineGap, (IppiPoint*) &lines[0], ipp_linesMax, &linesCount, buffer, pSpec); free(pSpec); ippsFree(buffer); if (ok >= 0) { lines.resize(linesCount); CV_IMPL_ADD(CV_IMPL_IPP); return; } lines.clear(); setIppErrorStatus(); } #endif Mat accum = Mat::zeros( numangle, numrho, CV_32SC1 ); Mat mask( height, width, CV_8UC1 ); std::vector<float> trigtab(numangle*2); for( int n = 0; n < numangle; n++ ) { trigtab[n*2] = (float)(cos((double)n*theta) * irho); trigtab[n*2+1] = (float)(sin((double)n*theta) * irho); } const float* ttab = &trigtab[0]; uchar* mdata0 = mask.ptr(); std::vector<Point> nzloc; // stage 1. 
collect non-zero image points for( pt.y = 0; pt.y < height; pt.y++ ) { const uchar* data = image.ptr(pt.y); uchar* mdata = mask.ptr(pt.y); for( pt.x = 0; pt.x < width; pt.x++ ) { if( data[pt.x] ) { mdata[pt.x] = (uchar)1; nzloc.push_back(pt); } else mdata[pt.x] = 0; } } int count = (int)nzloc.size(); // stage 2. process all the points in random order for( ; count > 0; count-- ) { // choose random point out of the remaining ones int idx = rng.uniform(0, count); int max_val = threshold-1, max_n = 0; Point point = nzloc[idx]; Point line_end[2]; float a, b; int* adata = accum.ptr<int>(); int i = point.y, j = point.x, k, x0, y0, dx0, dy0, xflag; int good_line; const int shift = 16; // "remove" it by overriding it with the last element nzloc[idx] = nzloc[count-1]; // check if it has been excluded already (i.e. belongs to some other line) if( !mdata0[i*width + j] ) continue; // update accumulator, find the most probable line for( int n = 0; n < numangle; n++, adata += numrho ) { int r = cvRound( j * ttab[n*2] + i * ttab[n*2+1] ); r += (numrho - 1) / 2; int val = ++adata[r]; if( max_val < val ) { max_val = val; max_n = n; } } // if it is too "weak" candidate, continue with another point if( max_val < threshold ) continue; // from the current point walk in each direction // along the found line and extract the line segment a = -ttab[max_n*2+1]; b = ttab[max_n*2]; x0 = j; y0 = i; if( fabs(a) > fabs(b) ) { xflag = 1; dx0 = a > 0 ? 1 : -1; dy0 = cvRound( b*(1 << shift)/fabs(a) ); y0 = (y0 << shift) + (1 << (shift-1)); } else { xflag = 0; dy0 = b > 0 ? 
1 : -1; dx0 = cvRound( a*(1 << shift)/fabs(b) ); x0 = (x0 << shift) + (1 << (shift-1)); } for( k = 0; k < 2; k++ ) { int gap = 0, x = x0, y = y0, dx = dx0, dy = dy0; if( k > 0 ) dx = -dx, dy = -dy; // walk along the line using fixed-point arithmetics, // stop at the image border or in case of too big gap for( ;; x += dx, y += dy ) { uchar* mdata; int i1, j1; if( xflag ) { j1 = x; i1 = y >> shift; } else { j1 = x >> shift; i1 = y; } if( j1 < 0 || j1 >= width || i1 < 0 || i1 >= height ) break; mdata = mdata0 + i1*width + j1; // for each non-zero point: // update line end, // clear the mask element // reset the gap if( *mdata ) { gap = 0; line_end[k].y = i1; line_end[k].x = j1; } else if( ++gap > lineGap ) break; } } good_line = std::abs(line_end[1].x - line_end[0].x) >= lineLength || std::abs(line_end[1].y - line_end[0].y) >= lineLength; for( k = 0; k < 2; k++ ) { int x = x0, y = y0, dx = dx0, dy = dy0; if( k > 0 ) dx = -dx, dy = -dy; // walk along the line using fixed-point arithmetics, // stop at the image border or in case of too big gap for( ;; x += dx, y += dy ) { uchar* mdata; int i1, j1; if( xflag ) { j1 = x; i1 = y >> shift; } else { j1 = x >> shift; i1 = y; } mdata = mdata0 + i1*width + j1; // for each non-zero point: // update line end, // clear the mask element // reset the gap if( *mdata ) { if( good_line ) { adata = accum.ptr<int>(); for( int n = 0; n < numangle; n++, adata += numrho ) { int r = cvRound( j1 * ttab[n*2] + i1 * ttab[n*2+1] ); r += (numrho - 1) / 2; adata[r]--; } } *mdata = 0; } if( i1 == line_end[k].y && j1 == line_end[k].x ) break; } }
static double getThreshVal_Otsu_8u( const Mat& _src ) { Size size = _src.size(); int step = (int) _src.step; if( _src.isContinuous() ) { size.width *= size.height; size.height = 1; step = size.width; } #if IPP_VERSION_X100 >= 801 && !defined(HAVE_IPP_ICV_ONLY) CV_IPP_CHECK() { IppiSize srcSize = { size.width, size.height }; Ipp8u thresh; CV_SUPPRESS_DEPRECATED_START IppStatus ok = ippiComputeThreshold_Otsu_8u_C1R(_src.ptr(), step, srcSize, &thresh); CV_SUPPRESS_DEPRECATED_END if (ok >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return thresh; } setIppErrorStatus(); } #endif const int N = 256; int i, j, h[N] = {0}; for( i = 0; i < size.height; i++ ) { const uchar* src = _src.ptr() + step*i; j = 0; #if CV_ENABLE_UNROLLED for( ; j <= size.width - 4; j += 4 ) { int v0 = src[j], v1 = src[j+1]; h[v0]++; h[v1]++; v0 = src[j+2]; v1 = src[j+3]; h[v0]++; h[v1]++; } #endif for( ; j < size.width; j++ ) h[src[j]]++; } double mu = 0, scale = 1./(size.width*size.height); for( i = 0; i < N; i++ ) mu += i*(double)h[i]; mu *= scale; double mu1 = 0, q1 = 0; double max_sigma = 0, max_val = 0; for( i = 0; i < N; i++ ) { double p_i, q2, mu2, sigma; p_i = h[i]*scale; mu1 *= q1; q1 += p_i; q2 = 1. - q1; if( std::min(q1,q2) < FLT_EPSILON || std::max(q1,q2) > 1. - FLT_EPSILON ) continue; mu1 = (mu1 + i*p_i)/q1; mu2 = (mu - q1*mu1)/q2; sigma = q1*q2*(mu1 - mu2)*(mu1 - mu2); if( sigma > max_sigma ) { max_sigma = sigma; max_val = i; } } return max_val; }
// thresh_32f: applies a threshold to float data, writing the result to _dst.
//   _src, _dst - source and destination matrices, accessed as rows of float
//   thresh     - threshold value
//   maxval     - value written by THRESH_BINARY / THRESH_BINARY_INV
//   type       - one of THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO,
//                THRESH_TOZERO_INV; any other value raises CV_Error(CV_StsBadArg)
// The row width is multiplied by the channel count, and when both matrices are continuous the whole
// image is processed as a single row. Optional Tegra and IPP paths are tried first (IPP only for
// TRUNC, TOZERO and TOZERO_INV); otherwise each row is processed with SSE (8 floats per iteration) or
// NEON (4 floats per iteration) where compiled in, followed by a scalar tail loop.
// NOTE(review): unlike thresh_16s, the continuous branch does not reset src_step/dst_step to roi.width;
// with roi.height == 1 they are only used for the IPP step arguments and the post-row pointer advance -
// confirm this is intentional.
// NOTE(review): the IPP THRESH_TOZERO path passes thresh+FLT_EPSILON as the lower bound while the scalar
// path tests v > thresh - confirm both agree for large |thresh| where the addition may not change the value.
static void thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) { int i, j; Size roi = _src.size(); roi.width *= _src.channels(); const float* src = _src.ptr<float>(); float* dst = _dst.ptr<float>(); size_t src_step = _src.step/sizeof(src[0]); size_t dst_step = _dst.step/sizeof(dst[0]); #if CV_SSE2 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE); #endif if( _src.isContinuous() && _dst.isContinuous() ) { roi.width *= roi.height; roi.height = 1; } #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::thresh_32f(_src, _dst, roi.width, roi.height, thresh, maxval, type)) return; #endif #if defined(HAVE_IPP) CV_IPP_CHECK() { IppiSize sz = { roi.width, roi.height }; switch( type ) { case THRESH_TRUNC: if (0 <= ippiThreshold_GT_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; case THRESH_TOZERO: if (0 <= ippiThreshold_LTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+FLT_EPSILON, 0)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; case THRESH_TOZERO_INV: if (0 <= ippiThreshold_GTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; } } #endif switch( type ) { case THRESH_BINARY: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_cmpgt_ps( v0, thresh4 ); v1 = _mm_cmpgt_ps( v1, thresh4 ); v0 = _mm_and_ps( v0, maxval4 ); v1 = _mm_and_ps( v1, maxval4 ); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #elif CV_NEON float32x4_t v_thresh = vdupq_n_f32(thresh); uint32x4_t v_maxval = 
vreinterpretq_u32_f32(vdupq_n_f32(maxval)); for( ; j <= roi.width - 4; j += 4 ) { float32x4_t v_src = vld1q_f32(src + j); uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval); vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] > thresh ? maxval : 0; } break; case THRESH_BINARY_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_cmple_ps( v0, thresh4 ); v1 = _mm_cmple_ps( v1, thresh4 ); v0 = _mm_and_ps( v0, maxval4 ); v1 = _mm_and_ps( v1, maxval4 ); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #elif CV_NEON float32x4_t v_thresh = vdupq_n_f32(thresh); uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval)); for( ; j <= roi.width - 4; j += 4 ) { float32x4_t v_src = vld1q_f32(src + j); uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval); vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] <= thresh ? 
maxval : 0; } break; case THRESH_TRUNC: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_min_ps( v0, thresh4 ); v1 = _mm_min_ps( v1, thresh4 ); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #elif CV_NEON float32x4_t v_thresh = vdupq_n_f32(thresh); for( ; j <= roi.width - 4; j += 4 ) vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh)); #endif for( ; j < roi.width; j++ ) dst[j] = std::min(src[j], thresh); } break; case THRESH_TOZERO: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4)); v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4)); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #elif CV_NEON float32x4_t v_thresh = vdupq_n_f32(thresh); for( ; j <= roi.width - 4; j += 4 ) { float32x4_t v_src = vld1q_f32(src + j); uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), vreinterpretq_u32_f32(v_src)); vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); } #endif for( ; j < roi.width; j++ ) { float v = src[j]; dst[j] = v > thresh ? 
v : 0; } } break; case THRESH_TOZERO_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4)); v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4)); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #elif CV_NEON float32x4_t v_thresh = vdupq_n_f32(thresh); for( ; j <= roi.width - 4; j += 4 ) { float32x4_t v_src = vld1q_f32(src + j); uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), vreinterpretq_u32_f32(v_src)); vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); } #endif for( ; j < roi.width; j++ ) { float v = src[j]; dst[j] = v <= thresh ? v : 0; } } break; default: return CV_Error( CV_StsBadArg, "" ); } }
// thresh_16s: applies a threshold to 16-bit signed data, writing the result to _dst.
//   _src, _dst - source and destination matrices, accessed as rows of short
//   thresh     - threshold value
//   maxval     - value written by THRESH_BINARY / THRESH_BINARY_INV
//   type       - one of THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO,
//                THRESH_TOZERO_INV; any other value raises CV_Error(CV_StsBadArg)
// The row width is multiplied by the channel count; when both matrices are continuous the image is
// processed as one row and both steps are set to that row length. Optional Tegra and IPP paths are
// tried first (IPP only for TRUNC, TOZERO and TOZERO_INV, using the in-place variant when source and
// destination share the same data pointer and HAVE_IPP_ICV_ONLY is not defined). Otherwise each row is
// processed with SSE2 intrinsics (16 values per iteration) or NEON (8 values per iteration) where
// compiled in, followed by a scalar tail loop.
// NOTE(review): the SIMD branch uses 128-bit integer intrinsics (_mm_cmpgt_epi16, _mm_min_epi16, ...)
// but the runtime gate queries checkHardwareSupport(CV_CPU_SSE) - confirm whether CV_CPU_SSE2 was intended.
static void thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) { int i, j; Size roi = _src.size(); roi.width *= _src.channels(); const short* src = _src.ptr<short>(); short* dst = _dst.ptr<short>(); size_t src_step = _src.step/sizeof(src[0]); size_t dst_step = _dst.step/sizeof(dst[0]); #if CV_SSE2 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE); #endif if( _src.isContinuous() && _dst.isContinuous() ) { roi.width *= roi.height; roi.height = 1; src_step = dst_step = roi.width; } #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type)) return; #endif #if defined(HAVE_IPP) CV_IPP_CHECK() { IppiSize sz = { roi.width, roi.height }; CV_SUPPRESS_DEPRECATED_START switch( type ) { case THRESH_TRUNC: #ifndef HAVE_IPP_ICV_ONLY if (_src.data == _dst.data && ippiThreshold_GT_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } #endif if (ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; case THRESH_TOZERO: #ifndef HAVE_IPP_ICV_ONLY if (_src.data == _dst.data && ippiThreshold_LTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh + 1, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } #endif if (ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; case THRESH_TOZERO_INV: #ifndef HAVE_IPP_ICV_ONLY if (_src.data == _dst.data && ippiThreshold_GTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } #endif if (ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; } CV_SUPPRESS_DEPRECATED_END } #endif switch( 
type ) { case THRESH_BINARY: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_cmpgt_epi16( v0, thresh8 ); v1 = _mm_cmpgt_epi16( v1, thresh8 ); v0 = _mm_and_si128( v0, maxval8 ); v1 = _mm_and_si128( v1, maxval8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval); for( ; j <= roi.width - 8; j += 8 ) { uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval)); } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] > thresh ? maxval : 0; } break; case THRESH_BINARY_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_cmpgt_epi16( v0, thresh8 ); v1 = _mm_cmpgt_epi16( v1, thresh8 ); v0 = _mm_andnot_si128( v0, maxval8 ); v1 = _mm_andnot_si128( v1, maxval8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval); for( ; j <= roi.width - 8; j += 8 ) { uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval)); } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] <= thresh ? 
maxval : 0; } break; case THRESH_TRUNC: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_min_epi16( v0, thresh8 ); v1 = _mm_min_epi16( v1, thresh8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh); for( ; j <= roi.width - 8; j += 8 ) vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh)); #endif for( ; j < roi.width; j++ ) dst[j] = std::min(src[j], thresh); } break; case THRESH_TOZERO: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8)); v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8)); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh); for( ; j <= roi.width - 8; j += 8 ) { int16x8_t v_src = vld1q_s16(src + j); uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src)); } #endif for( ; j < roi.width; j++ ) { short v = src[j]; dst[j] = v > thresh ? 
v : 0; } } break; case THRESH_TOZERO_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0); v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh); for( ; j <= roi.width - 8; j += 8 ) { int16x8_t v_src = vld1q_s16(src + j); uint16x8_t v_mask = vcleq_s16(v_src, v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src)); } #endif for( ; j < roi.width; j++ ) { short v = src[j]; dst[j] = v <= thresh ? v : 0; } } break; default: return CV_Error( CV_StsBadArg, "" ); } }
void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize, double scale, double delta, int borderType ) { int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); if (ddepth < 0) ddepth = sdepth; _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) ); #ifdef HAVE_IPP CV_IPP_CHECK() { if ((ksize == 3 || ksize == 5) && ((borderType & BORDER_ISOLATED) != 0 || !_src.isSubmatrix()) && ((stype == CV_8UC1 && ddepth == CV_16S) || (ddepth == CV_32F && stype == CV_32FC1)) && !ocl::useOpenCL()) { int iscale = saturate_cast<int>(scale), idelta = saturate_cast<int>(delta); bool floatScale = std::fabs(scale - iscale) > DBL_EPSILON, needScale = iscale != 1; bool floatDelta = std::fabs(delta - idelta) > DBL_EPSILON, needDelta = delta != 0; int borderTypeNI = borderType & ~BORDER_ISOLATED; Mat src = _src.getMat(), dst = _dst.getMat(); if (src.data != dst.data) { Ipp32s bufsize; IppStatus status = (IppStatus)-1; IppiSize roisize = { src.cols, src.rows }; IppiMaskSize masksize = ksize == 3 ? 
ippMskSize3x3 : ippMskSize5x5; IppiBorderType borderTypeIpp = ippiGetBorderType(borderTypeNI); #define IPP_FILTER_LAPLACIAN(ippsrctype, ippdsttype, ippfavor) \ do \ { \ if (borderTypeIpp >= 0 && ippiFilterLaplacianGetBufferSize_##ippfavor##_C1R(roisize, masksize, &bufsize) >= 0) \ { \ Ipp8u * buffer = ippsMalloc_8u(bufsize); \ status = ippiFilterLaplacianBorder_##ippfavor##_C1R(src.ptr<ippsrctype>(), (int)src.step, dst.ptr<ippdsttype>(), \ (int)dst.step, roisize, masksize, borderTypeIpp, 0, buffer); \ ippsFree(buffer); \ } \ } while ((void)0, 0) CV_SUPPRESS_DEPRECATED_START if (sdepth == CV_8U && ddepth == CV_16S && !floatScale && !floatDelta) { IPP_FILTER_LAPLACIAN(Ipp8u, Ipp16s, 8u16s); if (needScale && status >= 0) status = ippiMulC_16s_C1IRSfs((Ipp16s)iscale, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0); if (needDelta && status >= 0) status = ippiAddC_16s_C1IRSfs((Ipp16s)idelta, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0); } else if (sdepth == CV_32F && ddepth == CV_32F) { IPP_FILTER_LAPLACIAN(Ipp32f, Ipp32f, 32f); if (needScale && status >= 0) status = ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roisize); if (needDelta && status >= 0) status = ippiAddC_32f_C1IR((Ipp32f)delta, dst.ptr<Ipp32f>(), (int)dst.step, roisize); } CV_SUPPRESS_DEPRECATED_END if (status >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } }