/**
 * Blends two images using two per-pixel weight maps and writes the result to _dst.
 *
 * Requirements enforced below:
 *  - the depth of _src1 is CV_8U or CV_32F;
 *  - _src2, _weights1 and _weights2 have the same size as _src1;
 *  - _src2 has the same type as _src1, and both weight maps are CV_32FC1.
 *
 * _dst is (re)created with the size and type of _src1. The per-pixel arithmetic
 * itself lives in BlendLinearInvoker (defined elsewhere) and is executed row-wise
 * through parallel_for_.
 */
void cv::blendLinear( InputArray _src1, InputArray _src2, InputArray _weights1, InputArray _weights2, OutputArray _dst )
{
    const int type = _src1.type();
    const int depth = CV_MAT_DEPTH(type);
    const Size size = _src1.size();

    // Validate depth, geometry and types before touching the output.
    CV_Assert(depth == CV_8U || depth == CV_32F);
    CV_Assert(size == _src2.size() && size == _weights1.size() && size == _weights2.size());
    CV_Assert(type == _src2.type() && _weights1.type() == CV_32FC1 && _weights2.type() == CV_32FC1);

    _dst.create(size, type);

    // Give the OpenCL implementation the first chance when the output is a UMat.
    CV_OCL_RUN(_dst.isUMat(),
               ocl_blendLinear(_src1, _src2, _weights1, _weights2, _dst))

    Mat firstImage = _src1.getMat();
    Mat secondImage = _src2.getMat();
    Mat firstWeights = _weights1.getMat();
    Mat secondWeights = _weights2.getMat();
    Mat blended = _dst.getMat();

    // Stripe hint for parallel_for_: one stripe per 2^16 output elements.
    const double stripeCount = blended.total()/(double)(1<<16);
    const Range allRows(0, firstImage.rows);

    switch (depth)
    {
    case CV_8U:
        {
            BlendLinearInvoker<uchar> invoker(firstImage, secondImage, firstWeights, secondWeights, blended);
            parallel_for_(allRows, invoker, stripeCount);
        }
        break;
    case CV_32F:
        {
            BlendLinearInvoker<float> invoker(firstImage, secondImage, firstWeights, secondWeights, blended);
            parallel_for_(allRows, invoker, stripeCount);
        }
        break;
    default:
        // Unreachable: other depths were rejected by the assertion above.
        break;
    }
}
/**
 * Tiles a 2D array: the output contains ny copies of _src stacked vertically
 * and nx copies side by side horizontally.
 *
 * @param _src  Source array; must have at most 2 dimensions.
 * @param ny    Number of vertical repetitions; must be > 0.
 * @param nx    Number of horizontal repetitions; must be > 0.
 * @param _dst  Destination; (re)created as (rows*ny) x (cols*nx) with the type
 *              of _src. Must not be the same object as _src.
 *
 * Fails a CV_Assert when any of the requirements above is violated.
 */
void repeat(InputArray _src, int ny, int nx, OutputArray _dst)
{
    // In-place operation cannot work: _dst.create() below may reallocate the
    // buffer shared with _src before the source pixels have been read, so
    // reject it up front instead of silently producing garbage.
    CV_Assert( _src.getObj() != _dst.getObj() );
    CV_Assert( _src.dims() <= 2 );
    CV_Assert( ny > 0 && nx > 0 );

    Size ssize = _src.size();
    _dst.create(ssize.height*ny, ssize.width*nx, _src.type());

    // Give the OpenCL implementation the first chance when the output is a UMat.
    CV_OCL_RUN(_dst.isUMat(),
               ocl_repeat(_src, ny, nx, _dst))

    Mat src = _src.getMat(), dst = _dst.getMat();
    Size dsize = dst.size();
    int esz = (int)src.elemSize();
    int x, y;

    // From here on both widths are expressed in bytes rather than elements.
    ssize.width *= esz;
    dsize.width *= esz;

    // Step 1: fill the top band (the first source-height rows) by copying each
    // source row nx times across the matching destination row.
    for( y = 0; y < ssize.height; y++ )
    {
        for( x = 0; x < dsize.width; x += ssize.width )
            memcpy( dst.data + y*dst.step + x, src.data + y*src.step, ssize.width );
    }

    // Step 2: every remaining row equals the already complete row located
    // one source-height above it, so copy whole destination rows.
    for( ; y < dsize.height; y++ )
        memcpy( dst.data + y*dst.step, dst.data + (y - ssize.height)*dst.step, dsize.width );
}
// Runs the layer's forward pass on the given inputs/outputs/internals.
//
// The call is instrumented with CV_TRACE_FUNCTION and the layer's `name` is
// attached as a trace argument. CV_OCL_RUN is then given forward_ocl(...) to
// try, but only under the condition that preferableTarget is
// DNN_TARGET_OPENCL and OCL_PERFORMANCE_CHECK(default device isIntel()) holds.
// Otherwise (or when the macro does not take the OpenCL route) the work is
// handed to Layer::forward_fallback with the same three arguments.
//
// NOTE(review): CV_OCL_RUN is defined outside this view; presumably it returns
// from this function when forward_ocl succeeds - confirm against the macro.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), forward_ocl(inputs_arr, outputs_arr, internals_arr)) Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); }
/**
 * Flips a 2D array and stores the result in _dst.
 *
 * @param _src       Source array; must have at most 2 dimensions.
 * @param _dst       Destination; created with the size and type of the source.
 * @param flip_mode  Selects the operation:
 *                   > 0  - flipHoriz only;
 *                   == 0 - flipVert only;
 *                   < 0  - flipVert followed by an in-place flipHoriz on the
 *                          destination (i.e. both flips).
 */
void flip( InputArray _src, OutputArray _dst, int flip_mode )
{
    CV_Assert( _src.dims() <= 2 );

    // Give the OpenCL implementation the first chance when the output is a UMat.
    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src,_dst, flip_mode))

    Mat input = _src.getMat();
    _dst.create( input.size(), input.type() );
    Mat output = _dst.getMat();
    const size_t pixelBytes = input.elemSize();

    // First pass: source -> destination.
    if( flip_mode > 0 )
    {
        flipHoriz( input.data, input.step, output.data, output.step, input.size(), pixelBytes );
    }
    else
    {
        flipVert( input.data, input.step, output.data, output.step, input.size(), pixelBytes );
    }

    // Second pass for negative modes: flip the destination horizontally in place.
    if( flip_mode < 0 )
    {
        flipHoriz( output.data, output.step, output.data, output.step, output.size(), pixelBytes );
    }
}
/**
 * Non-local-means denoising for colour images.
 *
 * The image is converted to Lab, split into a 1-channel L plane and a
 * 2-channel ab plane, each plane is denoised separately with
 * fastNlMeansDenoising (strength h for L, hForColorComponents for ab), and the
 * planes are merged and converted back.
 *
 * @param _src                 Input image; its type must be CV_8UC3 or CV_8UC4.
 * @param _dst                 Output image; created with the size and type of the input.
 * @param h                    Filter strength passed on for the L plane.
 * @param hForColorComponents  Filter strength passed on for the ab plane.
 * @param templateWindowSize   Forwarded unchanged to fastNlMeansDenoising.
 * @param searchWindowSize     Forwarded unchanged to fastNlMeansDenoising.
 *
 * Raises Error::StsBadArg for any other input type.
 */
void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                      float h, float hForColorComponents,
                                      int templateWindowSize, int searchWindowSize)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    if (type != CV_8UC3 && type != CV_8UC4)
    {
        // The message now lists both accepted types; it used to mention
        // CV_8UC3 only although CV_8UC4 passes this check as well.
        CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!");
        return;
    }

    // Give the OpenCL implementation the first chance for 2D UMat data.
    CV_OCL_RUN(_src.dims() <= 2 && (_dst.isUMat() || _src.isUMat()),
               ocl_fastNlMeansDenoisingColored(_src, _dst, h, hForColorComponents,
                                               templateWindowSize, searchWindowSize))

    Mat src = _src.getMat();
    _dst.create(src.size(), type);
    Mat dst = _dst.getMat();

    Mat src_lab;
    cvtColor(src, src_lab, COLOR_LBGR2Lab);

    // Split Lab into L (channel 0) and ab (channels 1 and 2).
    Mat l(src.size(), CV_8U);
    Mat ab(src.size(), CV_8UC2);
    Mat l_ab[] = { l, ab };
    int from_to[] = { 0,0, 1,1, 2,2 };
    mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);

    // Denoise both planes in place, each with its own strength.
    fastNlMeansDenoising(l, l, h, templateWindowSize, searchWindowSize);
    fastNlMeansDenoising(ab, ab, hForColorComponents, templateWindowSize, searchWindowSize);

    // Merge the planes back into a 3-channel Lab image.
    Mat l_ab_denoised[] = { l, ab };
    Mat dst_lab(src.size(), CV_MAKE_TYPE(depth, 3));
    mixChannels(l_ab_denoised, 2, &dst_lab, 1, from_to, 3);

    // Convert back, requesting as many output channels as the input had.
    cvtColor(dst_lab, dst, COLOR_Lab2LBGR, cn);
}
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()), ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize)) Mat src = _src.getMat(); _dst.create(src.size(), src.type()); Mat dst = _dst.getMat(); #ifdef HAVE_TEGRA_OPTIMIZATION if(tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize)) return; #endif switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker<uchar>( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker<cv::Vec2b>( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker<cv::Vec3b>( src, dst, templateWindowSize, searchWindowSize, h)); break; default: CV_Error(Error::StsBadArg, "Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported"); } }
// Updates a motion history image (MHI) in place from a silhouette mask.
//
// Preconditions (CV_Assert): _silhouette is CV_8UC1, _mhi is CV_32FC1, and both
// have the same size.
//
// With ts = (float)timestamp and delbound = (float)(timestamp - duration), every
// pixel is updated as:
//     silhouette != 0      -> mhi = ts
//     else mhi < delbound  -> mhi = 0
//     otherwise            -> mhi unchanged
//
// Flow:
//  - CV_OCL_RUN is given ocl_updateMotionHistory to try when _mhi is a UMat with
//    at most 2 dimensions.
//  - If both matrices are continuous, the image is processed as one long row
//    (width *= height, height = 1) so the per-row loop runs only once.
//  - When built with CV_SSE2 and the CPU reports SSE2 support, 8 pixels are
//    handled per iteration: the 8 mask bytes are widened to two float vectors,
//    MHI values below delbound are cleared via (v & (v >= delbound)), and then
//    lanes with a non-zero mask are replaced by ts using the xor/and/xor select
//    v ^ ((v ^ ts) & (s != 0)).
//  - The trailing scalar loop handles the remaining pixels (or all of them when
//    the SSE2 path is not taken).
//
// NOTE(review): for NaN values in mhi the two paths appear to differ - the SSE2
// compare (v >= delbound) is false and zeroes the lane, while the scalar test
// (val < delbound) is false and keeps the value. Confirm whether NaN can occur.
void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi, double timestamp, double duration ) { CV_Assert( _silhouette.type() == CV_8UC1 && _mhi.type() == CV_32FC1 ); CV_Assert( _silhouette.sameSize(_mhi) ); float ts = (float)timestamp; float delbound = (float)(timestamp - duration); CV_OCL_RUN(_mhi.isUMat() && _mhi.dims() <= 2, ocl_updateMotionHistory(_silhouette, _mhi, ts, delbound)) Mat silh = _silhouette.getMat(), mhi = _mhi.getMat(); Size size = silh.size(); if( silh.isContinuous() && mhi.isContinuous() ) { size.width *= size.height; size.height = 1; } #if CV_SSE2 volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2); #endif for(int y = 0; y < size.height; y++ ) { const uchar* silhData = silh.ptr<uchar>(y); float* mhiData = mhi.ptr<float>(y); int x = 0; #if CV_SSE2 if( useSIMD ) { __m128 ts4 = _mm_set1_ps(ts), db4 = _mm_set1_ps(delbound); for( ; x <= size.width - 8; x += 8 ) { __m128i z = _mm_setzero_si128(); __m128i s = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(silhData + x)), z); __m128 s0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(s, z)), s1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(s, z)); __m128 v0 = _mm_loadu_ps(mhiData + x), v1 = _mm_loadu_ps(mhiData + x + 4); __m128 fz = _mm_setzero_ps(); v0 = _mm_and_ps(v0, _mm_cmpge_ps(v0, db4)); v1 = _mm_and_ps(v1, _mm_cmpge_ps(v1, db4)); __m128 m0 = _mm_and_ps(_mm_xor_ps(v0, ts4), _mm_cmpneq_ps(s0, fz)); __m128 m1 = _mm_and_ps(_mm_xor_ps(v1, ts4), _mm_cmpneq_ps(s1, fz)); v0 = _mm_xor_ps(v0, m0); v1 = _mm_xor_ps(v1, m1); _mm_storeu_ps(mhiData + x, v0); _mm_storeu_ps(mhiData + x + 4, v1); } } #endif for( ; x < size.width; x++ ) { float val = mhiData[x]; val = silhData[x] ? ts : val < delbound ? 0 : val; mhiData[x] = val; } } }