// OpenCL path for the SQDIFF_NORMED template-matching mode.
//
// matchTemplate(..., CV_TM_CCORR) is run first so that _result already holds the
// CCORR output; the "matchTemplate_SQDIFF_NORMED" kernel then receives that buffer
// bound read-write together with the image's squared-sum integral and the scalar
// produced by sumTemplate() for the template.
//
// Returns false if the kernel cannot be created, sumTemplate() fails, or the
// kernel launch fails.
static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    int type = _image.type(), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    // Only image_sqsums is handed to the kernel; image_sums is a by-product of integral().
    UMat image_sums, image_sqsums;
    integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);

    UMat templ_sqsum;
    if (!sumTemplate(_templ, templ_sqsum))
        return false;

    k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
           templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha, InputArray _mask, int op_type ) { CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE || op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED); int stype = _src.type(), cn = CV_MAT_CN(stype); int sdepth = CV_MAT_DEPTH(stype), ddepth = _dst.depth(); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, haveMask = !_mask.empty(); if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) return false; const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT", "ACCUMULATE_WEIGHTED" }; ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc, format("-D %s%s -D srcT=%s -D cn=%d -D dstT=%s%s", opMap[op_type], haveMask ? " -D HAVE_MASK" : "", ocl::typeToStr(sdepth), cn, ocl::typeToStr(ddepth), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat(); ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), dstarg = ocl::KernelArg::ReadWrite(dst), maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); int argidx = k.set(0, srcarg); if (op_type == ACCUMULATE_PRODUCT) argidx = k.set(argidx, src2arg); argidx = k.set(argidx, dstarg); if (op_type == ACCUMULATE_WEIGHTED) { if (ddepth == CV_32F) argidx = k.set(argidx, (float)alpha); else argidx = k.set(argidx, alpha); } if (haveMask) k.set(argidx, maskarg); size_t globalsize[2] = { src.cols, src.rows }; return k.run(2, globalsize, NULL, false); }
// OpenCL dot product of two arrays.
//
// Both inputs are viewed as single-channel (reshape(1)). The "reduce" kernel, built
// with OP_DOT, writes dbsize partial values (one per compute unit reported by the
// device) into a temporary buffer; those are then added up on the host with sum().
//
// res is written only on success. Returns false if the depth is CV_64F without
// device double support, the kernel cannot be created, or the launch fails.
static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    const ocl::Device & dev = ocl::Device::getDefault();

    const int type = src1.type();
    const int depth = CV_MAT_DEPTH(type);
    const int kercn = ocl::predictOptimalVectorWidth(src1, src2);
    const bool doubleSupport = dev.doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)
        return false;

    const int dbsize = dev.maxComputeUnits();
    size_t wgs = dev.maxWorkGroupSize();
    const int ddepth = std::max(CV_32F, depth);

    // Double until reaching or passing wgs, then halve once.
    int wgs2_aligned = 1;
    for (; wgs2_aligned < (int)wgs; wgs2_aligned <<= 1)
    {
    }
    wgs2_aligned >>= 1;

    char cvt[40];
    const String buildOptions =
        format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
               "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
               ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
               ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
               ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
               (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
               _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
               _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn);

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, buildOptions);
    if (k.empty())
        return false;

    // Receives the partial results.
    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    size_t globalsize = dbsize * wgs;
    if (!k.run(1, &globalsize, &wgs, false))
        return false;

    res = sum(db.getMat(ACCESS_READ))[0];
    return true;
}
// UMat path of the colored non-local-means denoiser.
//
// The source is converted with COLOR_LBGR2Lab, split by mixChannels into a 1-channel
// plane and a 2-channel plane, each plane is denoised separately (h for the first,
// hForColorComponents for the second), then the planes are merged again and converted
// back with COLOR_Lab2LBGR into _dst. Always returns true.
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                             float h, float hForColorComponents,
                                             int templateWindowSize, int searchWindowSize)
{
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    UMat dst = _dst.getUMat();

    UMat src_lab;
    cvtColor(src, src_lab, COLOR_LBGR2Lab);

    // Plane 0: one channel, plane 1: two channels.
    UMat lightness(src.size(), CV_8U);
    UMat chroma(src.size(), CV_8UC2);

    std::vector<UMat> planes(2), planesDenoised(2);
    planes[0] = lightness;
    planes[1] = chroma;
    planesDenoised[0].create(src.size(), CV_8U);
    planesDenoised[1].create(src.size(), CV_8UC2);

    // Identity channel mapping, used for both the split and the merge.
    int from_to[] = { 0,0, 1,1, 2,2 };
    mixChannels(std::vector<UMat>(1, src_lab), planes, from_to, 3);

    fastNlMeansDenoising(planes[0], planesDenoised[0], h, templateWindowSize, searchWindowSize);
    fastNlMeansDenoising(planes[1], planesDenoised[1], hForColorComponents, templateWindowSize, searchWindowSize);

    UMat dst_lab(src.size(), CV_8UC3);
    mixChannels(planesDenoised, std::vector<UMat>(1, dst_lab), from_to, 3);

    cvtColor(dst_lab, dst, COLOR_Lab2LBGR, src.channels());

    return true;
}
// OpenCL integral image (sum only), computed in two kernel passes:
// "integral_sum_cols" writes into an intermediate buffer t_sum, then
// "integral_sum_rows" reads t_sum and writes the final _sum, which is one row and
// one column larger than the source.
//
// Only handled when the source is CV_8UC1, its step and offset are multiples of vlen
// (a constant defined outside this function), and sdepth is CV_32S or CV_32F.
// Returns false otherwise, or when a kernel cannot be created or fails to run.
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
         !(sdepth == CV_32S || sdepth == CV_32F) )
        return false;

    ocl::Kernel k1("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    if (k1.empty())
        return false;

    // t_size: width is the source height rounded up to a multiple of vlen,
    // height is the source width.
    Size size = _src.size(),
         t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width),
         ssize(size.width + 1, size.height + 1);
    _sum.create(ssize, sdepth);
    UMat src = _src.getUMat(), t_sum(t_size, sdepth), sum = _sum.getUMat();
    // Keep only the first size.height columns of the padded allocation.
    t_sum = t_sum(Range::all(), Range(0, size.height));

    int offset = (int)src.offset / vlen, pre_invalid = (int)src.offset % vlen;
    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
    int sum_offset = (int)sum.offset / vlen;

    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
            offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step);
    size_t globalCols = ((vcols + 1) / 2) * 256, localCols = 256;
    if (!k1.run(1, &globalCols, &localCols, false))
        return false;

    ocl::Kernel k2("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    // Same guard as for the first kernel: do not set arguments on an empty kernel.
    if (k2.empty())
        return false;

    k2.args(ocl::KernelArg::PtrReadWrite(t_sum), ocl::KernelArg::PtrWriteOnly(sum),
            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)sum.step, sum_offset);

    size_t globalRows = t_sum.cols * 32, localRows = 256;
    return k2.run(1, &globalRows, &localRows, false);
}
// OpenCL path for the CCOEFF template-matching mode.
//
// matchTemplate(..., CV_TM_CCORR) fills _result first; the
// "matchTemplate_Prepared_CCOEFF" kernel then receives that buffer bound read-write
// together with the CV_32F integral image of _image and the per-channel mean of the
// template (a float for one channel, a Vec4f otherwise).
//
// Returns false if the kernel cannot be created or the launch fails.
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums;
    integral(_image, image_sums, CV_32F);

    // Kernel types are derived from the integral image, not from the input image.
    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    UMat result = _result.getUMat();

    if (cn==1)
    {
        Scalar templMean = mean(templ);
        float templ_sum = (float)templMean[0];

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
               templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = (Vec4f)mean(templ);

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
               templ.rows, templ.cols, templ_sum);
    }

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
// OpenCL implementation of fixed-level thresholding.
//
// Handles the five basic threshold types listed in the guard below; any other
// thresh_type, or a CV_64F source on a device without double support, returns false.
// For depths up to CV_32S the caller's thresh is replaced by cvFloor(thresh)
// (thresh is an in/out reference). Also returns false if the kernel cannot be
// created or the launch fails.
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    // Indexed directly by thresh_type.
    // NOTE(review): presumably the THRESH_* constants are 0..4 in this order - confirm.
    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    // thresh and maxval are converted to the source depth through 1x1 matrices.
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))));

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { static_cast<size_t>(dst.cols * cn / kercn), static_cast<size_t>(dst.rows) };
    return k.run(2, globalsize, NULL, false);
}
// Detects corners with goodFeaturesToTrack() and returns them as keypoints.
//
// Inputs whose type is not CV_8U are first converted with COLOR_BGR2GRAY. UMat inputs
// stay in UMat form; everything else goes through Mat. Each resulting keypoint is
// built from the corner position and blockSize (cast to float). The detector settings
// (nfeatures, qualityLevel, minDistance, blockSize, useHarrisDetector, k) come from
// outside this function.
void detect( InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask )
{
    CV_INSTRUMENT_REGION()

    std::vector<Point2f> corners;

    if (_image.isUMat())
    {
        UMat ugrayImage;
        if( _image.type() == CV_8U )
            ugrayImage = _image.getUMat();
        else
            cvtColor( _image, ugrayImage, COLOR_BGR2GRAY );

        goodFeaturesToTrack( ugrayImage, corners, nfeatures, qualityLevel, minDistance, _mask,
                             blockSize, useHarrisDetector, k );
    }
    else
    {
        Mat image = _image.getMat(), grayImage = image;
        if( image.type() != CV_8U )
            cvtColor( image, grayImage, COLOR_BGR2GRAY );

        goodFeaturesToTrack( grayImage, corners, nfeatures, qualityLevel, minDistance, _mask,
                             blockSize, useHarrisDetector, k );
    }

    // One keypoint per detected corner, in the same order.
    const size_t count = corners.size();
    keypoints.resize(count);
    for( size_t i = 0; i < count; ++i )
        keypoints[i] = KeyPoint( corners[i], (float)blockSize );
}
// Runs the "calcSum" kernel (built with -D CALC_SUM) over _src and stores its output
// in result, which is (re)created as a 1x1 CV_32FC1 matrix.
//
// The kernel is launched as a single work-group whose size is the device's maximum
// work-group size. Returns false if the kernel cannot be created or the launch fails.
static bool sumTemplate(InputArray _src, UMat & result)
{
    const int type = _src.type();
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    const int wdepth = CV_32F;
    const int wtype = CV_MAKE_TYPE(wdepth, cn);

    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    // Double until reaching or passing wgs, then halve once.
    int wgs2_aligned = 1;
    for (; wgs2_aligned < (int)wgs; wgs2_aligned <<= 1)
    {
    }
    wgs2_aligned >>= 1;

    char cvt[40];
    const String buildOptions =
        format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
               ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
               ocl::convertTypeStr(depth, wdepth, cn, cvt),
               (int)wgs, wgs2_aligned);

    ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc, buildOptions);
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    result.create(1, 1, CV_32FC1);

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            resarg = ocl::KernelArg::PtrWriteOnly(result);

    k.args(srcarg, src.cols, (int)src.total(), resarg);

    // Global size equals local size: exactly one work-group.
    size_t globalsize = wgs;
    return k.run(1, &globalsize, &wgs, false);
}
// Correlates _image with _templ through convolve_dft() and writes a single-channel
// CV_32F result of size (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1).
//
// Single-channel inputs go straight to convolve_dft(). Multi-channel inputs are
// reshaped to one channel, convolved into a temporary that is `channels` times wider,
// and then reduced to _result by extractFirstChannel_32F().
//
// Returns false when convolve_dft() or extractFirstChannel_32F() fails.
static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
{
    _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);

    if (_image.channels() == 1)
        return convolve_dft(_image, _templ, _result);

    UMat image = _image.getUMat();
    UMat templ = _templ.getUMat();

    // Temporary output of the reshaped (interleaved) convolution.
    UMat result_(image.rows - templ.rows + 1, (image.cols - templ.cols + 1) * image.channels(), CV_32F);
    if (!convolve_dft(image.reshape(1), templ.reshape(1), result_))
        return false;

    // The former unused local `UMat result = _result.getUMat();` was dropped here;
    // _result itself is what is passed on.
    return extractFirstChannel_32F(result_, _result, _image.channels());
}
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) { bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) return false; char cvt[40]; String opts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s", sdepth, sqdepth, ocl::typeToStr(sqdepth), ocl::typeToStr(sqdepth), ocl::convertTypeStr(sdepth, sqdepth, 4, cvt), doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel k1("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, opts); if (k1.empty()) return false; Size size = _src.size(), dsize = Size(size.width + 1, size.height + 1), t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width); UMat src = _src.getUMat(), t_sum(t_size, sdepth), t_sqsum(t_size, sqdepth); t_sum = t_sum(Range::all(), Range(0, size.height)); t_sqsum = t_sqsum(Range::all(), Range(0, size.height)); _sum.create(dsize, sdepth); _sqsum.create(dsize, sqdepth); UMat sum = _sum.getUMat(), sqsum = _sqsum.getUMat(); int offset = src.offset / vlen; int pre_invalid = src.offset % vlen; int vcols = (pre_invalid + src.cols + vlen - 1) / vlen; int sum_offset = sum.offset / sum.elemSize(); int sqsum_offset = sqsum.offset / sqsum.elemSize(); CV_Assert(sqsum.offset % sqsum.elemSize() == 0); k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum), ocl::KernelArg::PtrWriteOnly(t_sqsum), offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step, (int)t_sqsum.step); size_t gt = ((vcols + 1) / 2) * 256, lt = 256; if (!k1.run(1, >, <, false)) return false; ocl::Kernel k2("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, opts); if (k2.empty()) return false; k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrReadOnly(t_sqsum), ocl::KernelArg::PtrWriteOnly(sum), ocl::KernelArg::PtrWriteOnly(sqsum), t_sum.rows, 
t_sum.cols, (int)t_sum.step, (int)t_sqsum.step, (int)sum.step, (int)sqsum.step, sum_offset, sqsum_offset); size_t gt2 = t_sum.cols * 32, lt2 = 256; return k2.run(1, >2, <2, false); }
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernelX, InputArray _kernelY, double delta, int borderType) { const ocl::Device & dev = ocl::Device::getDefault(); int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) && (_src.offset() == 0) && (_src.step() % 4 == 0) && (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) ) return false; Mat kernelX = _kernelX.getMat().reshape(1, 1); if (kernelX.cols % 2 != 1) return false; Mat kernelY = _kernelY.getMat().reshape(1, 1); if (kernelY.cols % 2 != 1) return false; if (ddepth < 0) ddepth = sdepth; Size size = _src.size(); size_t globalsize[2] = { 0, 0 }; size_t localsize[2] = { 0, 0 }; globalsize[0] = size.width / 16; globalsize[1] = size.height / 2; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; char build_opts[1024]; sprintf(build_opts, "-D %s %s%s", borderMap[borderType], ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(), ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str()); ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts); if (kernel.empty()) return false; UMat src = _src.getUMat(); _dst.create(size, CV_MAKETYPE(ddepth, cn)); if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) return false; UMat dst = _dst.getUMat(); int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); idxArg = kernel.set(idxArg, (int)src.step); idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); idxArg = kernel.set(idxArg, (int)dst.step); idxArg = kernel.set(idxArg, (int)dst.rows); idxArg = kernel.set(idxArg, (int)dst.cols); idxArg = kernel.set(idxArg, static_cast<float>(delta)); return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); }
// Dispatcher for the CCORR template-matching mode.
//
// Uses the naive kernel when useNaive() accepts the template size; otherwise uses
// convolve_32F(), converting CV_8U inputs to CV_32F first. Returns whatever the
// selected implementation returns.
static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    if (useNaive(_templ.size()))
        return matchTemplateNaive_CCORR(_image, _templ, _result);

    if (_image.depth() != CV_8U)
        return convolve_32F(_image, _templ, _result);

    // 8-bit input: convert both operands to float before convolving.
    UMat imagef, templf;
    UMat image = _image.getUMat();
    UMat templ = _templ.getUMat();
    image.convertTo(imagef, CV_32F);
    templ.convertTo(templf, CV_32F);
    return convolve_32F(imagef, templf, _result);
}
// OpenCL implementation of linear blending: launches the "blendLinear" kernel with
// the two sources, their two weight maps and the destination, using one work-item
// per destination pixel (dst.cols x dst.rows).
//
// _dst is not created here; it is used as obtained from _dst.getUMat().
// Returns false if the kernel cannot be created or the launch fails.
static bool ocl_blendLinear( InputArray _src1, InputArray _src2, InputArray _weights1, InputArray _weights2, OutputArray _dst )
{
    const int type = _src1.type();
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);

    char cvt[30];
    const String buildOptions = format("-D T=%s -D cn=%d -D convertToT=%s", ocl::typeToStr(depth),
                                       cn, ocl::convertTypeStr(CV_32F, depth, 1, cvt));

    ocl::Kernel k("blendLinear", ocl::imgproc::blend_linear_oclsrc, buildOptions);
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat();
    UMat src2 = _src2.getUMat();
    UMat weights1 = _weights1.getUMat();
    UMat weights2 = _weights2.getUMat();
    UMat dst = _dst.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
           ocl::KernelArg::ReadOnlyNoSize(weights1), ocl::KernelArg::ReadOnlyNoSize(weights2),
           ocl::KernelArg::WriteOnly(dst));

    size_t globalsize[2] = { (size_t)dst.cols, (size_t)dst.rows };
    return k.run(2, globalsize, NULL, false);
}
// Tiles _src into _dst: copies the source into an ny-by-nx grid of source-sized
// regions of the destination. _dst is not created here; it is used as obtained
// from _dst.getUMat(). Always returns true.
static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst)
{
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    const int tileW = src.cols, tileH = src.rows;

    for (int ty = 0; ty < ny; ++ty)
    {
        const int top = ty * tileH;
        for (int tx = 0; tx < nx; ++tx)
        {
            // Header over the destination region for tile (tx, ty).
            Rect tileRect(tx * tileW, top, tileW, tileH);
            UMat tile(dst, tileRect);
            src.copyTo(tile);
        }
    }

    return true;
}
// Direct (non-DFT) OpenCL implementation of the SQDIFF template-matching mode.
//
// Creates a CV_32F result of size (image.rows - templ.rows + 1) x
// (image.cols - templ.cols + 1) and launches "matchTemplate_Naive_SQDIFF" with one
// work-item per result element. Returns false if the kernel cannot be created or
// the launch fails.
static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    // Working type used by the kernel: CV_32F with the input's channel count.
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    char cvt[40];
    ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
                         ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
// Direct (non-DFT) OpenCL implementation of the CCORR template-matching mode.
//
// On Intel GPUs with single-channel input, each work-item covers 4 result pixels
// along x (PIX_PER_WI_X=4); the kernel's T / WT types are then built with 4 lanes.
// In every other case each work-item covers one pixel.
//
// Creates a CV_32FC1 result of size (image.rows - templ.rows + 1) x
// (image.cols - templ.cols + 1). Returns false if the kernel cannot be created or
// the launch fails.
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    ocl::Device dev = ocl::Device::getDefault();
    int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;

    // rated_cn / wtype1 describe the (possibly widened) types; cn / wtype keep
    // describing the real per-pixel types (passed as cn and WT1).
    int rated_cn = cn;
    int wtype1 = wtype;

    if (pxPerWIx!=1)
    {
        rated_cn = pxPerWIx;
        type = CV_MAKE_TYPE(depth, rated_cn);
        wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
    }

    // Two separate buffers because both conversion strings must stay alive at once.
    char cvt[40];
    char cvt1[40];
    const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
    const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);

    ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
                  format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
                         convertToWT, convertToWT1, cn, pxPerWIx));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    // x dimension: result width divided by pixels-per-work-item, rounded up.
    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)((result.cols + pxPerWIx - 1) / pxPerWIx), (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
// Launches the "feed" kernel of the multi-band blender with one work-item per source
// pixel. _src and _weight are bound read-only; _dst and _dst_weight are bound
// read-write. Returns false if the kernel cannot be created or the launch fails.
static bool ocl_MultiBandBlender_feed(InputArray _src, InputArray _weight,
        InputOutputArray _dst, InputOutputArray _dst_weight)
{
    // Build options: the DEFINE_feed switch plus one matrix description per argument.
    String buildOptions = "-D DEFINE_feed";
    ocl::buildOptionsAddMatrixDescription(buildOptions, "src", _src);
    ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
    ocl::buildOptionsAddMatrixDescription(buildOptions, "dst", _dst);
    ocl::buildOptionsAddMatrixDescription(buildOptions, "dstWeight", _dst_weight);

    ocl::Kernel k("feed", ocl::stitching::multibandblend_oclsrc, buildOptions);
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    UMat weight = _weight.getUMat();
    UMat dst = _dst.getUMat();
    UMat dstWeight = _dst_weight.getUMat();

    k.args(ocl::KernelArg::ReadOnly(src),
           ocl::KernelArg::ReadOnly(weight),
           ocl::KernelArg::ReadWrite(dst),
           ocl::KernelArg::ReadWrite(dstWeight));

    size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows };
    return k.run(2, globalsize, NULL, false);
}
// OpenCL path for the CCOEFF template-matching mode.
//
// matchTemplate(..., CV_TM_CCORR) fills _result first; the
// "matchTemplate_Prepared_CCOEFF" kernel then receives that buffer bound read-write
// together with the integral image of _image (converted to CV_32F if integral()
// produced CV_64F) and the per-channel template sum divided by the template area.
// The number of per-channel scalars passed to the kernel equals the channel count
// of the integral image (1 to 4).
//
// Returns false if the kernel cannot be created or the launch fails.
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums, temp;
    integral(_image, temp);

    if (temp.depth() == CV_64F)
        temp.convertTo(image_sums, CV_32F);
    else
        image_sums = temp;

    // Kernel types are derived from the integral image, not from the input image.
    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    if (cn == 1)
    {
        float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area();

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
               templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = sum(templ) / tsize.area();

        if (cn == 2)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
                   templ.rows, templ.cols, templ_sum[0], templ_sum[1]);
        else if (cn==3)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
                   templ.rows, templ.cols, templ_sum[0], templ_sum[1], templ_sum[2]);
        else
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
                   templ.rows, templ.cols, templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]);
    }

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
// OpenCL implementation of pyrUp: produces a destination exactly twice the source
// size in each dimension.
//
// Returns false (no OpenCL handling) when the source has more than 4 channels,
// borderType is not BORDER_DEFAULT, the depth is CV_64F without device double support,
// or a non-empty _dsz differs from twice the source size. Also returns false if
// the kernel cannot be created or the launch fails.
//
// On Intel devices with single-channel input the "pyrUp_unrolled" kernel is used
// with a quarter of the work-items (dst.cols/2 x dst.rows/2); otherwise "pyrUp"
// runs with one work-item per destination pixel.
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        return false;

    Size ssize = _src.size();
    if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
        return false;

    UMat src = _src.getUMat();
    Size dsize = Size(ssize.width * 2, ssize.height * 2);
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    // Intermediate floating type: double only for CV_64F input, float otherwise.
    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 16;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels, local_size
    );

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalThreads[2] = { (size_t)dst.cols, (size_t)dst.rows };
    size_t localThreads[2] = { (size_t)local_size, (size_t)local_size };
    ocl::Kernel k;
    if (ocl::Device::getDefault().isIntel() && channels == 1)
    {
        k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
        globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
    }
    else
        k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);

    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
    return k.run(2, globalThreads, localThreads, false);
}
// Launches the "updateMotionHistory" kernel with one work-item per silhouette pixel.
// The silhouette is bound read-only, the motion history image read-write, and
// timestamp / delbound are passed through as scalar arguments.
// Returns false if the kernel cannot be created or the launch fails.
static bool ocl_updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,
                                     float timestamp, float delbound )
{
    ocl::Kernel k("updateMotionHistory", ocl::video::updatemotionhistory_oclsrc);
    if (k.empty())
        return false;

    UMat silh = _silhouette.getUMat(), mhi = _mhi.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(silh), ocl::KernelArg::ReadWrite(mhi),
           timestamp, delbound);

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)silh.cols, (size_t)silh.rows };
    return k.run(2, globalsize, NULL, false);
}
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (cn > 4 || (depth == CV_64F && !doubleSupport)) return false; Size ssize = _src.size(); Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz; if (dsize.height < 2 || dsize.width < 2) return false; CV_Assert( ssize.width > 0 && ssize.height > 0 && std::abs(dsize.width*2 - ssize.width) <= 2 && std::abs(dsize.height*2 - ssize.height) <= 2 ); UMat src = _src.getUMat(); _dst.create( dsize, src.type() ); UMat dst = _dst.getUMat(); int float_depth = depth == CV_64F ? CV_64F : CV_32F; const int local_size = 256; int kercn = 1; if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel()) kercn = 4; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; char cvt[2][50]; String buildOptions = format( "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)), ocl::convertTypeStr(float_depth, depth, cn, cvt[0]), ocl::convertTypeStr(depth, float_depth, cn, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth), cn, kercn, float_depth, borderMap[borderType], local_size ); ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions); if (k.empty()) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); size_t localThreads[2] = { local_size/kercn, 1 }; size_t globalThreads[2] = { (src.cols + (kercn-1))/kercn, (dst.rows + 1) / 2 }; return k.run(2, globalThreads, localThreads, false); }
// Returns the contents of arr as a UMat.
//
// GPU_MAT inputs are downloaded into buf and OPENGL_BUFFER inputs are copied into
// buf; in both cases buf is returned. Every other kind is returned via
// arr.getUMat() and buf is left untouched.
UMat cv::superres::arrGetUMat(InputArray arr, UMat& buf)
{
    const int kind = arr.kind();

    if (kind == _InputArray::GPU_MAT)
    {
        arr.getGpuMat().download(buf);
        return buf;
    }

    if (kind == _InputArray::OPENGL_BUFFER)
    {
        arr.getOGlBuffer().copyTo(buf);
        return buf;
    }

    return arr.getUMat();
}
// Masked copy of this matrix into _dst.
//
// An empty mask forwards to the unmasked copyTo(). When built with HAVE_OPENCL the
// mask must be CV_8U with either 1 channel or as many channels as this matrix
// (asserted), and the "copyToMask" kernel is tried if OpenCL is in use, _dst is a
// UMat and dims <= 2. If the kernel is unavailable or fails, or without OpenCL, the
// copy is done through Mat::copyTo on a read-access mapping of this matrix.
void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
    if( _mask.empty() )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    {
        // Remember the buffer before create() so a reallocation can be detected.
        UMatData * prevu = _dst.getUMat().u;
        _dst.create( dims, size, type() );

        UMat dst = _dst.getUMat();

        bool haveDstUninit = false;
        if( prevu != dst.u ) // do not leave dst uninitialized
            haveDstUninit = true;

        String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
                             ocl::memopTypeToStr(depth()), cn, mcn,
                             haveDstUninit ? " -D HAVE_DST_UNINIT" : "");

        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
        if (!k.empty())
        {
            // A freshly allocated destination is bound write-only, an existing one read-write.
            k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
                   ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
                                   ocl::KernelArg::ReadWrite(dst));

            // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
            size_t globalsize[2] = { (size_t)cols, (size_t)rows };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
}
static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result ) { const ocl::Device & d = ocl::Device::getDefault(); #ifdef __ANDROID__ if (d.isNVidia()) return false; #endif const int cn = _src.channels(); if (cn > 4) return false; int type = _src.type(), depth = CV_MAT_DEPTH(type); bool doubleSupport = d.doubleFPConfig() > 0, haveMask = _mask.kind() != _InputArray::NONE; if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) || (!doubleSupport && depth == CV_64F)) return false; UMat src = _src.getUMat(); if (normType == NORM_INF) { if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask, std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U)) return false; } else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) { Scalar sc; bool unstype = depth == CV_8U || depth == CV_16U; if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ? OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) ) return false; double s = 0.0; for (int i = 0; i < (haveMask ? cn : 1); ++i) s += sc[i]; result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s); } return true; }
// Launches the "diffSign" kernel on _src1 and _src2 (both bound read-only) and writes
// to _dst, which is created with the size and type of _src1. The x extent of the
// global size is cols * channels. Returns false if the kernel cannot be created or
// the launch fails.
static bool ocl_diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst)
{
    ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc);
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat();
    UMat src2 = _src2.getUMat();

    _dst.create(src1.size(), src1.type());
    UMat dst = _dst.getUMat();

    const int cn = src1.channels();

    k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
           ocl::KernelArg::ReadOnlyNoSize(src2),
           ocl::KernelArg::WriteOnly(dst, cn));

    size_t globalsize[2] = { (size_t)src1.cols * cn, (size_t)src1.rows };
    return k.run(2, globalsize, NULL, false);
}
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC || thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) || (!doubleSupport && depth == CV_64F)) return false; const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV" }; ocl::Device dev = ocl::Device::getDefault(); int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc, format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type], ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), type); UMat dst = _dst.getUMat(); if (depth <= CV_32S) thresh = cvFloor(thresh); const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 }; double min_val = min_vals[depth]; k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val)))); size_t globalsize[2] = { static_cast<size_t>(dst.cols * cn / kercn), static_cast<size_t>(dst.rows) }; globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size; return k.run(2, globalsize, NULL, false); }
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) { bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) return false; static const int tileSize = 16; String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), tileSize, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); if (kcols.empty()) return false; UMat src = _src.getUMat(); Size src_size = src.size(); Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); UMat buf(bufsize, sdepth); UMat buf_sq(bufsize, sqdepth); kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); size_t gt = src.cols, lt = tileSize; if (!kcols.run(1, >, <, false)) return false; ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); if (krows.empty()) return false; Size sumsize(src_size.width + 1, src_size.height + 1); _sum.create(sumsize, sdepth); UMat sum = _sum.getUMat(); _sqsum.create(sumsize, sqdepth); UMat sum_sq = _sqsum.getUMat(); krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); gt = src.rows; return krows.run(1, >, <, false); }
// OpenCL implementation of pyrDown (BORDER_DEFAULT only).
//
// The destination size is _dsz, or ((width+1)/2, (height+1)/2) when _dsz is empty.
// Returns false (no OpenCL handling) when the source has more than 4 channels,
// borderType is not BORDER_DEFAULT, or the depth is CV_64F without device double
// support. Asserts that the destination is within 2 pixels of half the source size
// in each dimension. Also returns false if the kernel cannot be created or the
// launch fails.
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if ((depth == CV_64F) && !(doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
            std::abs(dsize.width*2 - ssize.width) <= 2 &&
            std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    // Intermediate floating type: double only for CV_64F input, float otherwise.
    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels
    );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    // The global size spans source columns by destination rows.
    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t localThreads[2] = { 256, 1 };
    size_t globalThreads[2] = { (size_t)src.cols, (size_t)dst.rows };
    return k.run(2, globalThreads, localThreads, false);
}
// OpenCL implementation of flip.
//
// flipCode == 0 selects "arithm_flip_rows", flipCode > 0 "arithm_flip_cols" and
// flipCode < 0 "arithm_flip_rows_cols". Inputs with 3 channels or more than
// 4 channels are not handled (returns false). When the flip cannot change the
// image (single column for a column flip, single row for a row flip, a 1x1 image
// for both) the source is simply copied and true is returned.
// Also returns false if the kernel cannot be created or the launch fails.
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
{
    int type = _src.type(), cn = CV_MAT_CN(type);

    if (cn > 4 || cn == 3)
        return false;

    const char * kernelName;
    int flipType;

    if (flipCode == 0)
    {
        kernelName = "arithm_flip_rows";
        flipType = FLIP_ROWS;
    }
    else if (flipCode > 0)
    {
        kernelName = "arithm_flip_cols";
        flipType = FLIP_COLS;
    }
    else
    {
        kernelName = "arithm_flip_rows_cols";
        flipType = FLIP_BOTH;
    }

    Size size = _src.size();
    int cols = size.width, rows = size.height;
    if ((cols == 1 && flipType == FLIP_COLS) ||
        (rows == 1 && flipType == FLIP_ROWS) ||
        (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
    {
        _src.copyTo(_dst);
        return true;
    }

    ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, format( "-D type=%s", ocl::memopTypeToStr(type)));
    if (k.empty())
        return false;

    _dst.create(size, type);
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    // Only half of the flipped dimension (rounded up) is iterated: columns for a
    // pure column flip, rows whenever the FLIP_ROWS bit is set in flipType.
    cols = (flipType == FLIP_COLS) ? ((cols + 1) / 2) : cols;
    rows = (flipType & FLIP_ROWS) ? ((rows + 1) / 2) : rows;

    // int -> size_t inside a braced initializer is a narrowing conversion, hence the casts.
    size_t globalsize[2] = { (size_t)cols, (size_t)rows };
    return k.args(ocl::KernelArg::ReadOnlyNoSize(src),
                  ocl::KernelArg::WriteOnly(dst), rows, cols).run(2, globalsize, NULL, false);
}