void UMat::copyTo(OutputArray _dst, InputArray _mask) const { if( _mask.empty() ) { copyTo(_dst); return; } #ifdef HAVE_OPENCL int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype); CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) ); if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2) { UMatData * prevu = _dst.getUMat().u; _dst.create( dims, size, type() ); UMat dst = _dst.getUMat(); bool haveDstUninit = false; if( prevu != dst.u ) // do not leave dst uninitialized haveDstUninit = true; String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s", ocl::memopTypeToStr(depth()), cn, mcn, haveDstUninit ? " -D HAVE_DST_UNINIT" : ""); ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts); if (!k.empty()) { k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()), haveDstUninit ? ocl::KernelArg::WriteOnly(dst) : ocl::KernelArg::ReadWrite(dst)); size_t globalsize[2] = { (size_t)cols, (size_t)rows }; if (k.run(2, globalsize, NULL, false)) { CV_IMPL_ADD(CV_IMPL_OCL); return; } } } #endif Mat src = getMat(ACCESS_READ); src.copyTo(_dst, _mask); }
void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const { bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON; int stype = type(), cn = CV_MAT_CN(stype); if( _type < 0 ) _type = _dst.fixedType() ? _dst.type() : stype; else _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn); int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type); if( sdepth == ddepth && noScale ) { copyTo(_dst); return; } #ifdef HAVE_OPENCL bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool needDouble = sdepth == CV_64F || ddepth == CV_64F; if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() && ((needDouble && doubleSupport) || !needDouble) ) { int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4; char cvt[2][40]; ocl::Kernel k("convertTo", ocl::core::convert_oclsrc, format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s", ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth), ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]), ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (!k.empty()) { UMat src = *this; _dst.create( size(), _type ); UMat dst = _dst.getUMat(); float alphaf = (float)alpha, betaf = (float)beta; ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), dstarg = ocl::KernelArg::WriteOnly(dst, cn); if (wdepth == CV_32F) k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI); else k.args(srcarg, dstarg, alpha, beta, rowsPerWI); size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI }; if (k.run(2, globalsize, NULL, false)) { CV_IMPL_ADD(CV_IMPL_OCL); return; } } } #endif Mat m = getMat(ACCESS_READ); m.convertTo(_dst, _type, alpha, beta); }
static bool ocl_diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst) { ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc); if (k.empty()) return false; UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); _dst.create(src1.size(), src1.type()); UMat dst = _dst.getUMat(); int cn = src1.channels(); k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2), ocl::KernelArg::WriteOnly(dst, cn)); size_t globalsize[2] = { (size_t)src1.cols * cn, (size_t)src1.rows }; return k.run(2, globalsize, NULL, false); }
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { int type = _src.type(), cn = CV_MAT_CN(type); Size size = _src.size(); if ( type != CV_8UC1 || type != CV_8UC2 || type != CV_8UC4 ) return false; int templateWindowHalfWize = templateWindowSize / 2; int searchWindowHalfSize = searchWindowSize / 2; templateWindowSize = templateWindowHalfWize * 2 + 1; searchWindowSize = searchWindowHalfSize * 2 + 1; int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS); int almostTemplateWindowSizeSqBinShift = -1; char cvt[2][40]; String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" " -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s", templateWindowSize, searchWindowSize, ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, CTA_SIZE, templateWindowHalfWize, searchWindowHalfSize, ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn, CTA_SIZE >> 1, ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1])); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); if (k.empty()) return false; UMat almostDist2Weight; if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn, almostTemplateWindowSizeSqBinShift)) return false; CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); UMat srcex; int borderSize = searchWindowHalfSize + templateWindowHalfWize; copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); _dst.create(size, type); UMat dst = _dst.getUMat(); int searchWindowSizeSq = searchWindowSize * searchWindowSize; Size upColSumSize(size.width, searchWindowSizeSq * nblocksy); Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy); UMat buffer(upColSumSize + colSumSize, CV_32SC(cn)); srcex = srcex(Rect(Point(borderSize, borderSize), size)); k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(almostDist2Weight), ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift); size_t globalsize[2] = { nblocksx * CTA_SIZE, nblocksy }, localsize[2] = { CTA_SIZE, 1 }; return k.run(2, globalsize, localsize, false); }
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) { bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) return false; static const int tileSize = 16; String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), tileSize, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); if (kcols.empty()) return false; UMat src = _src.getUMat(); Size src_size = src.size(); Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); UMat buf(bufsize, sdepth); UMat buf_sq(bufsize, sqdepth); kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); size_t gt = src.cols, lt = tileSize; if (!kcols.run(1, >, <, false)) return false; ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); if (krows.empty()) return false; Size sumsize(src_size.width + 1, src_size.height + 1); _sum.create(sumsize, sdepth); UMat sum = _sum.getUMat(); _sqsum.create(sumsize, sqdepth); UMat sum_sq = _sqsum.getUMat(); krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); gt = src.rows; return krows.run(1, >, <, false); }
Point PlaneWarperOcl::warp(InputArray src, InputArray K, InputArray R, InputArray T, int interp_mode, int border_mode, OutputArray dst) { UMat uxmap, uymap; Rect dst_roi = buildMaps(src.size(), K, R, T, uxmap, uymap); dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type()); UMat udst = dst.getUMat(); remap(src, udst, uxmap, uymap, interp_mode, border_mode); return dst_roi.tl(); }
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernelX, InputArray _kernelY, double delta, int borderType) { const ocl::Device & dev = ocl::Device::getDefault(); int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) && (_src.offset() == 0) && (_src.step() % 4 == 0) && (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) ) return false; Mat kernelX = _kernelX.getMat().reshape(1, 1); if (kernelX.cols % 2 != 1) return false; Mat kernelY = _kernelY.getMat().reshape(1, 1); if (kernelY.cols % 2 != 1) return false; if (ddepth < 0) ddepth = sdepth; Size size = _src.size(); size_t globalsize[2] = { 0, 0 }; size_t localsize[2] = { 0, 0 }; globalsize[0] = size.width / 16; globalsize[1] = size.height / 2; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" }; char build_opts[1024]; sprintf(build_opts, "-D %s %s%s", borderMap[borderType], ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(), ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str()); ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts); if (kernel.empty()) return false; UMat src = _src.getUMat(); _dst.create(size, CV_MAKETYPE(ddepth, cn)); if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) return false; UMat dst = _dst.getUMat(); int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src)); idxArg = kernel.set(idxArg, (int)src.step); idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); idxArg = kernel.set(idxArg, (int)dst.step); idxArg = kernel.set(idxArg, (int)dst.rows); idxArg = kernel.set(idxArg, (int)dst.cols); idxArg = kernel.set(idxArg, static_cast<float>(delta)); return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); }
static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst) { UMat src = _src.getUMat(), dst = _dst.getUMat(); for (int y = 0; y < ny; ++y) for (int x = 0; x < nx; ++x) { Rect roi(x * src.cols, y * src.rows, src.cols, src.rows); UMat hdr(dst, roi); src.copyTo(hdr); } return true; }
void UMat::copyTo(OutputArray _dst, InputArray _mask) const { if( _mask.empty() ) { copyTo(_dst); return; } int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype); CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) ); if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2) { UMatData * prevu = _dst.getUMat().u; _dst.create( dims, size, type() ); UMat dst = _dst.getUMat(); if( prevu != dst.u ) // do not leave dst uninitialized dst = Scalar(0); ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, format("-D COPY_TO_MASK -D T=%s -D scn=%d -D mcn=%d", ocl::memopTypeToStr(depth()), cn, mcn)); if (!k.empty()) { k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()), ocl::KernelArg::WriteOnly(dst)); size_t globalsize[2] = { cols, rows }; if (k.run(2, globalsize, NULL, false)) return; } } Mat src = getMat(ACCESS_READ); src.copyTo(_dst, _mask); }
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result) { matchTemplate(_image, _templ, _result, CV_TM_CCORR); UMat image_sums, temp; integral(_image, temp); if (temp.depth() == CV_64F) temp.convertTo(image_sums, CV_32F); else image_sums = temp; int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc, format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); if (k.empty()) return false; UMat templ = _templ.getUMat(); Size size = _image.size(), tsize = templ.size(); _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F); UMat result = _result.getUMat(); if (cn == 1) { float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area(); k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); } else { Vec4f templ_sum = Vec4f::all(0); templ_sum = sum(templ) / tsize.area(); if (cn == 2) k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum[0], templ_sum[1]); else if (cn==3) k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum[0], templ_sum[1], templ_sum[2]); else k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]); } size_t globalsize[2] = { result.cols, result.rows }; return k.run(2, globalsize, NULL, false); }
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); if (channels > 4 || borderType != BORDER_DEFAULT) return false; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (depth == CV_64F && !doubleSupport) return false; Size ssize = _src.size(); if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2))) return false; UMat src = _src.getUMat(); Size dsize = Size(ssize.width * 2, ssize.height * 2); _dst.create( dsize, src.type() ); UMat dst = _dst.getUMat(); int float_depth = depth == CV_64F ? CV_64F : CV_32F; const int local_size = 16; char cvt[2][50]; String buildOptions = format( "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth), channels, local_size ); size_t globalThreads[2] = { dst.cols, dst.rows }; size_t localThreads[2] = { local_size, local_size }; ocl::Kernel k; if (ocl::Device::getDefault().isIntel() && channels == 1) { k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions); globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2; } else k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions); if (k.empty()) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); return k.run(2, globalThreads, localThreads, false); }
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (cn > 4 || (depth == CV_64F && !doubleSupport)) return false; Size ssize = _src.size(); Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz; if (dsize.height < 2 || dsize.width < 2) return false; CV_Assert( ssize.width > 0 && ssize.height > 0 && std::abs(dsize.width*2 - ssize.width) <= 2 && std::abs(dsize.height*2 - ssize.height) <= 2 ); UMat src = _src.getUMat(); _dst.create( dsize, src.type() ); UMat dst = _dst.getUMat(); int float_depth = depth == CV_64F ? CV_64F : CV_32F; const int local_size = 256; int kercn = 1; if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel()) kercn = 4; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; char cvt[2][50]; String buildOptions = format( "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)), ocl::convertTypeStr(float_depth, depth, cn, cvt[0]), ocl::convertTypeStr(depth, float_depth, cn, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth), cn, kercn, float_depth, borderMap[borderType], local_size ); ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions); if (k.empty()) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); size_t localThreads[2] = { local_size/kercn, 1 }; size_t globalThreads[2] = { (src.cols + (kercn-1))/kercn, (dst.rows + 1) / 2 }; return k.run(2, globalThreads, localThreads, false); }
bool BackgroundSubtractorMOG2Impl::ocl_getBackgroundImage(OutputArray _backgroundImage) const { _backgroundImage.create(frameSize, frameType); UMat dst = _backgroundImage.getUMat(); int idxArg = 0; idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_bgmodelUsedModes)); idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_weight)); idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_mean)); idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst)); kernel_getBg.set(idxArg, backgroundRatio); size_t globalsize[2] = {(size_t)u_bgmodelUsedModes.cols, (size_t)u_bgmodelUsedModes.rows}; return kernel_getBg.run(2, globalsize, NULL, false); }
static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result) { _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F); if (_image.channels() == 1) return(convolve_dft(_image, _templ, _result)); else { UMat image = _image.getUMat(); UMat templ = _templ.getUMat(); UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F); bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_); if (ok==false) return false; UMat result = _result.getUMat(); return (extractFirstChannel_32F(result_, _result, _image.channels())); } }
static bool ocl_Laplacian5(InputArray _src, OutputArray _dst, const Mat & kd, const Mat & ks, double scale, double delta, int borderType, int depth, int ddepth) { int iscale = cvRound(scale), idelta = cvRound(delta); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON; int cn = _src.channels(), wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1; if (!doubleSupport && wdepth == CV_64F) return false; char cvt[2][40]; ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc, format("-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d " "-D convertToWT=%s -D convertToDT=%s%s", ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), ocl::typeToStr(wdepth), wdepth, ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat d2x, d2y; sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType); sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType); UMat dst = _dst.getUMat(); ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x), d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y), dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); if (wdepth >= CV_32F) k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta); else k.args(d2xarg, d2yarg, dstarg, iscale, idelta); size_t globalsize[] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); }
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); if (channels > 4 || borderType != BORDER_DEFAULT) return false; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ((depth == CV_64F) && !(doubleSupport)) return false; Size ssize = _src.size(); Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz; CV_Assert( ssize.width > 0 && ssize.height > 0 && std::abs(dsize.width*2 - ssize.width) <= 2 && std::abs(dsize.height*2 - ssize.height) <= 2 ); UMat src = _src.getUMat(); _dst.create( dsize, src.type() ); UMat dst = _dst.getUMat(); int float_depth = depth == CV_64F ? CV_64F : CV_32F; char cvt[2][50]; String buildOptions = format( "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " "-D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth), channels ); ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions); if (k.empty()) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); size_t localThreads[2] = { 256, 1 }; size_t globalThreads[2] = { src.cols, dst.rows }; return k.run(2, globalThreads, localThreads, false); }
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC || thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) || (!doubleSupport && depth == CV_64F)) return false; const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV" }; ocl::Device dev = ocl::Device::getDefault(); int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc, format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type], ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), type); UMat dst = _dst.getUMat(); if (depth <= CV_32S) thresh = cvFloor(thresh); const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 }; double min_val = min_vals[depth]; k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val)))); size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows }; globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size; return k.run(2, globalsize, NULL, false); }
void UMat::copyTo(OutputArray _dst) const { int dtype = _dst.type(); if( _dst.fixedType() && dtype != type() ) { CV_Assert( channels() == CV_MAT_CN(dtype) ); convertTo( _dst, dtype ); return; } if( empty() ) { _dst.release(); return; } size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize(); for( i = 0; i < (size_t)dims; i++ ) sz[i] = size.p[i]; sz[dims-1] *= esz; ndoffset(srcofs); srcofs[dims-1] *= esz; _dst.create( dims, size.p, type() ); if( _dst.isUMat() ) { UMat dst = _dst.getUMat(); if( u == dst.u && dst.offset == offset ) return; if (u->currAllocator == dst.u->currAllocator) { dst.ndoffset(dstofs); dstofs[dims-1] *= esz; u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false); return; } } Mat dst = _dst.getMat(); u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p); }
bool BackgroundSubtractorMOG2Impl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate) { ++nframes; learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history ); CV_Assert(learningRate >= 0); _fgmask.create(_image.size(), CV_8U); UMat fgmask = _fgmask.getUMat(); const double alpha1 = 1.0f - learningRate; UMat frame = _image.getUMat(); float varMax = MAX(fVarMin, fVarMax); float varMin = MIN(fVarMin, fVarMax); int idxArg = 0; idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame)); idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_bgmodelUsedModes)); idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_weight)); idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_mean)); idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_variance)); idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask)); idxArg = kernel_apply.set(idxArg, (float)learningRate); //alphaT idxArg = kernel_apply.set(idxArg, (float)alpha1); idxArg = kernel_apply.set(idxArg, (float)(-learningRate*fCT)); //prune idxArg = kernel_apply.set(idxArg, (float)varThreshold); //c_Tb idxArg = kernel_apply.set(idxArg, backgroundRatio); //c_TB idxArg = kernel_apply.set(idxArg, varThresholdGen); //c_Tg idxArg = kernel_apply.set(idxArg, varMin); idxArg = kernel_apply.set(idxArg, varMax); idxArg = kernel_apply.set(idxArg, fVarInit); idxArg = kernel_apply.set(idxArg, fTau); if (bShadowDetection) kernel_apply.set(idxArg, nShadowDetection); size_t globalsize[] = {(size_t)frame.cols, (size_t)frame.rows, 1}; return kernel_apply.run(2, globalsize, NULL, true); }
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) { int type = _src.type(), cn = CV_MAT_CN(type); if (cn > 4 || cn == 3) return false; const char * kernelName; int flipType; if (flipCode == 0) kernelName = "arithm_flip_rows", flipType = FLIP_ROWS; else if (flipCode > 0) kernelName = "arithm_flip_cols", flipType = FLIP_COLS; else kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; Size size = _src.size(); int cols = size.width, rows = size.height; if ((cols == 1 && flipType == FLIP_COLS) || (rows == 1 && flipType == FLIP_ROWS) || (rows == 1 && cols == 1 && flipType == FLIP_BOTH)) { _src.copyTo(_dst); return true; } ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, format( "-D type=%s", ocl::memopTypeToStr(type))); if (k.empty()) return false; _dst.create(size, type); UMat src = _src.getUMat(), dst = _dst.getUMat(); cols = flipType == FLIP_COLS ? ((cols+1)/2) : cols; rows = flipType & FLIP_ROWS ? ((rows+1)/2) : rows; size_t globalsize[2] = { cols, rows }; return k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), rows, cols).run(2, globalsize, NULL, false); }
static bool ocl_blendLinear( InputArray _src1, InputArray _src2, InputArray _weights1, InputArray _weights2, OutputArray _dst ) { int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); char cvt[30]; ocl::Kernel k("blendLinear", ocl::imgproc::blend_linear_oclsrc, format("-D T=%s -D cn=%d -D convertToT=%s", ocl::typeToStr(depth), cn, ocl::convertTypeStr(CV_32F, depth, 1, cvt))); if (k.empty()) return false; UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), weights1 = _weights1.getUMat(), weights2 = _weights2.getUMat(), dst = _dst.getUMat(); k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2), ocl::KernelArg::ReadOnlyNoSize(weights1), ocl::KernelArg::ReadOnlyNoSize(weights2), ocl::KernelArg::WriteOnly(dst)); size_t globalsize[2] = { (size_t)dst.cols, (size_t)dst.rows }; return k.run(2, globalsize, NULL, false); }
static bool ocl_calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, const UMat & ubtvWeights) { int cn = _src.channels(); ocl::Kernel k("calcBtvRegularization", ocl::superres::superres_btvl1_oclsrc, format("-D cn=%d", cn)); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), src.type()); _dst.setTo(Scalar::all(0)); UMat dst = _dst.getUMat(); const int ksize = (btvKernelSize - 1) / 2; k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ksize, ocl::KernelArg::PtrReadOnly(ubtvWeights)); size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows }; return k.run(2, globalsize, NULL, false); }
static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result) { int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn); char cvt[40]; ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc, format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn)); if (k.empty()) return false; UMat image = _image.getUMat(), templ = _templ.getUMat(); _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); UMat result = _result.getUMat(); k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ), ocl::KernelArg::WriteOnly(result)); size_t globalsize[2] = { result.cols, result.rows }; return k.run(2, globalsize, NULL, false); }
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result) { int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn); ocl::Device dev = ocl::Device::getDefault(); int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; int rated_cn = cn; int wtype1 = wtype; if (pxPerWIx!=1) { rated_cn = pxPerWIx; type = CV_MAKE_TYPE(depth, rated_cn); wtype1 = CV_MAKE_TYPE(wdepth, rated_cn); } char cvt[40]; char cvt1[40]; const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt); const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1); ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc, format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype), convertToWT, convertToWT1, cn, pxPerWIx)); if (k.empty()) return false; UMat image = _image.getUMat(), templ = _templ.getUMat(); _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1); UMat result = _result.getUMat(); k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ), ocl::KernelArg::WriteOnly(result)); size_t globalsize[2] = { (result.cols+pxPerWIx-1)/pxPerWIx, result.rows}; return k.run(2, globalsize, NULL, false); }
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); if ((channels != 1 && channels != 2 && channels != 4) || borderType != BORDER_DEFAULT) return false; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (depth == CV_64F && !doubleSupport) return false; Size ssize = _src.size(); if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2))) return false; UMat src = _src.getUMat(); Size dsize = Size(ssize.width * 2, ssize.height * 2); _dst.create( dsize, src.type() ); UMat dst = _dst.getUMat(); int float_depth = depth == CV_64F ? CV_64F : CV_32F; char cvt[2][50]; ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc, format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); size_t globalThreads[2] = {dst.cols, dst.rows}; size_t localThreads[2] = {16, 16}; return k.run(2, globalThreads, localThreads, false); }
/* dst = src */ void Mat::copyTo( OutputArray _dst ) const { int dtype = _dst.type(); if( _dst.fixedType() && dtype != type() ) { CV_Assert( channels() == CV_MAT_CN(dtype) ); convertTo( _dst, dtype ); return; } if( empty() ) { _dst.release(); return; } if( _dst.isUMat() ) { _dst.create( dims, size.p, type() ); UMat dst = _dst.getUMat(); size_t i, sz[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize(); for( i = 0; i < (size_t)dims; i++ ) sz[i] = size.p[i]; sz[dims-1] *= esz; dst.ndoffset(dstofs); dstofs[dims-1] *= esz; dst.u->currAllocator->upload(dst.u, data, dims, sz, dstofs, dst.step.p, step.p); return; } if( dims <= 2 ) { _dst.create( rows, cols, type() ); Mat dst = _dst.getMat(); if( data == dst.data ) return; if( rows > 0 && cols > 0 ) { const uchar* sptr = data; uchar* dptr = dst.data; Size sz = getContinuousSize(*this, dst); size_t len = sz.width*elemSize(); for( ; sz.height--; sptr += step, dptr += dst.step ) memcpy( dptr, sptr, len ); } return; } _dst.create( dims, size, type() ); Mat dst = _dst.getMat(); if( data == dst.data ) return; if( total() != 0 ) { const Mat* arrays[] = { this, &dst }; uchar* ptrs[2]; NAryMatIterator it(arrays, ptrs, 2); size_t sz = it.size*elemSize(); for( size_t i = 0; i < it.nplanes; i++, ++it ) memcpy(ptrs[1], ptrs[0], sz); } }
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh, int aperture_size, bool L2gradient, int cn, const Size & size) { CV_INSTRUMENT_REGION_OPENCL() UMat map; const ocl::Device &dev = ocl::Device::getDefault(); int max_wg_size = (int)dev.maxWorkGroupSize(); int lSizeX = 32; int lSizeY = max_wg_size / 32; if (lSizeY == 0) { lSizeX = 16; lSizeY = max_wg_size / 16; } if (lSizeY == 0) { lSizeY = 1; } if (aperture_size == 7) { low_thresh = low_thresh / 16.0f; high_thresh = high_thresh / 16.0f; } if (L2gradient) { low_thresh = std::min(32767.0f, low_thresh); high_thresh = std::min(32767.0f, high_thresh); if (low_thresh > 0) low_thresh *= low_thresh; if (high_thresh > 0) high_thresh *= high_thresh; } int low = cvFloor(low_thresh), high = cvFloor(high_thresh); if (!useCustomDeriv && aperture_size == 3 && !_src.isSubmatrix()) { /* stage1_with_sobel: Sobel operator Calc magnitudes Non maxima suppression Double thresholding */ char cvt[40]; ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc, format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s", cn, ocl::memopTypeToStr(_src.depth()), ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt), ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)), lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : "")); if (with_sobel.empty()) return false; UMat src = _src.getUMat(); map.create(size, CV_32S); with_sobel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(map), (float) low, (float) high); size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }, localsize[2] = { (size_t)lSizeX, (size_t)lSizeY }; if (!with_sobel.run(2, globalsize, localsize, false)) return false; } else { /* stage1_without_sobel: Calc magnitudes Non maxima suppression Double thresholding */ double scale = 1.0; if (aperture_size == 7) { scale = 1 / 16.0; } UMat dx, dy; if (!useCustomDeriv) { Sobel(_src, dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE); Sobel(_src, dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE); } else { dx = dx_; dy = dy_; } ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc, format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s", cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : "")); if (without_sobel.empty()) return false; map.create(size, CV_32S); without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy), ocl::KernelArg::WriteOnly(map), low, high); size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }, localsize[2] = { (size_t)lSizeX, (size_t)lSizeY }; if (!without_sobel.run(2, globalsize, localsize, false)) return false; } int PIX_PER_WI = 8; /* stage2: hysteresis (add weak edges if they are connected with strong edges) */ int sizey = lSizeY / PIX_PER_WI; if (sizey == 0) sizey = 1; size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey }; ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc, format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d", PIX_PER_WI, lSizeX, sizey)); if (edgesHysteresis.empty()) return false; edgesHysteresis.args(ocl::KernelArg::ReadWrite(map)); if (!edgesHysteresis.run(2, globalsize, localsize, false)) return false; // get edges ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc, format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI)); if (getEdgesKernel.empty()) return false; _dst.create(size, CV_8UC1); UMat dst = _dst.getUMat(); getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst)); return getEdgesKernel.run(2, globalsize, NULL, false); }
static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result) { matchTemplate(_image, _templ, _result, CV_TM_CCORR); UMat temp, image_sums, image_sqsums; integral(_image, image_sums, image_sqsums, CV_32F, CV_32F); int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc, format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); if (k.empty()) return false; UMat templ = _templ.getUMat(); Size size = _image.size(), tsize = templ.size(); _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F); UMat result = _result.getUMat(); float scale = 1.f / tsize.area(); if (cn == 1) { float templ_sum = (float)sum(templ)[0]; multiply(templ, templ, temp, 1, CV_32F); float templ_sqsum = (float)sum(temp)[0]; templ_sqsum -= scale * templ_sum * templ_sum; templ_sum *= scale; if (templ_sqsum < DBL_EPSILON) { result = Scalar::all(1); return true; } k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum); } else { Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0); templ_sum = sum(templ); multiply(templ, templ, temp, 1, CV_32F); templ_sqsum = sum(temp); float templ_sqsum_sum = 0; for (int i = 0; i < cn; i ++) templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; templ_sum *= scale; if (templ_sqsum_sum < DBL_EPSILON) { result = Scalar::all(1); return true; } k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum_sum); } size_t globalsize[2] = { result.cols, result.rows }; return k.run(2, globalsize, NULL, false); }
static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result) { ConvolveBuf buf; CV_Assert(_image.type() == CV_32F); CV_Assert(_templ.type() == CV_32F); buf.create(_image.size(), _templ.size()); _result.create(buf.result_size, CV_32F); UMat image = _image.getUMat(); UMat templ = _templ.getUMat(); UMat result = _result.getUMat(); Size& block_size = buf.block_size; Size& dft_size = buf.dft_size; UMat& image_block = buf.image_block; UMat& templ_block = buf.templ_block; UMat& result_data = buf.result_data; UMat& image_spect = buf.image_spect; UMat& templ_spect = buf.templ_spect; UMat& result_spect = buf.result_spect; UMat templ_roi = templ; copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, templ_block.cols - templ_roi.cols, BORDER_ISOLATED); dft(templ_block, templ_spect, 0, templ.rows); // Process all blocks of the result matrix for (int y = 0; y < result.rows; y += block_size.height) { for (int x = 0; x < result.cols; x += block_size.width) { Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, std::min(y + dft_size.height, image.rows) - y); Rect roi0(x, y, image_roi_size.width, image_roi_size.height); UMat image_roi(image, roi0); copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, 0, image_block.cols - image_roi.cols, BORDER_ISOLATED); dft(image_block, image_spect, 0); mulSpectrums(image_spect, templ_spect, result_spect, 0, true); dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE); Size result_roi_size(std::min(x + block_size.width, result.cols) - x, std::min(y + block_size.height, result.rows) - y); Rect roi1(x, y, result_roi_size.width, result_roi_size.height); Rect roi2(0, 0, result_roi_size.width, result_roi_size.height); UMat result_roi(result, roi1); UMat result_block(result_data, roi2); result_block.copyTo(result_roi); } } return true; }
bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptors) { int dsize = params->descriptorSize(); int nFeatures = keypoints.cols; if (nFeatures == 0) { _descriptors.release(); return true; } _descriptors.create(nFeatures, dsize, CV_32F); UMat descriptors; if( _descriptors.isUMat() ) descriptors = _descriptors.getUMat(); else descriptors.create(nFeatures, dsize, CV_32F); ocl::Kernel kerCalcDesc, kerNormDesc; if( dsize == 64 ) { kerCalcDesc.create("SURF_computeDescriptors64", ocl::xfeatures2d::surf_oclsrc, kerOpts); kerNormDesc.create("SURF_normalizeDescriptors64", ocl::xfeatures2d::surf_oclsrc, kerOpts); } else { CV_Assert(dsize == 128); kerCalcDesc.create("SURF_computeDescriptors128", ocl::xfeatures2d::surf_oclsrc, kerOpts); kerNormDesc.create("SURF_normalizeDescriptors128", ocl::xfeatures2d::surf_oclsrc, kerOpts); } size_t localThreads[] = {6, 6}; size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]}; if(haveImageSupport) { kerCalcDesc.args(imgTex, img_rows, img_cols, ocl::KernelArg::ReadOnlyNoSize(keypoints), ocl::KernelArg::WriteOnlyNoSize(descriptors)); } else { kerCalcDesc.args(ocl::KernelArg::ReadOnlyNoSize(img), img_rows, img_cols, ocl::KernelArg::ReadOnlyNoSize(keypoints), ocl::KernelArg::WriteOnlyNoSize(descriptors)); } if(!kerCalcDesc.run(2, globalThreads, localThreads, true)) return false; size_t localThreads_n[] = {dsize, 1}; size_t globalThreads_n[] = {nFeatures*localThreads_n[0], localThreads_n[1]}; globalThreads[0] = nFeatures * localThreads[0]; globalThreads[1] = localThreads[1]; bool ok = kerNormDesc.args(ocl::KernelArg::ReadWriteNoSize(descriptors)). run(2, globalThreads_n, localThreads_n, true); if(ok && !_descriptors.isUMat()) descriptors.copyTo(_descriptors); return ok; }