static bool ocl_Laplacian5(InputArray _src, OutputArray _dst, const Mat & kd, const Mat & ks, double scale, double delta, int borderType, int depth, int ddepth) { int iscale = cvRound(scale), idelta = cvRound(delta); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON; int cn = _src.channels(), wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1; if (!doubleSupport && wdepth == CV_64F) return false; char cvt[2][40]; ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc, format("-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d " "-D convertToWT=%s -D convertToDT=%s%s", ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), ocl::typeToStr(wdepth), wdepth, ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat d2x, d2y; sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType); sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType); UMat dst = _dst.getUMat(); ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x), d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y), dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); if (wdepth >= CV_32F) k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta); else k.args(d2xarg, d2yarg, dstarg, iscale, idelta); size_t globalsize[] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); }
void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType ) { int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); if (ddepth < 0) ddepth = sdepth; int dtype = CV_MAKE_TYPE(ddepth, cn); _dst.create( _src.size(), dtype ); #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::useTegra() && scale == 1.0 && delta == 0) { Mat src = _src.getMat(), dst = _dst.getMat(); if (ksize == 3 && tegra::sobel3x3(src, dst, dx, dy, borderType)) return; if (ksize == -1 && tegra::scharr(src, dst, dx, dy, borderType)) return; } #endif #ifdef HAVE_IPP CV_IPP_CHECK() { if (ksize < 0) { if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } } else if (0 < ksize) { if (IPPDerivSobel(_src, _dst, ddepth, dx, dy, ksize, scale, delta, borderType)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } } } #endif int ktype = std::max(CV_32F, std::max(ddepth, sdepth)); Mat kx, ky; getDerivKernels( kx, ky, dx, dy, ksize, false, ktype ); if( scale != 1 ) { // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part if( dx == 0 ) kx *= scale; else ky *= scale; } sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType ); }
void Scharr( const Mat& src, Mat& dst, int ddepth, int dx, int dy, double scale, double delta, int borderType ) { int ktype = std::max(CV_32F, std::max(ddepth, src.depth())); Mat kx, ky; getScharrKernels( kx, ky, dx, dy, false, ktype ); if( scale != 1 ) { // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part if( dx == 0 ) kx *= scale; else ky *= scale; } sepFilter2D( src, dst, ddepth, kx, ky, Point(-1,-1), delta, borderType ); }
void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType ) { int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); if (ddepth < 0) ddepth = sdepth; _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) ); #ifdef HAVE_TEGRA_OPTIMIZATION if (scale == 1.0 && delta == 0) { Mat src = _src.getMat(), dst = _dst.getMat(); if (ksize == 3 && tegra::sobel3x3(src, dst, dx, dy, borderType)) return; if (ksize == -1 && tegra::scharr(src, dst, dx, dy, borderType)) return; } #endif #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if(dx < 3 && dy < 3 && cn == 1 && borderType == BORDER_REPLICATE) { Mat src = _src.getMat(), dst = _dst.getMat(); if (IPPDeriv(src, dst, ddepth, dx, dy, ksize,scale)) return; } #endif int ktype = std::max(CV_32F, std::max(ddepth, sdepth)); Mat kx, ky; getDerivKernels( kx, ky, dx, dy, ksize, false, ktype ); if( scale != 1 ) { // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part if( dx == 0 ) kx *= scale; else ky *= scale; } sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType ); }
void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType ) { Mat src = _src.getMat(); if (ddepth < 0) ddepth = src.depth(); _dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) ); Mat dst = _dst.getMat(); #ifdef HAVE_TEGRA_OPTIMIZATION if (scale == 1.0 && delta == 0) if (tegra::scharr(src, dst, dx, dy, borderType)) return; #endif #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) if(dx < 2 && dy < 2 && src.channels() == 1 && borderType == 1) { if(IPPDerivScharr(src, dst, ddepth, dx, dy, scale)) return; } #endif int ktype = std::max(CV_32F, std::max(ddepth, src.depth())); Mat kx, ky; getScharrKernels( kx, ky, dx, dy, false, ktype ); if( scale != 1 ) { // usually the smoothing part is the slowest to compute, // so try to scale it instead of the faster differenciating part if( dx == 0 ) kx *= scale; else ky *= scale; } sepFilter2D( src, dst, ddepth, kx, ky, Point(-1,-1), delta, borderType ); }
void RhoanaBlocksGainCompensator::feed(const vector<Point> &corners, const vector<UMat> &images, const vector<pair<UMat,uchar> > &masks) { CV_Assert(corners.size() == images.size() && images.size() == masks.size()); const int num_images = static_cast<int>(images.size()); vector<Size> bl_per_imgs(num_images); vector<Point> block_corners; vector<UMat> block_images; vector<pair<UMat,uchar> > block_masks; // Construct blocks for gain compensator for (int img_idx = 0; img_idx < num_images; ++img_idx) { Size bl_per_img((images[img_idx].cols + bl_width_ - 1) / bl_width_, (images[img_idx].rows + bl_height_ - 1) / bl_height_); int bl_width = (images[img_idx].cols + bl_per_img.width - 1) / bl_per_img.width; int bl_height = (images[img_idx].rows + bl_per_img.height - 1) / bl_per_img.height; bl_per_imgs[img_idx] = bl_per_img; for (int by = 0; by < bl_per_img.height; ++by) { for (int bx = 0; bx < bl_per_img.width; ++bx) { Point bl_tl(bx * bl_width, by * bl_height); Point bl_br(min(bl_tl.x + bl_width, images[img_idx].cols), min(bl_tl.y + bl_height, images[img_idx].rows)); block_corners.push_back(corners[img_idx] + bl_tl); block_images.push_back(images[img_idx](Rect(bl_tl, bl_br))); block_masks.push_back(make_pair(masks[img_idx].first(Rect(bl_tl, bl_br)), masks[img_idx].second)); } } } RhoanaGainCompensator compensator; compensator.feed(block_corners, block_images, block_masks); vector<double> gains = compensator.gains(); gain_maps_.resize(num_images); Mat_<float> ker(1, 3); ker(0,0) = 0.25; ker(0,1) = 0.5; ker(0,2) = 0.25; int bl_idx = 0; for (int img_idx = 0; img_idx < num_images; ++img_idx) { Size bl_per_img = bl_per_imgs[img_idx]; gain_maps_[img_idx].create(bl_per_img, CV_32F); { Mat_<float> gain_map = gain_maps_[img_idx].getMat(ACCESS_WRITE); for (int by = 0; by < bl_per_img.height; ++by) for (int bx = 0; bx < bl_per_img.width; ++bx, ++bl_idx) gain_map(by, bx) = static_cast<float>(gains[bl_idx]); } sepFilter2D(gain_maps_[img_idx], gain_maps_[img_idx], CV_32F, ker, ker); sepFilter2D(gain_maps_[img_idx], gain_maps_[img_idx], CV_32F, ker, ker); } }
static bool ocl_Laplacian5(InputArray _src, OutputArray _dst, const Mat & kd, const Mat & ks, double scale, double delta, int borderType, int depth, int ddepth) { const size_t tileSizeX = 16; const size_t tileSizeYmin = 8; const ocl::Device dev = ocl::Device::getDefault(); int stype = _src.type(); int sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), esz = CV_ELEM_SIZE(stype); bool doubleSupport = dev.doubleFPConfig() > 0; if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) return false; Mat kernelX = kd.reshape(1, 1); if (kernelX.cols % 2 != 1) return false; Mat kernelY = ks.reshape(1, 1); if (kernelY.cols % 2 != 1) return false; CV_Assert(kernelX.cols == kernelY.cols); size_t wgs = dev.maxWorkGroupSize(); size_t lmsz = dev.localMemSize(); size_t src_step = _src.step(), src_offset = _src.offset(); const size_t tileSizeYmax = wgs / tileSizeX; // workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn; if (((src_offset % src_step) % esz == 0) && ( (borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE) || ((borderType == BORDER_REFLECT || borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) && (_src.cols() >= (int) (kernelX.cols + tileSizeX) && _src.rows() >= (int) (kernelY.cols + tileSizeYmax))) ) && (tileSizeX * tileSizeYmin <= wgs) && (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeYmin, kernelX.cols, loc_mem_cn * 4) <= lmsz) ) { Size size = _src.size(), wholeSize; Point origin; int dtype = CV_MAKE_TYPE(ddepth, cn); int wdepth = CV_32F; size_t tileSizeY = tileSizeYmax; while ((tileSizeX * tileSizeY > wgs) || (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeY, kernelX.cols, loc_mem_cn * 4) > lmsz)) { tileSizeY /= 2; } size_t lt2[2] = { tileSizeX, tileSizeY}; size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1] }; char cvt[2][40]; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s" " -D convertToWT=%s -D convertToDT=%s" " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s" " -D srcT=%s -D dstT=%s -D WT=%s" " -D CN=%d ", (int)lt2[0], (int)lt2[1], kernelX.cols / 2, ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(), ocl::kernelToStr(kernelY, wdepth, "KERNEL_MATRIX_Y").c_str(), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType], ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth), ocl::typeToStr(CV_MAKETYPE(sdepth, cn)), ocl::typeToStr(CV_MAKETYPE(ddepth, cn)), ocl::typeToStr(CV_MAKETYPE(wdepth, cn)), cn); ocl::Kernel k("laplacian", ocl::imgproc::laplacian5_oclsrc, opts); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(size, dtype); UMat dst = _dst.getUMat(); int src_offset_x = static_cast<int>((src_offset % src_step) / esz); int src_offset_y = static_cast<int>(src_offset / src_step); src.locateROI(wholeSize, origin); k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y, wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst), static_cast<float>(scale), static_cast<float>(delta)); return k.run(2, gt2, lt2, false); } int iscale = cvRound(scale), idelta = cvRound(delta); bool floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON; int wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1; if (!doubleSupport && wdepth == CV_64F) return false; char cvt[2][40]; ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc, format("-D ONLY_SUM_CONVERT " "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d " "-D convertToWT=%s -D convertToDT=%s%s", ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), ocl::typeToStr(wdepth), wdepth, ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat d2x, d2y; sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType); sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType); UMat dst = _dst.getUMat(); ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x), d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y), dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); if (wdepth >= CV_32F) k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta); else k.args(d2xarg, d2yarg, dstarg, iscale, idelta); size_t globalsize[] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); }