// OpenCL path for the integral image (sum only) of an 8-bit single-channel source.
//
// Two kernels from ocl::imgproc::integral_sum_oclsrc are run back to back:
//   1. "integral_sum_cols" reads src and writes the intermediate buffer t_sum;
//   2. "integral_sum_rows" reads t_sum and writes the final sum.
//
// _src   : input; must be CV_8UC1 with step and offset both multiples of vlen.
// _sum   : output; (re)created here as (width + 1) x (height + 1) with depth sdepth.
// sdepth : output depth; only CV_32S and CV_32F are accepted by this path.
//
// Returns false when the input/depth combination is not supported, when a
// kernel could not be built, or when a kernel launch fails; returns true once
// the second kernel has been launched successfully.
// NOTE(review): presumably the caller falls back to a non-OpenCL path on false - confirm.
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
         !(sdepth == CV_32S || sdepth == CV_32F) )
        return false;

    ocl::Kernel k1("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    if (k1.empty())
        return false;

    Size size = _src.size();
    // Intermediate buffer: one row per source column, and the source height
    // rounded up to a multiple of vlen as its width.
    Size t_size(((size.height + vlen - 1) / vlen) * vlen, size.width);
    Size ssize(size.width + 1, size.height + 1);

    _sum.create(ssize, sdepth);
    UMat src = _src.getUMat();
    UMat t_sum(t_size, sdepth);
    UMat sum = _sum.getUMat();

    // Keep the padded allocation (and therefore its step) but expose only the
    // first size.height columns.
    t_sum = t_sum(Range::all(), Range(0, size.height));

    int offset = (int)src.offset / vlen;
    int pre_invalid = (int)src.offset % vlen;
    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
    // NOTE(review): divides the byte offset by vlen rather than by the element
    // size (the two-output overload uses elemSize()); this assumes vlen equals
    // the 4-byte element size of CV_32S/CV_32F - confirm against vlen's definition.
    int sum_offset = (int)sum.offset / vlen;

    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
            offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step);

    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
    // Fix: the work-size arguments are the addresses of gt/lt ("&gt, &lt");
    // they had been mangled into bare '>' and '<' tokens.
    if (!k1.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel k2("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    // Same guard as for k1: do not bind arguments to a kernel that failed to build.
    if (k2.empty())
        return false;

    k2.args(ocl::KernelArg::PtrReadWrite(t_sum), ocl::KernelArg::PtrWriteOnly(sum),
            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)sum.step, sum_offset);

    size_t gt2 = t_sum.cols * 32, lt2 = 256;
    return k2.run(1, &gt2, &lt2, false);
}
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) { bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) return false; char cvt[40]; String opts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s", sdepth, sqdepth, ocl::typeToStr(sqdepth), ocl::typeToStr(sqdepth), ocl::convertTypeStr(sdepth, sqdepth, 4, cvt), doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel k1("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, opts); if (k1.empty()) return false; Size size = _src.size(), dsize = Size(size.width + 1, size.height + 1), t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width); UMat src = _src.getUMat(), t_sum(t_size, sdepth), t_sqsum(t_size, sqdepth); t_sum = t_sum(Range::all(), Range(0, size.height)); t_sqsum = t_sqsum(Range::all(), Range(0, size.height)); _sum.create(dsize, sdepth); _sqsum.create(dsize, sqdepth); UMat sum = _sum.getUMat(), sqsum = _sqsum.getUMat(); int offset = src.offset / vlen; int pre_invalid = src.offset % vlen; int vcols = (pre_invalid + src.cols + vlen - 1) / vlen; int sum_offset = sum.offset / sum.elemSize(); int sqsum_offset = sqsum.offset / sqsum.elemSize(); CV_Assert(sqsum.offset % sqsum.elemSize() == 0); k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum), ocl::KernelArg::PtrWriteOnly(t_sqsum), offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step, (int)t_sqsum.step); size_t gt = ((vcols + 1) / 2) * 256, lt = 256; if (!k1.run(1, >, <, false)) return false; ocl::Kernel k2("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, opts); if (k2.empty()) return false; k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrReadOnly(t_sqsum), ocl::KernelArg::PtrWriteOnly(sum), ocl::KernelArg::PtrWriteOnly(sqsum), t_sum.rows, 
t_sum.cols, (int)t_sum.step, (int)t_sqsum.step, (int)sum.step, (int)sqsum.step, sum_offset, sqsum_offset); size_t gt2 = t_sum.cols * 32, lt2 = 256; return k2.run(1, >2, <2, false); }