Example #1
0
/**
 * OpenCL implementation of the integral / squared-integral computation for an
 * 8-bit single-channel source. The work is done by two kernels taken from
 * ocl::imgproc::integral_sqrsum_oclsrc: "integral_cols" writes into two
 * intermediate buffers, and "integral_rows" reads those buffers and writes the
 * final outputs.
 *
 * @param _src    Source array. Only CV_8UC1 data whose step and offset are both
 *                multiples of vlen is accepted.
 * @param _sum    Output, created as (width + 1) x (height + 1) with depth sdepth.
 * @param _sqsum  Output, created as (width + 1) x (height + 1) with depth sqdepth.
 * @param sdepth  Depth of _sum.
 * @param sqdepth Depth of _sqsum.
 * @return false if the input is rejected, if CV_64F is requested without device
 *         double support, if either kernel is empty after construction, or if
 *         the first kernel fails to run; otherwise the result of running the
 *         second kernel.
 */
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    const bool haveDouble = ocl::Device::getDefault().doubleFPConfig() > 0;

    // Guard clauses: bail out so the caller can use another code path.
    if (_src.type() != CV_8UC1)
        return false;
    if (_src.step() % vlen != 0 || _src.offset() % vlen != 0)
        return false;
    if (!haveDouble && (sdepth == CV_64F || sqdepth == CV_64F))
        return false;

    // Build options shared by both kernels.
    char cvtBuf[40];
    const char* const sqTypeName = ocl::typeToStr(sqdepth);
    String buildOpts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s",
                              sdepth, sqdepth, sqTypeName, sqTypeName,
                              ocl::convertTypeStr(sdepth, sqdepth, 4, cvtBuf),
                              haveDouble ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel colsKernel("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, buildOpts);
    if (colsKernel.empty())
        return false;

    const Size srcSize = _src.size();
    const Size dstSize(srcSize.width + 1, srcSize.height + 1);
    // Intermediate buffers have the source dimensions swapped; their width is the
    // source height rounded up to a multiple of vlen.
    const Size tmpSize(((srcSize.height + vlen - 1) / vlen) * vlen, srcSize.width);

    UMat srcMat = _src.getUMat();
    UMat tmpSum(tmpSize, sdepth);
    UMat tmpSqsum(tmpSize, sqdepth);

    // Keep the padded allocation but expose only the first srcSize.height columns.
    const Range usedCols(0, srcSize.height);
    tmpSum = tmpSum(Range::all(), usedCols);
    tmpSqsum = tmpSqsum(Range::all(), usedCols);

    _sum.create(dstSize, sdepth);
    _sqsum.create(dstSize, sqdepth);
    UMat sumMat = _sum.getUMat();
    UMat sqsumMat = _sqsum.getUMat();

    // Source offset split into whole vlen-sized units plus the remainder.
    const int srcVecOffset = static_cast<int>(srcMat.offset / vlen);
    const int leadingInvalid = static_cast<int>(srcMat.offset % vlen);
    const int vecCols = (leadingInvalid + srcMat.cols + vlen - 1) / vlen;

    // Output offsets are handed to the kernel in elements rather than bytes.
    const int sumElemOffset = static_cast<int>(sumMat.offset / sumMat.elemSize());
    const int sqsumElemOffset = static_cast<int>(sqsumMat.offset / sqsumMat.elemSize());
    CV_Assert(sqsumMat.offset % sqsumMat.elemSize() == 0);

    const int srcStep = static_cast<int>(srcMat.step);
    const int tmpSumStep = static_cast<int>(tmpSum.step);
    const int tmpSqsumStep = static_cast<int>(tmpSqsum.step);

    // First pass: source -> intermediate buffers.
    colsKernel.args(ocl::KernelArg::PtrReadOnly(srcMat),
                    ocl::KernelArg::PtrWriteOnly(tmpSum),
                    ocl::KernelArg::PtrWriteOnly(tmpSqsum),
                    srcVecOffset, leadingInvalid, srcMat.rows, srcMat.cols,
                    srcStep, tmpSumStep, tmpSqsumStep);

    size_t colsGlobal = ((vecCols + 1) / 2) * 256;
    size_t colsLocal = 256;
    if (!colsKernel.run(1, &colsGlobal, &colsLocal, false))
        return false;

    ocl::Kernel rowsKernel("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, buildOpts);
    if (rowsKernel.empty())
        return false;

    const int sumStep = static_cast<int>(sumMat.step);
    const int sqsumStep = static_cast<int>(sqsumMat.step);

    // Second pass: intermediate buffers -> final outputs.
    rowsKernel.args(ocl::KernelArg::PtrReadOnly(tmpSum),
                    ocl::KernelArg::PtrReadOnly(tmpSqsum),
                    ocl::KernelArg::PtrWriteOnly(sumMat),
                    ocl::KernelArg::PtrWriteOnly(sqsumMat),
                    tmpSum.rows, tmpSum.cols, tmpSumStep, tmpSqsumStep,
                    sumStep, sqsumStep, sumElemOffset, sqsumElemOffset);

    size_t rowsGlobal = tmpSum.cols * 32;
    size_t rowsLocal = 256;
    return rowsKernel.run(1, &rowsGlobal, &rowsLocal, false);
}