Ejemplo n.º 1
0
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0  ||
         !(sdepth == CV_32S || sdepth == CV_32F) )
        return false;

    ocl::Kernel k1("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    if (k1.empty())
        return false;

    Size size = _src.size(), t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width),
            ssize(size.width + 1, size.height + 1);
    _sum.create(ssize, sdepth);
    UMat src = _src.getUMat(), t_sum(t_size, sdepth), sum = _sum.getUMat();
    t_sum = t_sum(Range::all(), Range(0, size.height));

    int offset = (int)src.offset / vlen, pre_invalid = (int)src.offset % vlen;
    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
    int sum_offset = (int)sum.offset / vlen;

    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
            offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step);
    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
    if (!k1.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel k2("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    k2.args(ocl::KernelArg::PtrReadWrite(t_sum), ocl::KernelArg::PtrWriteOnly(sum),
            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)sum.step, sum_offset);

    size_t gt2 = t_sum.cols  * 32, lt2 = 256;
    return k2.run(1, &gt2, &lt2, false);
}
Ejemplo n.º 2
0
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
         (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
        return false;

    char cvt[40];
    String opts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s",
                         sdepth, sqdepth, ocl::typeToStr(sqdepth), ocl::typeToStr(sqdepth),
                         ocl::convertTypeStr(sdepth, sqdepth, 4, cvt),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel k1("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, opts);
    if (k1.empty())
        return false;

    Size size = _src.size(), dsize = Size(size.width + 1, size.height + 1),
            t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width);
    UMat src = _src.getUMat(), t_sum(t_size, sdepth), t_sqsum(t_size, sqdepth);
    t_sum = t_sum(Range::all(), Range(0, size.height));
    t_sqsum = t_sqsum(Range::all(), Range(0, size.height));

    _sum.create(dsize, sdepth);
    _sqsum.create(dsize, sqdepth);
    UMat sum = _sum.getUMat(), sqsum = _sqsum.getUMat();

    int offset = src.offset / vlen;
    int pre_invalid = src.offset % vlen;
    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
    int sum_offset = sum.offset / sum.elemSize();
    int sqsum_offset = sqsum.offset / sqsum.elemSize();
    CV_Assert(sqsum.offset % sqsum.elemSize() == 0);

    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
            ocl::KernelArg::PtrWriteOnly(t_sqsum), offset, pre_invalid, src.rows,
            src.cols, (int)src.step, (int)t_sum.step, (int)t_sqsum.step);

    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
    if (!k1.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel k2("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, opts);
    if (k2.empty())
        return false;

    k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrReadOnly(t_sqsum),
            ocl::KernelArg::PtrWriteOnly(sum), ocl::KernelArg::PtrWriteOnly(sqsum),
            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)t_sqsum.step,
            (int)sum.step, (int)sqsum.step, sum_offset, sqsum_offset);

    size_t gt2 = t_sum.cols  * 32, lt2 = 256;
    return k2.run(1, &gt2, &lt2, false);
}
Ejemplo n.º 3
0
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
                                  InputArray _kernelX, InputArray _kernelY, double delta, int borderType)
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
         (_src.offset() == 0) && (_src.step() % 4 == 0) &&
         (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
        return false;

    Mat kernelX = _kernelX.getMat().reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = _kernelY.getMat().reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;

    if (ddepth < 0)
        ddepth = sdepth;

    Size size = _src.size();
    size_t globalsize[2] = { 0, 0 };
    size_t localsize[2] = { 0, 0 };

    globalsize[0] = size.width / 16;
    globalsize[1] = size.height / 2;

    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
    char build_opts[1024];
    sprintf(build_opts, "-D %s %s%s", borderMap[borderType],
            ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(),
            ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str());

    ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts);
    if (kernel.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(size, CV_MAKETYPE(ddepth, cn));
    if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
        return false;
    UMat dst = _dst.getUMat();

    int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
    idxArg = kernel.set(idxArg, (int)src.step);
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    idxArg = kernel.set(idxArg, (int)dst.step);
    idxArg = kernel.set(idxArg, (int)dst.rows);
    idxArg = kernel.set(idxArg, (int)dst.cols);
    idxArg = kernel.set(idxArg, static_cast<float>(delta));

    return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
}
Ejemplo n.º 4
0
static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
                           const Mat & kd, const Mat & ks, double scale, double delta,
                           int borderType, int depth, int ddepth)
{
    const size_t tileSizeX = 16;
    const size_t tileSizeYmin = 8;

    const ocl::Device dev = ocl::Device::getDefault();

    int stype = _src.type();
    int sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), esz = CV_ELEM_SIZE(stype);

    bool doubleSupport = dev.doubleFPConfig() > 0;
    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
        return false;

    Mat kernelX = kd.reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = ks.reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;
    CV_Assert(kernelX.cols == kernelY.cols);

    size_t wgs = dev.maxWorkGroupSize();
    size_t lmsz = dev.localMemSize();
    size_t src_step = _src.step(), src_offset = _src.offset();
    const size_t tileSizeYmax = wgs / tileSizeX;

    // workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory
    int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;

    if (((src_offset % src_step) % esz == 0) &&
        (
         (borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE) ||
         ((borderType == BORDER_REFLECT || borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) &&
          (_src.cols() >= (int) (kernelX.cols + tileSizeX) && _src.rows() >= (int) (kernelY.cols + tileSizeYmax)))
        ) &&
        (tileSizeX * tileSizeYmin <= wgs) &&
        (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeYmin, kernelX.cols, loc_mem_cn * 4) <= lmsz)
       )
    {
        Size size = _src.size(), wholeSize;
        Point origin;
        int dtype = CV_MAKE_TYPE(ddepth, cn);
        int wdepth = CV_32F;

        size_t tileSizeY = tileSizeYmax;
        while ((tileSizeX * tileSizeY > wgs) || (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeY, kernelX.cols, loc_mem_cn * 4) > lmsz))
        {
            tileSizeY /= 2;
        }
        size_t lt2[2] = { tileSizeX, tileSizeY};
        size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1] };

        char cvt[2][40];
        const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                           "BORDER_REFLECT_101" };

        String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s"
                                 " -D convertToWT=%s -D convertToDT=%s"
                                 " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s"
                                 " -D srcT=%s -D dstT=%s -D WT=%s"
                                 " -D CN=%d ",
                                 (int)lt2[0], (int)lt2[1], kernelX.cols / 2,
                                 ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(),
                                 ocl::kernelToStr(kernelY, wdepth, "KERNEL_MATRIX_Y").c_str(),
                                 ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
                                 ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
                                 borderMap[borderType],
                                 ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth),
                                 ocl::typeToStr(CV_MAKETYPE(sdepth, cn)),
                                 ocl::typeToStr(CV_MAKETYPE(ddepth, cn)),
                                 ocl::typeToStr(CV_MAKETYPE(wdepth, cn)),
                                 cn);

        ocl::Kernel k("laplacian", ocl::imgproc::laplacian5_oclsrc, opts);
        if (k.empty())
            return false;
        UMat src = _src.getUMat();
        _dst.create(size, dtype);
        UMat dst = _dst.getUMat();

        int src_offset_x = static_cast<int>((src_offset % src_step) / esz);
        int src_offset_y = static_cast<int>(src_offset / src_step);

        src.locateROI(wholeSize, origin);

        k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y,
               wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst),
               static_cast<float>(scale), static_cast<float>(delta));

        return k.run(2, gt2, lt2, false);
    }
    int iscale = cvRound(scale), idelta = cvRound(delta);
    bool floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON;
    int wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1;

    if (!doubleSupport && wdepth == CV_64F)
        return false;

    char cvt[2][40];
    ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc,
                  format("-D ONLY_SUM_CONVERT "
                         "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
                         "-D convertToWT=%s -D convertToDT=%s%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
                         ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ocl::typeToStr(wdepth), wdepth,
                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                         ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat d2x, d2y;
    sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
    sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);

    UMat dst = _dst.getUMat();

    ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x),
            d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y),
            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    if (wdepth >= CV_32F)
        k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
    else
        k.args(d2xarg, d2yarg, dstarg, iscale, idelta);

    size_t globalsize[] = { dst.cols * cn / kercn, dst.rows };
    return k.run(2, globalsize, NULL, false);
}