Example #1
0
void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
    if( _mask.empty() )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    {
        UMatData * prevu = _dst.getUMat().u;
        _dst.create( dims, size, type() );

        UMat dst = _dst.getUMat();

        bool haveDstUninit = false;
        if( prevu != dst.u ) // do not leave dst uninitialized
            haveDstUninit = true;

        String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
                             ocl::memopTypeToStr(depth()), cn, mcn,
                             haveDstUninit ? " -D HAVE_DST_UNINIT" : "");

        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
        if (!k.empty())
        {
            k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
                   ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
                                   ocl::KernelArg::ReadWrite(dst));

            size_t globalsize[2] = { (size_t)cols, (size_t)rows };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
}
Example #2
0
void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
{
    bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
    int stype = type(), cn = CV_MAT_CN(stype);

    if( _type < 0 )
        _type = _dst.fixedType() ? _dst.type() : stype;
    else
        _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);

    int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
    if( sdepth == ddepth && noScale )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
    if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
            ((needDouble && doubleSupport) || !needDouble) )
    {
        int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;

        char cvt[2][40];
        ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
                      format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
                             ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
                             ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
                             ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
                             doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
        if (!k.empty())
        {
            UMat src = *this;
            _dst.create( size(), _type );
            UMat dst = _dst.getUMat();

            float alphaf = (float)alpha, betaf = (float)beta;
            ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                    dstarg = ocl::KernelArg::WriteOnly(dst, cn);

            if (wdepth == CV_32F)
                k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
            else
                k.args(srcarg, dstarg, alpha, beta, rowsPerWI);

            size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    Mat m = getMat(ACCESS_READ);
    m.convertTo(_dst, _type, alpha, beta);
}
Example #3
0
    static bool ocl_diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst)
    {
        ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc);
        if (k.empty())
            return false;

        UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
        _dst.create(src1.size(), src1.type());
        UMat dst = _dst.getUMat();

        int cn = src1.channels();
        k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
               ocl::KernelArg::ReadOnlyNoSize(src2),
               ocl::KernelArg::WriteOnly(dst, cn));

        size_t globalsize[2] = { (size_t)src1.cols * cn, (size_t)src1.rows };
        return k.run(2, globalsize, NULL, false);
    }
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
                                     int templateWindowSize, int searchWindowSize)
{
    int type = _src.type(), cn = CV_MAT_CN(type);
    Size size = _src.size();

    if ( type != CV_8UC1 || type != CV_8UC2 || type != CV_8UC4 )
        return false;

    int templateWindowHalfWize = templateWindowSize / 2;
    int searchWindowHalfSize = searchWindowSize / 2;
    templateWindowSize  = templateWindowHalfWize * 2 + 1;
    searchWindowSize = searchWindowHalfSize * 2 + 1;
    int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
    int almostTemplateWindowSizeSqBinShift = -1;

    char cvt[2][40];
    String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
                         " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
                         " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
                         " -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s",
                         templateWindowSize, searchWindowSize, ocl::typeToStr(type),
                         ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, CTA_SIZE,
                         templateWindowHalfWize, searchWindowHalfSize,
                         ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
                         CTA_SIZE >> 1, ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));

    ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
    if (k.empty())
        return false;

    UMat almostDist2Weight;
    if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
                                   almostTemplateWindowSizeSqBinShift))
        return false;
    CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);

    UMat srcex;
    int borderSize = searchWindowHalfSize + templateWindowHalfWize;
    copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);

    _dst.create(size, type);
    UMat dst = _dst.getUMat();

    int searchWindowSizeSq = searchWindowSize * searchWindowSize;
    Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
    Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy);
    UMat buffer(upColSumSize + colSumSize, CV_32SC(cn));

    srcex = srcex(Rect(Point(borderSize, borderSize), size));
    k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst),
           ocl::KernelArg::PtrReadOnly(almostDist2Weight),
           ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);

    size_t globalsize[2] = { nblocksx * CTA_SIZE, nblocksy }, localsize[2] = { CTA_SIZE, 1 };
    return k.run(2, globalsize, localsize, false);
}
Example #5
0
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
        return false;

    static const int tileSize = 16;

    String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
                                ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
                                tileSize,
                                doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (kcols.empty())
        return false;

    UMat src = _src.getUMat();
    Size src_size = src.size();
    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
    UMat buf(bufsize, sdepth);
    UMat buf_sq(bufsize, sqdepth);
    kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq));
    size_t gt = src.cols, lt = tileSize;
    if (!kcols.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (krows.empty())
        return false;

    Size sumsize(src_size.width + 1, src_size.height + 1);
    _sum.create(sumsize, sdepth);
    UMat sum = _sum.getUMat();
    _sqsum.create(sumsize, sqdepth);
    UMat sum_sq = _sqsum.getUMat();

    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
    gt = src.rows;
    return krows.run(1, &gt, &lt, false);
}
Example #6
0
Point PlaneWarperOcl::warp(InputArray src, InputArray K, InputArray R, InputArray T, int interp_mode, int border_mode, OutputArray dst)
{
    UMat uxmap, uymap;
    Rect dst_roi = buildMaps(src.size(), K, R, T, uxmap, uymap);

    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
    UMat udst = dst.getUMat();
    remap(src, udst, uxmap, uymap, interp_mode, border_mode);

    return dst_roi.tl();
}
Example #7
0
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
                                  InputArray _kernelX, InputArray _kernelY, double delta, int borderType)
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
         (_src.offset() == 0) && (_src.step() % 4 == 0) &&
         (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
        return false;

    Mat kernelX = _kernelX.getMat().reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = _kernelY.getMat().reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;

    if (ddepth < 0)
        ddepth = sdepth;

    Size size = _src.size();
    size_t globalsize[2] = { 0, 0 };
    size_t localsize[2] = { 0, 0 };

    globalsize[0] = size.width / 16;
    globalsize[1] = size.height / 2;

    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
    char build_opts[1024];
    sprintf(build_opts, "-D %s %s%s", borderMap[borderType],
            ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(),
            ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str());

    ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts);
    if (kernel.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(size, CV_MAKETYPE(ddepth, cn));
    if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
        return false;
    UMat dst = _dst.getUMat();

    int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
    idxArg = kernel.set(idxArg, (int)src.step);
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    idxArg = kernel.set(idxArg, (int)dst.step);
    idxArg = kernel.set(idxArg, (int)dst.rows);
    idxArg = kernel.set(idxArg, (int)dst.cols);
    idxArg = kernel.set(idxArg, static_cast<float>(delta));

    return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
}
Example #8
0
static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst)
{
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    for (int y = 0; y < ny; ++y)
        for (int x = 0; x < nx; ++x)
        {
            Rect roi(x * src.cols, y * src.rows, src.cols, src.rows);
            UMat hdr(dst, roi);
            src.copyTo(hdr);
        }
    return true;
}
Example #9
0
void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
    if( _mask.empty() )
    {
        copyTo(_dst);
        return;
    }

    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    {
        UMatData * prevu = _dst.getUMat().u;
        _dst.create( dims, size, type() );

        UMat dst = _dst.getUMat();

        if( prevu != dst.u ) // do not leave dst uninitialized
            dst = Scalar(0);

        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc,
                      format("-D COPY_TO_MASK -D T=%s -D scn=%d -D mcn=%d",
                             ocl::memopTypeToStr(depth()), cn, mcn));
        if (!k.empty())
        {
            k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
                   ocl::KernelArg::WriteOnly(dst));

            size_t globalsize[2] = { cols, rows };
            if (k.run(2, globalsize, NULL, false))
                return;
        }
    }

    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
}
Example #10
0
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums, temp;
    integral(_image, temp);

    if (temp.depth() == CV_64F)
        temp.convertTo(image_sums, CV_32F);
    else
        image_sums = temp;

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    if (cn == 1)
    {
        float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area();

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
                templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = sum(templ) / tsize.area();

        if (cn == 2)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1]);
        else if (cn==3)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1], templ_sum[2]);
        else
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]);
    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #11
0
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        return false;

    Size ssize = _src.size();
    if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
        return false;

    UMat src = _src.getUMat();
    Size dsize = Size(ssize.width * 2, ssize.height * 2);
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 16;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels, local_size
    );
    size_t globalThreads[2] = { dst.cols, dst.rows };
    size_t localThreads[2] = { local_size, local_size };
    ocl::Kernel k;
    if (ocl::Device::getDefault().isIntel() && channels == 1)
    {
        k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
        globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
    }
    else
        k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);

    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
    return k.run(2, globalThreads, localThreads, false);
}
Example #12
0
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (cn > 4 || (depth == CV_64F && !doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    if (dsize.height < 2 || dsize.width < 2)
        return false;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
            std::abs(dsize.width*2 - ssize.width) <= 2 &&
            std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 256;
    int kercn = 1;
    if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel())
        kercn = 4;
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                       "BORDER_REFLECT_101" };
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)),
            ocl::convertTypeStr(float_depth, depth, cn, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, cn, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth),
            cn, kercn, float_depth, borderMap[borderType], local_size
    );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    size_t localThreads[2]  = { local_size/kercn, 1 };
    size_t globalThreads[2] = { (src.cols + (kercn-1))/kercn, (dst.rows + 1) / 2 };
    return k.run(2, globalThreads, localThreads, false);
}
Example #13
0
bool BackgroundSubtractorMOG2Impl::ocl_getBackgroundImage(OutputArray _backgroundImage) const
{
    _backgroundImage.create(frameSize, frameType);
    UMat dst = _backgroundImage.getUMat();

    int idxArg = 0;
    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_bgmodelUsedModes));
    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_weight));
    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_mean));
    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst));
    kernel_getBg.set(idxArg, backgroundRatio);

    size_t globalsize[2] = {(size_t)u_bgmodelUsedModes.cols, (size_t)u_bgmodelUsedModes.rows};

    return kernel_getBg.run(2, globalsize, NULL, false);
}
Example #14
0
static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
{
    _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);

    if (_image.channels() == 1)
        return(convolve_dft(_image, _templ, _result));
    else
    {
        UMat image = _image.getUMat();
        UMat templ = _templ.getUMat();
        UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F);
        bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_);
        if (ok==false)
            return false;
        UMat result = _result.getUMat();
        return (extractFirstChannel_32F(result_, _result, _image.channels()));
    }
}
Example #15
0
static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
                           const Mat & kd, const Mat & ks, double scale, double delta,
                           int borderType, int depth, int ddepth)
{
    int iscale = cvRound(scale), idelta = cvRound(delta);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
            floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON;
    int cn = _src.channels(), wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1;

    if (!doubleSupport && wdepth == CV_64F)
        return false;

    char cvt[2][40];
    ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc,
                  format("-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
                         "-D convertToWT=%s -D convertToDT=%s%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
                         ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ocl::typeToStr(wdepth), wdepth,
                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                         ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat d2x, d2y;
    sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
    sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);

    UMat dst = _dst.getUMat();

    ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x),
            d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y),
            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    if (wdepth >= CV_32F)
        k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
    else
        k.args(d2xarg, d2yarg, dstarg, iscale, idelta);

    size_t globalsize[] = { dst.cols * cn / kercn, dst.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #16
0
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if ((depth == CV_64F) && !(doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
            std::abs(dsize.width*2 - ssize.width) <= 2 &&
            std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels
    );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    size_t localThreads[2]  = { 256, 1 };
    size_t globalThreads[2] = { src.cols, dst.rows };
    return k.run(2, globalThreads, localThreads, false);
}
Example #17
0
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Device dev = ocl::Device::getDefault();
    int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;

    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
    double min_val = min_vals[depth];

    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));

    size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
    globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
    return k.run(2, globalsize, NULL, false);
}
Example #18
0
void UMat::copyTo(OutputArray _dst) const
{
    int dtype = _dst.type();
    if( _dst.fixedType() && dtype != type() )
    {
        CV_Assert( channels() == CV_MAT_CN(dtype) );
        convertTo( _dst, dtype );
        return;
    }

    if( empty() )
    {
        _dst.release();
        return;
    }

    size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
    for( i = 0; i < (size_t)dims; i++ )
        sz[i] = size.p[i];
    sz[dims-1] *= esz;
    ndoffset(srcofs);
    srcofs[dims-1] *= esz;

    _dst.create( dims, size.p, type() );
    if( _dst.isUMat() )
    {
        UMat dst = _dst.getUMat();
        if( u == dst.u && dst.offset == offset )
            return;

        if (u->currAllocator == dst.u->currAllocator)
        {
            dst.ndoffset(dstofs);
            dstofs[dims-1] *= esz;
            u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
            return;
        }
    }

    Mat dst = _dst.getMat();
    u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
}
Example #19
0
bool BackgroundSubtractorMOG2Impl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate)
{
    ++nframes;
    learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
    CV_Assert(learningRate >= 0);

    _fgmask.create(_image.size(), CV_8U);
    UMat fgmask = _fgmask.getUMat();

    const double alpha1 = 1.0f - learningRate;

    UMat frame = _image.getUMat();

    float varMax = MAX(fVarMin, fVarMax);
    float varMin = MIN(fVarMin, fVarMax);

    int idxArg = 0;
    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame));
    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_bgmodelUsedModes));
    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_weight));
    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_mean));
    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_variance));
    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask));

    idxArg = kernel_apply.set(idxArg, (float)learningRate);        //alphaT
    idxArg = kernel_apply.set(idxArg, (float)alpha1);
    idxArg = kernel_apply.set(idxArg, (float)(-learningRate*fCT));   //prune

    idxArg = kernel_apply.set(idxArg, (float)varThreshold); //c_Tb
    idxArg = kernel_apply.set(idxArg, backgroundRatio);     //c_TB
    idxArg = kernel_apply.set(idxArg, varThresholdGen);     //c_Tg
    idxArg = kernel_apply.set(idxArg, varMin);
    idxArg = kernel_apply.set(idxArg, varMax);
    idxArg = kernel_apply.set(idxArg, fVarInit);
    idxArg = kernel_apply.set(idxArg, fTau);
    if (bShadowDetection)
        kernel_apply.set(idxArg, nShadowDetection);

    size_t globalsize[] = {(size_t)frame.cols, (size_t)frame.rows, 1};
    return kernel_apply.run(2, globalsize, NULL, true);
}
Example #20
0
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
{
    int type = _src.type(), cn = CV_MAT_CN(type);

    if (cn > 4 || cn == 3)
        return false;

    const char * kernelName;
    int flipType;

    if (flipCode == 0)
        kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
    else if (flipCode > 0)
        kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
    else
        kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;

    Size size = _src.size();
    int cols = size.width, rows = size.height;
    if ((cols == 1 && flipType == FLIP_COLS) ||
            (rows == 1 && flipType == FLIP_ROWS) ||
            (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
    {
        _src.copyTo(_dst);
        return true;
    }

    ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
        format( "-D type=%s", ocl::memopTypeToStr(type)));
    if (k.empty())
        return false;

    _dst.create(size, type);
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    cols = flipType == FLIP_COLS ? ((cols+1)/2) : cols;
    rows = flipType & FLIP_ROWS ? ((rows+1)/2) : rows;

    size_t globalsize[2] = { cols, rows };
    return k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), rows, cols).run(2, globalsize, NULL, false);
}
Example #21
0
static bool ocl_blendLinear( InputArray _src1, InputArray _src2, InputArray _weights1, InputArray _weights2, OutputArray _dst )
{
    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    char cvt[30];
    ocl::Kernel k("blendLinear", ocl::imgproc::blend_linear_oclsrc,
                  format("-D T=%s -D cn=%d -D convertToT=%s", ocl::typeToStr(depth),
                         cn, ocl::convertTypeStr(CV_32F, depth, 1, cvt)));
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), weights1 = _weights1.getUMat(),
            weights2 = _weights2.getUMat(), dst = _dst.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
           ocl::KernelArg::ReadOnlyNoSize(weights1), ocl::KernelArg::ReadOnlyNoSize(weights2),
           ocl::KernelArg::WriteOnly(dst));

    size_t globalsize[2] = { (size_t)dst.cols, (size_t)dst.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #22
0
    static bool ocl_calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, const UMat & ubtvWeights)
    {
        int cn = _src.channels();
        ocl::Kernel k("calcBtvRegularization", ocl::superres::superres_btvl1_oclsrc,
                      format("-D cn=%d", cn));
        if (k.empty())
            return false;

        UMat src = _src.getUMat();
        _dst.create(src.size(), src.type());
        _dst.setTo(Scalar::all(0));
        UMat dst = _dst.getUMat();

        const int ksize = (btvKernelSize - 1) / 2;

        k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
              ksize, ocl::KernelArg::PtrReadOnly(ubtvWeights));

        size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows };
        return k.run(2, globalsize, NULL, false);
    }
Example #23
0
static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    char cvt[40];
    ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
                         ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #24
0
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    ocl::Device dev = ocl::Device::getDefault();
    int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
    int rated_cn = cn;
    int wtype1 = wtype;

    if (pxPerWIx!=1)
    {
        rated_cn = pxPerWIx;
        type = CV_MAKE_TYPE(depth, rated_cn);
        wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
    }

    char cvt[40];
    char cvt1[40];
    const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
    const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);

    ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
                  format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
                         convertToWT, convertToWT1, cn, pxPerWIx));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    size_t globalsize[2] = { (result.cols+pxPerWIx-1)/pxPerWIx, result.rows};
    return k.run(2, globalsize, NULL, false);
}
Example #25
0
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if ((channels != 1 && channels != 2 && channels != 4) || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        return false;

    Size ssize = _src.size();
    if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
        return false;

    UMat src = _src.getUMat();
    Size dsize = Size(ssize.width * 2, ssize.height * 2);
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    char cvt[2][50];
    ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc,
                 format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
                 ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
                 ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
                 ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
                 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
    size_t globalThreads[2] = {dst.cols, dst.rows};
    size_t localThreads[2]  = {16, 16};

    return k.run(2, globalThreads, localThreads, false);
}
Example #26
0
/* dst = src */
void Mat::copyTo( OutputArray _dst ) const
{
    int dtype = _dst.type();
    if( _dst.fixedType() && dtype != type() )
    {
        CV_Assert( channels() == CV_MAT_CN(dtype) );
        convertTo( _dst, dtype );
        return;
    }

    if( empty() )
    {
        _dst.release();
        return;
    }

    if( _dst.isUMat() )
    {
        _dst.create( dims, size.p, type() );
        UMat dst = _dst.getUMat();

        size_t i, sz[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
        for( i = 0; i < (size_t)dims; i++ )
            sz[i] = size.p[i];
        sz[dims-1] *= esz;
        dst.ndoffset(dstofs);
        dstofs[dims-1] *= esz;
        dst.u->currAllocator->upload(dst.u, data, dims, sz, dstofs, dst.step.p, step.p);
        return;
    }

    if( dims <= 2 )
    {
        _dst.create( rows, cols, type() );
        Mat dst = _dst.getMat();
        if( data == dst.data )
            return;

        if( rows > 0 && cols > 0 )
        {
            const uchar* sptr = data;
            uchar* dptr = dst.data;

            Size sz = getContinuousSize(*this, dst);
            size_t len = sz.width*elemSize();

            for( ; sz.height--; sptr += step, dptr += dst.step )
                memcpy( dptr, sptr, len );
        }
        return;
    }

    _dst.create( dims, size, type() );
    Mat dst = _dst.getMat();
    if( data == dst.data )
        return;

    if( total() != 0 )
    {
        const Mat* arrays[] = { this, &dst };
        uchar* ptrs[2];
        NAryMatIterator it(arrays, ptrs, 2);
        size_t sz = it.size*elemSize();

        for( size_t i = 0; i < it.nplanes; i++, ++it )
            memcpy(ptrs[1], ptrs[0], sz);
    }
}
Example #27
0
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
                      int aperture_size, bool L2gradient, int cn, const Size & size)
{
    CV_INSTRUMENT_REGION_OPENCL()

    UMat map;

    const ocl::Device &dev = ocl::Device::getDefault();
    int max_wg_size = (int)dev.maxWorkGroupSize();

    int lSizeX = 32;
    int lSizeY = max_wg_size / 32;

    if (lSizeY == 0)
    {
        lSizeX = 16;
        lSizeY = max_wg_size / 16;
    }
    if (lSizeY == 0)
    {
        lSizeY = 1;
    }

    if (aperture_size == 7)
    {
        low_thresh = low_thresh / 16.0f;
        high_thresh = high_thresh / 16.0f;
    }

    if (L2gradient)
    {
        low_thresh = std::min(32767.0f, low_thresh);
        high_thresh = std::min(32767.0f, high_thresh);

        if (low_thresh > 0)
            low_thresh *= low_thresh;
        if (high_thresh > 0)
            high_thresh *= high_thresh;
    }
    int low = cvFloor(low_thresh), high = cvFloor(high_thresh);

    if (!useCustomDeriv &&
        aperture_size == 3 && !_src.isSubmatrix())
    {
        /*
            stage1_with_sobel:
                Sobel operator
                Calc magnitudes
                Non maxima suppression
                Double thresholding
        */
        char cvt[40];
        ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
                               format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
                                      cn, ocl::memopTypeToStr(_src.depth()),
                                      ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
                                      ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
                                      lSizeX, lSizeY,
                                      L2gradient ? " -D L2GRAD" : ""));
        if (with_sobel.empty())
            return false;

        UMat src = _src.getUMat();
        map.create(size, CV_32S);
        with_sobel.args(ocl::KernelArg::ReadOnly(src),
                        ocl::KernelArg::WriteOnlyNoSize(map),
                        (float) low, (float) high);

        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
                localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };

        if (!with_sobel.run(2, globalsize, localsize, false))
            return false;
    }
    else
    {
        /*
            stage1_without_sobel:
                Calc magnitudes
                Non maxima suppression
                Double thresholding
        */
        double scale = 1.0;
        if (aperture_size == 7)
        {
            scale = 1 / 16.0;
        }

        UMat dx, dy;
        if (!useCustomDeriv)
        {
            Sobel(_src, dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
            Sobel(_src, dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
        }
        else
        {
            dx = dx_;
            dy = dy_;
        }

        ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
                                    format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
                                           cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
        if (without_sobel.empty())
            return false;

        map.create(size, CV_32S);
        without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
                           ocl::KernelArg::WriteOnly(map),
                           low, high);

        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
                localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };

        if (!without_sobel.run(2, globalsize, localsize, false))
            return false;
    }

    int PIX_PER_WI = 8;
    /*
        stage2:
            hysteresis (add weak edges if they are connected with strong edges)
    */

    int sizey = lSizeY / PIX_PER_WI;
    if (sizey == 0)
        sizey = 1;

    size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };

    ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
                                format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
                                PIX_PER_WI, lSizeX, sizey));

    if (edgesHysteresis.empty())
        return false;

    edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
    if (!edgesHysteresis.run(2, globalsize, localsize, false))
        return false;

    // get edges

    ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
                                format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
    if (getEdgesKernel.empty())
        return false;

    _dst.create(size, CV_8UC1);
    UMat dst = _dst.getUMat();

    getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));

    return getEdgesKernel.run(2, globalsize, NULL, false);
}
Example #28
0
static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat temp, image_sums, image_sqsums;
    integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
        format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    float scale = 1.f / tsize.area();

    if (cn == 1)
    {
        float templ_sum = (float)sum(templ)[0];

        multiply(templ, templ, temp, 1, CV_32F);
        float templ_sqsum = (float)sum(temp)[0];

        templ_sqsum -= scale * templ_sum * templ_sum;
        templ_sum   *= scale;

        if (templ_sqsum < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return true;
        }

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
                      ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
        templ_sum = sum(templ);

        multiply(templ, templ, temp, 1, CV_32F);
        templ_sqsum = sum(temp);

        float templ_sqsum_sum = 0;
        for (int i = 0; i < cn; i ++)
            templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];

        templ_sum *= scale;

        if (templ_sqsum_sum < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return true;
        }

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
                   ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
                   templ_sum, templ_sqsum_sum);    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #29
0
static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
{
    ConvolveBuf buf;
    CV_Assert(_image.type() == CV_32F);
    CV_Assert(_templ.type() == CV_32F);

    buf.create(_image.size(), _templ.size());
    _result.create(buf.result_size, CV_32F);

    UMat image  = _image.getUMat();
    UMat templ  = _templ.getUMat();

    UMat result = _result.getUMat();

    Size& block_size = buf.block_size;
    Size& dft_size = buf.dft_size;

    UMat& image_block = buf.image_block;
    UMat& templ_block = buf.templ_block;
    UMat& result_data = buf.result_data;

    UMat& image_spect = buf.image_spect;
    UMat& templ_spect = buf.templ_spect;
    UMat& result_spect = buf.result_spect;

    UMat templ_roi = templ;
    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
                   templ_block.cols - templ_roi.cols, BORDER_ISOLATED);

    dft(templ_block, templ_spect, 0, templ.rows);

    // Process all blocks of the result matrix
    for (int y = 0; y < result.rows; y += block_size.height)
    {
        for (int x = 0; x < result.cols; x += block_size.width)
        {
            Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
                                std::min(y + dft_size.height, image.rows) - y);
            Rect roi0(x, y, image_roi_size.width, image_roi_size.height);

            UMat image_roi(image, roi0);

            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
                           0, image_block.cols - image_roi.cols, BORDER_ISOLATED);

            dft(image_block, image_spect, 0);

            mulSpectrums(image_spect, templ_spect, result_spect, 0, true);

            dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);

            Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
                                 std::min(y + block_size.height, result.rows) - y);

            Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
            Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);

            UMat result_roi(result, roi1);
            UMat result_block(result_data, roi2);

            result_block.copyTo(result_roi);
        }
    }
    return true;
}
Example #30
0
bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptors)
{
    int dsize = params->descriptorSize();
    int nFeatures = keypoints.cols;
    if (nFeatures == 0)
    {
        _descriptors.release();
        return true;
    }
    _descriptors.create(nFeatures, dsize, CV_32F);
    UMat descriptors;
    if( _descriptors.isUMat() )
        descriptors = _descriptors.getUMat();
    else
        descriptors.create(nFeatures, dsize, CV_32F);

    ocl::Kernel kerCalcDesc, kerNormDesc;

    if( dsize == 64 )
    {
        kerCalcDesc.create("SURF_computeDescriptors64", ocl::xfeatures2d::surf_oclsrc, kerOpts);
        kerNormDesc.create("SURF_normalizeDescriptors64", ocl::xfeatures2d::surf_oclsrc, kerOpts);
    }
    else
    {
        CV_Assert(dsize == 128);
        kerCalcDesc.create("SURF_computeDescriptors128", ocl::xfeatures2d::surf_oclsrc, kerOpts);
        kerNormDesc.create("SURF_normalizeDescriptors128", ocl::xfeatures2d::surf_oclsrc, kerOpts);
    }

    size_t localThreads[] = {6, 6};
    size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]};

    if(haveImageSupport)
    {
        kerCalcDesc.args(imgTex,
                         img_rows, img_cols,
                         ocl::KernelArg::ReadOnlyNoSize(keypoints),
                         ocl::KernelArg::WriteOnlyNoSize(descriptors));
    }
    else
    {
        kerCalcDesc.args(ocl::KernelArg::ReadOnlyNoSize(img),
                         img_rows, img_cols,
                         ocl::KernelArg::ReadOnlyNoSize(keypoints),
                         ocl::KernelArg::WriteOnlyNoSize(descriptors));
    }

    if(!kerCalcDesc.run(2, globalThreads, localThreads, true))
        return false;

    size_t localThreads_n[] = {dsize, 1};
    size_t globalThreads_n[] = {nFeatures*localThreads_n[0], localThreads_n[1]};

    globalThreads[0] = nFeatures * localThreads[0];
    globalThreads[1] = localThreads[1];
    bool ok = kerNormDesc.args(ocl::KernelArg::ReadWriteNoSize(descriptors)).
                        run(2, globalThreads_n, localThreads_n, true);
    if(ok && !_descriptors.isUMat())
        descriptors.copyTo(_descriptors);
    return ok;
}