Пример #1
0
static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    int type = _image.type(), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    UMat image_sums, image_sqsums;
    integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);

    UMat templ_sqsum;
    if (!sumTemplate(_templ, templ_sqsum))
        return false;

    k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
           templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));

    size_t globalsize[2] = { result.cols, result.rows };

    return k.run(2, globalsize, NULL, false);
}
Пример #2
0
static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha,
                            InputArray _mask, int op_type )
{
    CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE ||
              op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);

    int stype = _src.type(), cn = CV_MAT_CN(stype);
    int sdepth = CV_MAT_DEPTH(stype), ddepth = _dst.depth();

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
            haveMask = !_mask.empty();

    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
        return false;

    const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT",
                                   "ACCUMULATE_WEIGHTED" };

    ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc,
                  format("-D %s%s -D srcT=%s -D cn=%d -D dstT=%s%s",
                         opMap[op_type], haveMask ? " -D HAVE_MASK" : "",
                         ocl::typeToStr(sdepth), cn, ocl::typeToStr(ddepth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dstarg = ocl::KernelArg::ReadWrite(dst),
            maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

    int argidx = k.set(0, srcarg);
    if (op_type == ACCUMULATE_PRODUCT)
        argidx = k.set(argidx, src2arg);
    argidx = k.set(argidx, dstarg);
    if (op_type == ACCUMULATE_WEIGHTED)
    {
        if (ddepth == CV_32F)
            argidx = k.set(argidx, (float)alpha);
        else
            argidx = k.set(argidx, alpha);
    }
    if (haveMask)
        k.set(argidx, maskarg);

    size_t globalsize[2] = { src.cols, src.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #3
0
static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    int type = src1.type(), depth = CV_MAT_DEPTH(type),
            kercn = ocl::predictOptimalVectorWidth(src1, src2);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !doubleSupport && depth == CV_64F )
        return false;

    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
    int ddepth = std::max(CV_32F, depth);

    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    size_t globalsize = dbsize * wgs;
    if (k.run(1, &globalsize, &wgs, false))
    {
        res = sum(db.getMat(ACCESS_READ))[0];
        return true;
    }
    return false;
}
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                      float h, float hForColorComponents,
                                      int templateWindowSize, int searchWindowSize)
{
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    UMat dst = _dst.getUMat();

    UMat src_lab;
    cvtColor(src, src_lab, COLOR_LBGR2Lab);

    UMat l(src.size(), CV_8U);
    UMat ab(src.size(), CV_8UC2);
    std::vector<UMat> l_ab(2), l_ab_denoised(2);
    l_ab[0] = l;
    l_ab[1] = ab;
    l_ab_denoised[0].create(src.size(), CV_8U);
    l_ab_denoised[1].create(src.size(), CV_8UC2);

    int from_to[] = { 0,0, 1,1, 2,2 };
    mixChannels(std::vector<UMat>(1, src_lab), l_ab, from_to, 3);

    fastNlMeansDenoising(l_ab[0], l_ab_denoised[0], h, templateWindowSize, searchWindowSize);
    fastNlMeansDenoising(l_ab[1], l_ab_denoised[1], hForColorComponents, templateWindowSize, searchWindowSize);

    UMat dst_lab(src.size(), CV_8UC3);
    mixChannels(l_ab_denoised, std::vector<UMat>(1, dst_lab), from_to, 3);

    cvtColor(dst_lab, dst, COLOR_Lab2LBGR, src.channels());
    return true;
}
Пример #5
0
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0  ||
         !(sdepth == CV_32S || sdepth == CV_32F) )
        return false;

    ocl::Kernel k1("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    if (k1.empty())
        return false;

    Size size = _src.size(), t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width),
            ssize(size.width + 1, size.height + 1);
    _sum.create(ssize, sdepth);
    UMat src = _src.getUMat(), t_sum(t_size, sdepth), sum = _sum.getUMat();
    t_sum = t_sum(Range::all(), Range(0, size.height));

    int offset = (int)src.offset / vlen, pre_invalid = (int)src.offset % vlen;
    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
    int sum_offset = (int)sum.offset / vlen;

    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
            offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step);
    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
    if (!k1.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel k2("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc,
                   format("-D sdepth=%d", sdepth));
    k2.args(ocl::KernelArg::PtrReadWrite(t_sum), ocl::KernelArg::PtrWriteOnly(sum),
            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)sum.step, sum_offset);

    size_t gt2 = t_sum.cols  * 32, lt2 = 256;
    return k2.run(1, &gt2, &lt2, false);
}
Пример #6
0
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums, temp;
    integral(_image, image_sums, CV_32F);

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ  = _templ.getUMat();
    UMat result = _result.getUMat();

    if (cn==1)
    {
        Scalar templMean = mean(templ);
        float templ_sum = (float)templMean[0];

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = (Vec4f)mean(templ);

       k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #7
0
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))));

    size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #8
0
    void detect( InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask )
    {
        CV_INSTRUMENT_REGION()

        std::vector<Point2f> corners;

        if (_image.isUMat())
        {
            UMat ugrayImage;
            if( _image.type() != CV_8U )
                cvtColor( _image, ugrayImage, COLOR_BGR2GRAY );
            else
                ugrayImage = _image.getUMat();

            goodFeaturesToTrack( ugrayImage, corners, nfeatures, qualityLevel, minDistance, _mask,
                                 blockSize, useHarrisDetector, k );
        }
        else
        {
            Mat image = _image.getMat(), grayImage = image;
            if( image.type() != CV_8U )
                cvtColor( image, grayImage, COLOR_BGR2GRAY );

            goodFeaturesToTrack( grayImage, corners, nfeatures, qualityLevel, minDistance, _mask,
                                blockSize, useHarrisDetector, k );
        }

        keypoints.resize(corners.size());
        std::vector<Point2f>::const_iterator corner_it = corners.begin();
        std::vector<KeyPoint>::iterator keypoint_it = keypoints.begin();
        for( ; corner_it != corners.end(); ++corner_it, ++keypoint_it )
            *keypoint_it = KeyPoint( *corner_it, (float)blockSize );

    }
Пример #9
0
static bool sumTemplate(InputArray _src, UMat & result)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
                  format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
                         ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
                         ocl::convertTypeStr(depth, wdepth, cn, cvt),
                         (int)wgs, wgs2_aligned));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    result.create(1, 1, CV_32FC1);

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            resarg = ocl::KernelArg::PtrWriteOnly(result);

    k.args(srcarg, src.cols, (int)src.total(), resarg);

    size_t globalsize = wgs;
    return k.run(1, &globalsize, &wgs, false);
}
Пример #10
0
static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
{
    _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);

    if (_image.channels() == 1)
        return(convolve_dft(_image, _templ, _result));
    else
    {
        UMat image = _image.getUMat();
        UMat templ = _templ.getUMat();
        UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F);
        bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_);
        if (ok==false)
            return false;
        UMat result = _result.getUMat();
        return (extractFirstChannel_32F(result_, _result, _image.channels()));
    }
}
Пример #11
0
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
         (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
        return false;

    char cvt[40];
    String opts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s",
                         sdepth, sqdepth, ocl::typeToStr(sqdepth), ocl::typeToStr(sqdepth),
                         ocl::convertTypeStr(sdepth, sqdepth, 4, cvt),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel k1("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, opts);
    if (k1.empty())
        return false;

    Size size = _src.size(), dsize = Size(size.width + 1, size.height + 1),
            t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width);
    UMat src = _src.getUMat(), t_sum(t_size, sdepth), t_sqsum(t_size, sqdepth);
    t_sum = t_sum(Range::all(), Range(0, size.height));
    t_sqsum = t_sqsum(Range::all(), Range(0, size.height));

    _sum.create(dsize, sdepth);
    _sqsum.create(dsize, sqdepth);
    UMat sum = _sum.getUMat(), sqsum = _sqsum.getUMat();

    int offset = src.offset / vlen;
    int pre_invalid = src.offset % vlen;
    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
    int sum_offset = sum.offset / sum.elemSize();
    int sqsum_offset = sqsum.offset / sqsum.elemSize();
    CV_Assert(sqsum.offset % sqsum.elemSize() == 0);

    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
            ocl::KernelArg::PtrWriteOnly(t_sqsum), offset, pre_invalid, src.rows,
            src.cols, (int)src.step, (int)t_sum.step, (int)t_sqsum.step);

    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
    if (!k1.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel k2("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, opts);
    if (k2.empty())
        return false;

    k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrReadOnly(t_sqsum),
            ocl::KernelArg::PtrWriteOnly(sum), ocl::KernelArg::PtrWriteOnly(sqsum),
            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)t_sqsum.step,
            (int)sum.step, (int)sqsum.step, sum_offset, sqsum_offset);

    size_t gt2 = t_sum.cols  * 32, lt2 = 256;
    return k2.run(1, &gt2, &lt2, false);
}
Пример #12
0
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
                                  InputArray _kernelX, InputArray _kernelY, double delta, int borderType)
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
         (_src.offset() == 0) && (_src.step() % 4 == 0) &&
         (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
        return false;

    Mat kernelX = _kernelX.getMat().reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = _kernelY.getMat().reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;

    if (ddepth < 0)
        ddepth = sdepth;

    Size size = _src.size();
    size_t globalsize[2] = { 0, 0 };
    size_t localsize[2] = { 0, 0 };

    globalsize[0] = size.width / 16;
    globalsize[1] = size.height / 2;

    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
    char build_opts[1024];
    sprintf(build_opts, "-D %s %s%s", borderMap[borderType],
            ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(),
            ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str());

    ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts);
    if (kernel.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(size, CV_MAKETYPE(ddepth, cn));
    if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
        return false;
    UMat dst = _dst.getUMat();

    int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
    idxArg = kernel.set(idxArg, (int)src.step);
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    idxArg = kernel.set(idxArg, (int)dst.step);
    idxArg = kernel.set(idxArg, (int)dst.rows);
    idxArg = kernel.set(idxArg, (int)dst.cols);
    idxArg = kernel.set(idxArg, static_cast<float>(delta));

    return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
}
Пример #13
0
static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    if (useNaive(_templ.size()))
        return( matchTemplateNaive_CCORR(_image, _templ, _result));
    else
    {
        if(_image.depth() == CV_8U)
        {
            UMat imagef, templf;
            UMat image = _image.getUMat();
            UMat templ = _templ.getUMat();
            image.convertTo(imagef, CV_32F);
            templ.convertTo(templf, CV_32F);
            return(convolve_32F(imagef, templf, _result));
        }
        else
        {
            return(convolve_32F(_image, _templ, _result));
        }
    }
}
Пример #14
0
static bool ocl_blendLinear( InputArray _src1, InputArray _src2, InputArray _weights1, InputArray _weights2, OutputArray _dst )
{
    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    char cvt[30];
    ocl::Kernel k("blendLinear", ocl::imgproc::blend_linear_oclsrc,
                  format("-D T=%s -D cn=%d -D convertToT=%s", ocl::typeToStr(depth),
                         cn, ocl::convertTypeStr(CV_32F, depth, 1, cvt)));
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), weights1 = _weights1.getUMat(),
            weights2 = _weights2.getUMat(), dst = _dst.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
           ocl::KernelArg::ReadOnlyNoSize(weights1), ocl::KernelArg::ReadOnlyNoSize(weights2),
           ocl::KernelArg::WriteOnly(dst));

    size_t globalsize[2] = { (size_t)dst.cols, (size_t)dst.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #15
0
static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst)
{
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    for (int y = 0; y < ny; ++y)
        for (int x = 0; x < nx; ++x)
        {
            Rect roi(x * src.cols, y * src.rows, src.cols, src.rows);
            UMat hdr(dst, roi);
            src.copyTo(hdr);
        }
    return true;
}
Пример #16
0
static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    char cvt[40];
    ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
                         ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #17
0
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    ocl::Device dev = ocl::Device::getDefault();
    int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
    int rated_cn = cn;
    int wtype1 = wtype;

    if (pxPerWIx!=1)
    {
        rated_cn = pxPerWIx;
        type = CV_MAKE_TYPE(depth, rated_cn);
        wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
    }

    char cvt[40];
    char cvt1[40];
    const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
    const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);

    ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
                  format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
                         convertToWT, convertToWT1, cn, pxPerWIx));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    size_t globalsize[2] = { (result.cols+pxPerWIx-1)/pxPerWIx, result.rows};
    return k.run(2, globalsize, NULL, false);
}
Пример #18
0
static bool ocl_MultiBandBlender_feed(InputArray _src, InputArray _weight,
        InputOutputArray _dst, InputOutputArray _dst_weight)
{
    String buildOptions = "-D DEFINE_feed";
    ocl::buildOptionsAddMatrixDescription(buildOptions, "src", _src);
    ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
    ocl::buildOptionsAddMatrixDescription(buildOptions, "dst", _dst);
    ocl::buildOptionsAddMatrixDescription(buildOptions, "dstWeight", _dst_weight);
    ocl::Kernel k("feed", ocl::stitching::multibandblend_oclsrc, buildOptions);
    if (k.empty())
        return false;

    UMat src = _src.getUMat();

    k.args(ocl::KernelArg::ReadOnly(src),
           ocl::KernelArg::ReadOnly(_weight.getUMat()),
           ocl::KernelArg::ReadWrite(_dst.getUMat()),
           ocl::KernelArg::ReadWrite(_dst_weight.getUMat())
           );

    size_t globalsize[2] = {(size_t)src.cols, (size_t)src.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #19
0
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums, temp;
    integral(_image, temp);

    if (temp.depth() == CV_64F)
        temp.convertTo(image_sums, CV_32F);
    else
        image_sums = temp;

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    if (cn == 1)
    {
        float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area();

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
                templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = sum(templ) / tsize.area();

        if (cn == 2)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1]);
        else if (cn==3)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1], templ_sum[2]);
        else
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]);
    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #20
0
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        return false;

    Size ssize = _src.size();
    if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
        return false;

    UMat src = _src.getUMat();
    Size dsize = Size(ssize.width * 2, ssize.height * 2);
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 16;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels, local_size
    );
    size_t globalThreads[2] = { dst.cols, dst.rows };
    size_t localThreads[2] = { local_size, local_size };
    ocl::Kernel k;
    if (ocl::Device::getDefault().isIntel() && channels == 1)
    {
        k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
        globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
    }
    else
        k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);

    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
    return k.run(2, globalThreads, localThreads, false);
}
Пример #21
0
static bool ocl_updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,
                                     float timestamp, float delbound )
{
    ocl::Kernel k("updateMotionHistory", ocl::video::updatemotionhistory_oclsrc);
    if (k.empty())
        return false;

    UMat silh = _silhouette.getUMat(), mhi = _mhi.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(silh), ocl::KernelArg::ReadWrite(mhi),
           timestamp, delbound);

    size_t globalsize[2] = { silh.cols, silh.rows };
    return k.run(2, globalsize, NULL, false);
}
Пример #22
0
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (cn > 4 || (depth == CV_64F && !doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    if (dsize.height < 2 || dsize.width < 2)
        return false;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
            std::abs(dsize.width*2 - ssize.width) <= 2 &&
            std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 256;
    int kercn = 1;
    if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel())
        kercn = 4;
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                       "BORDER_REFLECT_101" };
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)),
            ocl::convertTypeStr(float_depth, depth, cn, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, cn, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth),
            cn, kercn, float_depth, borderMap[borderType], local_size
    );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    size_t localThreads[2]  = { local_size/kercn, 1 };
    size_t globalThreads[2] = { (src.cols + (kercn-1))/kercn, (dst.rows + 1) / 2 };
    return k.run(2, globalThreads, localThreads, false);
}
Пример #23
0
UMat cv::superres::arrGetUMat(InputArray arr, UMat& buf)
{
    switch (arr.kind())
    {
    case _InputArray::GPU_MAT:
        arr.getGpuMat().download(buf);
        return buf;

    case _InputArray::OPENGL_BUFFER:
        arr.getOGlBuffer().copyTo(buf);
        return buf;

    default:
        return arr.getUMat();
    }
}
Пример #24
0
void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
    if( _mask.empty() )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    {
        UMatData * prevu = _dst.getUMat().u;
        _dst.create( dims, size, type() );

        UMat dst = _dst.getUMat();

        bool haveDstUninit = false;
        if( prevu != dst.u ) // do not leave dst uninitialized
            haveDstUninit = true;

        String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
                             ocl::memopTypeToStr(depth()), cn, mcn,
                             haveDstUninit ? " -D HAVE_DST_UNINIT" : "");

        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
        if (!k.empty())
        {
            k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
                   ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
                                   ocl::KernelArg::ReadWrite(dst));

            size_t globalsize[2] = { cols, rows };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
}
Пример #25
0
static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
{
    const ocl::Device & d = ocl::Device::getDefault();

#ifdef __ANDROID__
    if (d.isNVidia())
        return false;
#endif
    const int cn = _src.channels();
    if (cn > 4)
        return false;
    int type = _src.type(), depth = CV_MAT_DEPTH(type);
    bool doubleSupport = d.doubleFPConfig() > 0,
            haveMask = _mask.kind() != _InputArray::NONE;

    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    UMat src = _src.getUMat();

    if (normType == NORM_INF)
    {
        if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
                           std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
            return false;
    }
    else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
    {
        Scalar sc;
        bool unstype = depth == CV_8U || depth == CV_16U;

        if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
                    OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
            return false;

        double s = 0.0;
        for (int i = 0; i < (haveMask ? cn : 1); ++i)
            s += sc[i];

        result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
    }

    return true;
}
Пример #26
0
    static bool ocl_diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst)
    {
        ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc);
        if (k.empty())
            return false;

        UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
        _dst.create(src1.size(), src1.type());
        UMat dst = _dst.getUMat();

        int cn = src1.channels();
        k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
               ocl::KernelArg::ReadOnlyNoSize(src2),
               ocl::KernelArg::WriteOnly(dst, cn));

        size_t globalsize[2] = { (size_t)src1.cols * cn, (size_t)src1.rows };
        return k.run(2, globalsize, NULL, false);
    }
Пример #27
0
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Device dev = ocl::Device::getDefault();
    int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;

    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
    double min_val = min_vals[depth];

    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));

    size_t globalsize[2] = { static_cast<size_t>(dst.cols * cn / kercn), static_cast<size_t>(dst.rows) };
    globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
    return k.run(2, globalsize, NULL, false);
}
Пример #28
0
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
        return false;

    static const int tileSize = 16;

    String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
                                ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
                                tileSize,
                                doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (kcols.empty())
        return false;

    UMat src = _src.getUMat();
    Size src_size = src.size();
    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
    UMat buf(bufsize, sdepth);
    UMat buf_sq(bufsize, sqdepth);
    kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq));
    size_t gt = src.cols, lt = tileSize;
    if (!kcols.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (krows.empty())
        return false;

    Size sumsize(src_size.width + 1, src_size.height + 1);
    _sum.create(sumsize, sdepth);
    UMat sum = _sum.getUMat();
    _sqsum.create(sumsize, sqdepth);
    UMat sum_sq = _sqsum.getUMat();

    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
    gt = src.rows;
    return krows.run(1, &gt, &lt, false);
}
Пример #29
0
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if ((depth == CV_64F) && !(doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
            std::abs(dsize.width*2 - ssize.width) <= 2 &&
            std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels
    );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    size_t localThreads[2]  = { 256, 1 };
    size_t globalThreads[2] = { src.cols, dst.rows };
    return k.run(2, globalThreads, localThreads, false);
}
Пример #30
0
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
{
    int type = _src.type(), cn = CV_MAT_CN(type);

    if (cn > 4 || cn == 3)
        return false;

    const char * kernelName;
    int flipType;

    if (flipCode == 0)
        kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
    else if (flipCode > 0)
        kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
    else
        kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;

    Size size = _src.size();
    int cols = size.width, rows = size.height;
    if ((cols == 1 && flipType == FLIP_COLS) ||
            (rows == 1 && flipType == FLIP_ROWS) ||
            (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
    {
        _src.copyTo(_dst);
        return true;
    }

    ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
        format( "-D type=%s", ocl::memopTypeToStr(type)));
    if (k.empty())
        return false;

    _dst.create(size, type);
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    cols = flipType == FLIP_COLS ? ((cols+1)/2) : cols;
    rows = flipType & FLIP_ROWS ? ((rows+1)/2) : rows;

    size_t globalsize[2] = { cols, rows };
    return k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), rows, cols).run(2, globalsize, NULL, false);
}