static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                      float h, float hForColorComponents,
                                      int templateWindowSize, int searchWindowSize)
{
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    UMat dst = _dst.getUMat();

    UMat src_lab;
    cvtColor(src, src_lab, COLOR_LBGR2Lab);

    UMat l(src.size(), CV_8U);
    UMat ab(src.size(), CV_8UC2);
    std::vector<UMat> l_ab(2), l_ab_denoised(2);
    l_ab[0] = l;
    l_ab[1] = ab;
    l_ab_denoised[0].create(src.size(), CV_8U);
    l_ab_denoised[1].create(src.size(), CV_8UC2);

    int from_to[] = { 0,0, 1,1, 2,2 };
    mixChannels(std::vector<UMat>(1, src_lab), l_ab, from_to, 3);

    fastNlMeansDenoising(l_ab[0], l_ab_denoised[0], h, templateWindowSize, searchWindowSize);
    fastNlMeansDenoising(l_ab[1], l_ab_denoised[1], hForColorComponents, templateWindowSize, searchWindowSize);

    UMat dst_lab(src.size(), CV_8UC3);
    mixChannels(l_ab_denoised, std::vector<UMat>(1, dst_lab), from_to, 3);

    cvtColor(dst_lab, dst, COLOR_Lab2LBGR, src.channels());
    return true;
}
Example #2
0
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums, temp;
    integral(_image, image_sums, CV_32F);

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ  = _templ.getUMat();
    UMat result = _result.getUMat();
    Size tsize = templ.size();

    if (cn==1)
    {
        Scalar templMean = mean(templ);
        float templ_sum = (float)templMean[0];

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = (Vec4f)mean(templ);

       k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #3
0
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))));

    size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #4
0
OCL_PERF_TEST_P(stitch, b12, TEST_DETECTORS)
{
    UMat pano;

    vector<Mat> imgs;
    imgs.push_back( imread( getDataPath("stitching/b1.png") ) );
    imgs.push_back( imread( getDataPath("stitching/b2.png") ) );

    Ptr<detail::FeaturesFinder> featuresFinder = getFeatureFinder(GetParam());
    Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb"
            ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE)
            : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);

    declare.iterations(20);

    while(next())
    {
        Stitcher stitcher = Stitcher::createDefault();
        stitcher.setFeaturesFinder(featuresFinder);
        stitcher.setFeaturesMatcher(featuresMatcher);
        stitcher.setWarper(makePtr<SphericalWarper>());
        stitcher.setRegistrationResol(WORK_MEGAPIX);

        startTimer();
        stitcher.stitch(imgs, pano);
        stopTimer();
    }

    EXPECT_NEAR(pano.size().width, 1124, 50);
    EXPECT_NEAR(pano.size().height, 644, 30);

    SANITY_CHECK_NOTHING();
}
Example #5
0
UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
    bool haveMask = !_mask.empty();
#ifdef HAVE_OPENCL
    int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp);

    if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
    {
        Mat value = _value.getMat();
        CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
        int kercn = haveMask || cn == 3 ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)),
                kertp = CV_MAKE_TYPE(d, kercn);

        double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
                           0, 0, 0, 0, 0, 0, 0, 0 };
        convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn);

        int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
        String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
                             ocl::memopTypeToStr(kertp), rowsPerWI,
                             ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)),
                             ocl::memopTypeToStr(d), kercn);

        ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
        if( !setK.empty() )
        {
            ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn);
            UMat mask;

            if( haveMask )
            {
                mask = _mask.getUMat();
                CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
                ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
                        dstarg = ocl::KernelArg::ReadWrite(*this);
                setK.args(maskarg, dstarg, scalararg);
            }
            else
            {
                ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn);
                setK.args(dstarg, scalararg);
            }

            size_t globalsize[] = { cols * cn / kercn, (rows + rowsPerWI - 1) / rowsPerWI };
            if( setK.run(2, globalsize, NULL, false) )
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return *this;
            }
        }
    }
#endif
    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    m.setTo(_value, _mask);
    return *this;
}
Example #6
0
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums, temp;
    integral(_image, temp);

    if (temp.depth() == CV_64F)
        temp.convertTo(image_sums, CV_32F);
    else
        image_sums = temp;

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    if (cn == 1)
    {
        float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area();

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
                templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0);
        templ_sum = sum(templ) / tsize.area();

        if (cn == 2)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1]);
        else if (cn==3)
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1], templ_sum[2]);
        else
            k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
                   templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]);
    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #7
0
OCL_PERF_TEST_P(stitch, boat, TEST_DETECTORS)
{
    Size expected_dst_size(10789, 2663);
    checkDeviceMaxMemoryAllocSize(expected_dst_size, CV_16SC3, 4);

#if defined(_WIN32) && !defined(_WIN64)
    if (cv::ocl::useOpenCL())
        throw ::perf::TestBase::PerfSkipTestException();
#endif

    UMat pano;

    vector<Mat> _imgs;
    _imgs.push_back( imread( getDataPath("stitching/boat1.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat2.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat3.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat4.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat5.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat6.jpg") ) );
    vector<UMat> imgs = ToUMat(_imgs);

    Ptr<detail::FeaturesFinder> featuresFinder = getFeatureFinder(GetParam());
    Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb"
            ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE)
            : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);

    declare.iterations(20);

    while(next())
    {
        Stitcher stitcher = Stitcher::createDefault();
        stitcher.setFeaturesFinder(featuresFinder);
        stitcher.setFeaturesMatcher(featuresMatcher);
        stitcher.setWarper(makePtr<SphericalWarper>());
        stitcher.setRegistrationResol(WORK_MEGAPIX);

        startTimer();
        stitcher.stitch(imgs, pano);
        stopTimer();
    }

    EXPECT_NEAR(pano.size().width, expected_dst_size.width, 200);
    EXPECT_NEAR(pano.size().height, expected_dst_size.height, 100);

    SANITY_CHECK_NOTHING();
}
Example #8
0
UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
    bool haveMask = !_mask.empty();
    int tp = type(), cn = CV_MAT_CN(tp);
    if( dims <= 2 && cn <= 4 && cn != 3 && ocl::useOpenCL() )
    {
        Mat value = _value.getMat();
        CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
        double buf[4];
        convertAndUnrollScalar(value, tp, (uchar*)buf, 1);

        char opts[1024];
        sprintf(opts, "-D dstT=%s", ocl::memopTypeToStr(tp));

        ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
        if( !setK.empty() )
        {
            ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE(tp));
            UMat mask;

            if( haveMask )
            {
                mask = _mask.getUMat();
                CV_Assert( mask.size() == size() && mask.type() == CV_8U );
                ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
                ocl::KernelArg dstarg = ocl::KernelArg::ReadWrite(*this);
                setK.args(maskarg, dstarg, scalararg);
            }
            else
            {
                ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this);
                setK.args(dstarg, scalararg);
            }

            size_t globalsize[] = { cols, rows };
            if( setK.run(2, globalsize, 0, false) )
                return *this;
        }
    }
    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    m.setTo(_value, _mask);
    return *this;
}
Example #9
0
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
        return false;

    static const int tileSize = 16;

    String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
                                ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
                                tileSize,
                                doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (kcols.empty())
        return false;

    UMat src = _src.getUMat();
    Size src_size = src.size();
    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
    UMat buf(bufsize, sdepth);
    UMat buf_sq(bufsize, sqdepth);
    kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq));
    size_t gt = src.cols, lt = tileSize;
    if (!kcols.run(1, &gt, &lt, false))
        return false;

    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (krows.empty())
        return false;

    Size sumsize(src_size.width + 1, src_size.height + 1);
    _sum.create(sumsize, sdepth);
    UMat sum = _sum.getUMat();
    _sqsum.create(sumsize, sqdepth);
    UMat sum_sq = _sqsum.getUMat();

    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
    gt = src.rows;
    return krows.run(1, &gt, &lt, false);
}
Example #10
0
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Device dev = ocl::Device::getDefault();
    int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;

    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
    double min_val = min_vals[depth];

    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));

    size_t globalsize[2] = { static_cast<size_t>(dst.cols * cn / kercn), static_cast<size_t>(dst.rows) };
    globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
    return k.run(2, globalsize, NULL, false);
}
Example #11
0
OCL_PERF_TEST_P(stitch, boat, TEST_DETECTORS)
{
    UMat pano;

    vector<Mat> _imgs;
    _imgs.push_back( imread( getDataPath("stitching/boat1.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat2.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat3.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat4.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat5.jpg") ) );
    _imgs.push_back( imread( getDataPath("stitching/boat6.jpg") ) );
    vector<UMat> imgs = ToUMat(_imgs);

    Ptr<detail::FeaturesFinder> featuresFinder = GetParam() == "orb"
            ? Ptr<detail::FeaturesFinder>(new detail::OrbFeaturesFinder())
            : Ptr<detail::FeaturesFinder>(new detail::SurfFeaturesFinder());

    Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb"
            ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE)
            : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);

    declare.iterations(20);

    while(next())
    {
        Stitcher stitcher = Stitcher::createDefault();
        stitcher.setFeaturesFinder(featuresFinder);
        stitcher.setFeaturesMatcher(featuresMatcher);
        stitcher.setWarper(makePtr<SphericalWarper>());
        stitcher.setRegistrationResol(WORK_MEGAPIX);

        startTimer();
        stitcher.stitch(imgs, pano);
        stopTimer();
    }

    EXPECT_NEAR(pano.size().width, 10789, 200);
    EXPECT_NEAR(pano.size().height, 2663, 100);

    SANITY_CHECK_NOTHING();
}
Example #12
0
    static bool ocl_calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, const UMat & ubtvWeights)
    {
        int cn = _src.channels();
        ocl::Kernel k("calcBtvRegularization", ocl::superres::superres_btvl1_oclsrc,
                      format("-D cn=%d", cn));
        if (k.empty())
            return false;

        UMat src = _src.getUMat();
        _dst.create(src.size(), src.type());
        _dst.setTo(Scalar::all(0));
        UMat dst = _dst.getUMat();

        const int ksize = (btvKernelSize - 1) / 2;

        k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
              ksize, ocl::KernelArg::PtrReadOnly(ubtvWeights));

        size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows };
        return k.run(2, globalsize, NULL, false);
    }
Example #13
0
int main(int argc, char** argv)
{
    const char* keys =
        "{ i input    | | specify input image }"
        "{ h help     | | print help message }";

    cv::CommandLineParser args(argc, argv, keys);
    if (args.has("help"))
    {
        cout << "Usage : " << argv[0] << " [options]" << endl;
        cout << "Available options:" << endl;
        args.printMessage();
        return EXIT_SUCCESS;
    }

    cv::ocl::Context ctx = cv::ocl::Context::getDefault();
    if (!ctx.ptr())
    {
        cerr << "OpenCL is not available" << endl;
        return 1;
    }
    cv::ocl::Device device = cv::ocl::Device::getDefault();
    if (!device.compilerAvailable())
    {
        cerr << "OpenCL compiler is not available" << endl;
        return 1;
    }


    UMat src;
    {
        string image_file = args.get<string>("i");
        if (!image_file.empty())
        {
            Mat image = imread(image_file);
            if (image.empty())
            {
                cout << "error read image: " << image_file << endl;
                return 1;
            }
            cvtColor(image, src, COLOR_BGR2GRAY);
        }
        else
        {
            Mat frame(cv::Size(640, 480), CV_8U, Scalar::all(128));
            Point p(frame.cols / 2, frame.rows / 2);
            line(frame, Point(0, frame.rows / 2), Point(frame.cols, frame.rows / 2), 1);
            circle(frame, p, 200, Scalar(32, 32, 32), 8, LINE_AA);
            string str = "OpenCL";
            int baseLine = 0;
            Size box = getTextSize(str, FONT_HERSHEY_COMPLEX, 2, 5, &baseLine);
            putText(frame, str, Point((frame.cols - box.width) / 2, (frame.rows - box.height) / 2 + baseLine),
                    FONT_HERSHEY_COMPLEX, 2, Scalar(255, 255, 255), 5, LINE_AA);
            frame.copyTo(src);
        }
    }


    cv::String module_name; // empty to disable OpenCL cache

    {
        cout << "OpenCL program source: " << endl;
        cout << "======================================================================================================" << endl;
        cout << opencl_kernel_src << endl;
        cout << "======================================================================================================" << endl;
        //! [Define OpenCL program source]
        cv::ocl::ProgramSource source(module_name, "simple", opencl_kernel_src, "");
        //! [Define OpenCL program source]

        //! [Compile/build OpenCL for current OpenCL device]
        cv::String errmsg;
        cv::ocl::Program program(source, "", errmsg);
        if (program.ptr() == NULL)
        {
            cerr << "Can't compile OpenCL program:" << endl << errmsg << endl;
            return 1;
        }
        //! [Compile/build OpenCL for current OpenCL device]

        if (!errmsg.empty())
        {
            cout << "OpenCL program build log:" << endl << errmsg << endl;
        }

        //! [Get OpenCL kernel by name]
        cv::ocl::Kernel k("magnutude_filter_8u", program);
        if (k.empty())
        {
            cerr << "Can't get OpenCL kernel" << endl;
            return 1;
        }
        //! [Get OpenCL kernel by name]

        UMat result(src.size(), CV_8UC1);

        //! [Define kernel parameters and run]
        size_t globalSize[2] = {(size_t)src.cols, (size_t)src.rows};
        size_t localSize[2] = {8, 8};
        bool executionResult = k
            .args(
                cv::ocl::KernelArg::ReadOnlyNoSize(src), // size is not used (similar to 'dst' size)
                cv::ocl::KernelArg::WriteOnly(result),
                (float)2.0
            )
            .run(2, globalSize, localSize, true);
        if (!executionResult)
        {
            cerr << "OpenCL kernel launch failed" << endl;
            return 1;
        }
        //! [Define kernel parameters and run]

        imshow("Source", src);
        imshow("Result", result);

        for (;;)
        {
            int key = waitKey();
            if (key == 27/*ESC*/ || key == 'q' || key == 'Q')
                break;
        }
    }
    return 0;
}
Example #14
0
static bool ocl_moments( InputArray _src, Moments& m)
{
    const int TILE_SIZE = 32;
    const int K = 10;
    ocl::Kernel k("moments", ocl::imgproc::moments_oclsrc, format("-D TILE_SIZE=%d", TILE_SIZE));
    if( k.empty() )
        return false;

    UMat src = _src.getUMat();
    Size sz = src.size();
    int xtiles = (sz.width + TILE_SIZE-1)/TILE_SIZE;
    int ytiles = (sz.height + TILE_SIZE-1)/TILE_SIZE;
    int ntiles = xtiles*ytiles;
    UMat umbuf(1, ntiles*K, CV_32S);

    size_t globalsize[] = {xtiles, sz.height}, localsize[] = {1, TILE_SIZE};
    bool ok = k.args(ocl::KernelArg::ReadOnly(src),
                     ocl::KernelArg::PtrWriteOnly(umbuf),
                     xtiles).run(2, globalsize, localsize, true);
    if(!ok)
        return false;
    Mat mbuf = umbuf.getMat(ACCESS_READ);
    for( int i = 0; i < ntiles; i++ )
    {
        double x = (i % xtiles)*TILE_SIZE, y = (i / xtiles)*TILE_SIZE;
        const int* mom = mbuf.ptr<int>() + i*K;
        double xm = x * mom[0], ym = y * mom[0];

        // accumulate moments computed in each tile

        // + m00 ( = m00' )
        m.m00 += mom[0];

        // + m10 ( = m10' + x*m00' )
        m.m10 += mom[1] + xm;

        // + m01 ( = m01' + y*m00' )
        m.m01 += mom[2] + ym;

        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
        m.m20 += mom[3] + x * (mom[1] * 2 + xm);

        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
        m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];

        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
        m.m02 += mom[5] + y * (mom[2] * 2 + ym);

        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
        m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));

        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
        m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];

        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
        m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];

        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
        m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
    }

    return true;
}
Example #15
0
void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& newPts, const UMat& newNrm,
                          Affine3f pose, int level, Matx66f &A, Vec6f &b) const
{
    CV_TRACE_FUNCTION();

    Size oldSize = oldPts.size(), newSize = newPts.size();
    CV_Assert(oldSize == oldNrm.size());
    CV_Assert(newSize == newNrm.size());

    // calculate 1x7 vector ab to produce b and upper triangle of A:
    // [A|b] = ab*(ab^t)
    // and then reduce it across work groups

    cv::String errorStr;
    ocl::ProgramSource source = ocl::rgbd::icp_oclsrc;
    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
    ocl::Kernel k;
    k.create("getAb", source, options, &errorStr);

    if(k.empty())
        throw std::runtime_error("Failed to create kernel: " + errorStr);

    size_t globalSize[2];
    globalSize[0] = (size_t)newPts.cols;
    globalSize[1] = (size_t)newPts.rows;

    const ocl::Device& device = ocl::Device::getDefault();
    // workaround for Intel's integrated GPU
    size_t wgsLimit = device.isIntel() ? 64 : device.maxWorkGroupSize();
    size_t memSize = device.localMemSize();
    // local memory should keep upperTriangles for all threads in group for reduce
    const size_t ltsz = UTSIZE*sizeof(float);
    const size_t lcols = 32;
    size_t lrows = min(memSize/ltsz, wgsLimit)/lcols;
    // round lrows down to 2^n
    lrows = roundDownPow2(lrows);
    size_t localSize[2] = {lcols, lrows};
    Size ngroups((int)divUp(globalSize[0], (unsigned int)localSize[0]),
                 (int)divUp(globalSize[1], (unsigned int)localSize[1]));

    // size of local buffer for group-wide reduce
    size_t lsz = localSize[0]*localSize[1]*ltsz;

    Intr::Projector proj = intrinsics.scale(level).makeProjector();
    Vec2f fxy(proj.fx, proj.fy), cxy(proj.cx, proj.cy);

    UMat& groupedSumGpu = groupedSumBuffers[level];
    groupedSumGpu.create(Size(ngroups.width*UTSIZE, ngroups.height),
                         CV_32F);
    groupedSumGpu.setTo(0);

    // TODO: optimization possible:
    // samplers instead of oldPts/oldNrm (mask needed)
    k.args(ocl::KernelArg::ReadOnlyNoSize(oldPts),
           ocl::KernelArg::ReadOnlyNoSize(oldNrm),
           oldSize,
           ocl::KernelArg::ReadOnlyNoSize(newPts),
           ocl::KernelArg::ReadOnlyNoSize(newNrm),
           newSize,
           ocl::KernelArg::Constant(pose.matrix.val,
                                    sizeof(pose.matrix.val)),
           fxy.val, cxy.val,
           distanceThreshold*distanceThreshold,
           cos(angleThreshold),
           //TODO: replace by KernelArg::Local(lsz)
           ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
           ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu)
           );

    if(!k.run(2, globalSize, localSize, true))
        throw std::runtime_error("Failed to run kernel");

    float upperTriangle[UTSIZE];
    for(int i = 0; i < UTSIZE; i++)
        upperTriangle[i] = 0;

    Mat groupedSumCpu = groupedSumGpu.getMat(ACCESS_READ);

    for(int y = 0; y < ngroups.height; y++)
    {
        const float* rowr = groupedSumCpu.ptr<float>(y);
        for(int x = 0; x < ngroups.width; x++)
        {
            const float* p = rowr + x*UTSIZE;
            for(int j = 0; j < UTSIZE; j++)
            {
                upperTriangle[j] += p[j];
            }
        }
    }
    groupedSumCpu.release();

    ABtype sumAB = ABtype::zeros();
    int pos = 0;
    for(int i = 0; i < 6; i++)
    {
        for(int j = i; j < 7; j++)
        {
            sumAB(i, j) = upperTriangle[pos++];
        }
    }

    // splitting AB matrix to A and b
    for(int i = 0; i < 6; i++)
    {
        // augment lower triangle of A by symmetry
        for(int j = i; j < 6; j++)
        {
            A(i, j) = A(j, i) = sumAB(i, j);
        }

        b(i) = sumAB(i, 6);
    }
}
Example #16
0
static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat temp, image_sums, image_sqsums;
    integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);

    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
        format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    float scale = 1.f / tsize.area();

    if (cn == 1)
    {
        float templ_sum = (float)sum(templ)[0];

        multiply(templ, templ, temp, 1, CV_32F);
        float templ_sqsum = (float)sum(temp)[0];

        templ_sqsum -= scale * templ_sum * templ_sum;
        templ_sum   *= scale;

        if (templ_sqsum < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return true;
        }

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
                      ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
        templ_sum = sum(templ);

        multiply(templ, templ, temp, 1, CV_32F);
        templ_sqsum = sum(temp);

        float templ_sqsum_sum = 0;
        for (int i = 0; i < cn; i ++)
            templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];

        templ_sum *= scale;

        if (templ_sqsum_sum < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return true;
        }

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
                   ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
                   templ_sum, templ_sqsum_sum);    }

    size_t globalsize[2] = { result.cols, result.rows };
    return k.run(2, globalsize, NULL, false);
}
Example #17
0
cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
                              TermCriteria criteria )
{
    CV_INSTRUMENT_REGION()

    const int TOLERANCE = 10;
    Size size;
    Mat mat;
    UMat umat;
    bool isUMat = _probImage.isUMat();

    if (isUMat)
        umat = _probImage.getUMat(), size = umat.size();
    else
        mat = _probImage.getMat(), size = mat.size();

    meanShift( _probImage, window, criteria );

    window.x -= TOLERANCE;
    if( window.x < 0 )
        window.x = 0;

    window.y -= TOLERANCE;
    if( window.y < 0 )
        window.y = 0;

    window.width += 2 * TOLERANCE;
    if( window.x + window.width > size.width )
        window.width = size.width - window.x;

    window.height += 2 * TOLERANCE;
    if( window.y + window.height > size.height )
        window.height = size.height - window.y;

    // Calculating moments in new center mass
    Moments m = isUMat ? moments(umat(window)) : moments(mat(window));

    double m00 = m.m00, m10 = m.m10, m01 = m.m01;
    double mu11 = m.mu11, mu20 = m.mu20, mu02 = m.mu02;

    if( fabs(m00) < DBL_EPSILON )
        return RotatedRect();

    double inv_m00 = 1. / m00;
    int xc = cvRound( m10 * inv_m00 + window.x );
    int yc = cvRound( m01 * inv_m00 + window.y );
    double a = mu20 * inv_m00, b = mu11 * inv_m00, c = mu02 * inv_m00;

    // Calculating width & height
    double square = std::sqrt( 4 * b * b + (a - c) * (a - c) );

    // Calculating orientation
    double theta = atan2( 2 * b, a - c + square );

    // Calculating width & length of figure
    double cs = cos( theta );
    double sn = sin( theta );

    double rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02;
    double rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02;
    double length = std::sqrt( rotate_a * inv_m00 ) * 4;
    double width = std::sqrt( rotate_c * inv_m00 ) * 4;

    // In case, when tetta is 0 or 1.57... the Length & Width may be exchanged
    if( length < width )
    {
        std::swap( length, width );
        std::swap( cs, sn );
        theta = CV_PI*0.5 - theta;
    }

    // Saving results
    int _xc = cvRound( xc );
    int _yc = cvRound( yc );

    int t0 = cvRound( fabs( length * cs ));
    int t1 = cvRound( fabs( width * sn ));

    t0 = MAX( t0, t1 ) + 2;
    window.width = MIN( t0, (size.width - _xc) * 2 );

    t0 = cvRound( fabs( length * sn ));
    t1 = cvRound( fabs( width * cs ));

    t0 = MAX( t0, t1 ) + 2;
    window.height = MIN( t0, (size.height - _yc) * 2 );

    window.x = MAX( 0, _xc - window.width / 2 );
    window.y = MAX( 0, _yc - window.height / 2 );

    window.width = MIN( size.width - window.x, window.width );
    window.height = MIN( size.height - window.y, window.height );

    RotatedRect box;
    box.size.height = (float)length;
    box.size.width = (float)width;
    box.angle = (float)((CV_PI*0.5+theta)*180./CV_PI);
    while(box.angle < 0)
        box.angle += 360;
    while(box.angle >= 360)
        box.angle -= 360;
    if(box.angle >= 180)
        box.angle -= 180;
    box.center = Point2f( window.x + window.width*0.5f, window.y + window.height*0.5f);

    return box;
}
Example #18
0
int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
{
    CV_INSTRUMENT_REGION()

    Size size;
    int cn;
    Mat mat;
    UMat umat;
    bool isUMat = _probImage.isUMat();

    if (isUMat)
        umat = _probImage.getUMat(), cn = umat.channels(), size = umat.size();
    else
        mat = _probImage.getMat(), cn = mat.channels(), size = mat.size();

    Rect cur_rect = window;

    CV_Assert( cn == 1 );

    if( window.height <= 0 || window.width <= 0 )
        CV_Error( Error::StsBadArg, "Input window has non-positive sizes" );

    window = window & Rect(0, 0, size.width, size.height);

    double eps = (criteria.type & TermCriteria::EPS) ? std::max(criteria.epsilon, 0.) : 1.;
    eps = cvRound(eps*eps);
    int i, niters = (criteria.type & TermCriteria::MAX_ITER) ? std::max(criteria.maxCount, 1) : 100;

    for( i = 0; i < niters; i++ )
    {
        cur_rect = cur_rect & Rect(0, 0, size.width, size.height);
        if( cur_rect == Rect() )
        {
            cur_rect.x = size.width/2;
            cur_rect.y = size.height/2;
        }
        cur_rect.width = std::max(cur_rect.width, 1);
        cur_rect.height = std::max(cur_rect.height, 1);

        Moments m = isUMat ? moments(umat(cur_rect)) : moments(mat(cur_rect));

        // Calculating center of mass
        if( fabs(m.m00) < DBL_EPSILON )
            break;

        int dx = cvRound( m.m10/m.m00 - window.width*0.5 );
        int dy = cvRound( m.m01/m.m00 - window.height*0.5 );

        int nx = std::min(std::max(cur_rect.x + dx, 0), size.width - cur_rect.width);
        int ny = std::min(std::max(cur_rect.y + dy, 0), size.height - cur_rect.height);

        dx = nx - cur_rect.x;
        dy = ny - cur_rect.y;
        cur_rect.x = nx;
        cur_rect.y = ny;

        // Check for coverage centers mass & window
        if( dx*dx + dy*dy < eps )
            break;
    }

    window = cur_rect;
    return i;
}
Example #19
0
 void UMatToVector(const UMat & um, std::vector<Point2f> & v) const
 {
     v.resize(um.size().area());
     um.copyTo(Mat(um.size(), CV_32FC2, &v[0]));
 }
Example #20
0
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
static void findSquares( const UMat& image, vector<vector<Point> >& squares )
{
    squares.clear();
    UMat pyr, timg, gray0(image.size(), CV_8U), gray;

    // down-scale and upscale the image to filter out the noise
    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
    pyrUp(pyr, timg, image.size());
    vector<vector<Point> > contours;

    // find squares in every color plane of the image
    for( int c = 0; c < 3; c++ )
    {
        int ch[] = {c, 0};
        mixChannels(timg, gray0, ch, 1);

        // try several threshold levels
        for( int l = 0; l < N; l++ )
        {
            // hack: use Canny instead of zero threshold level.
            // Canny helps to catch squares with gradient shading
            if( l == 0 )
            {
                // apply Canny. Take the upper threshold from slider
                // and set the lower to 0 (which forces edges merging)
                Canny(gray0, gray, 0, thresh, 5);
                // dilate canny output to remove potential
                // holes between edge segments
                dilate(gray, gray, UMat(), Point(-1,-1));
            }
            else
            {
                // apply threshold if l!=0:
                //     tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
                cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
            }

            // find contours and store them all as a list
            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);

            vector<Point> approx;

            // test each contour
            for( size_t i = 0; i < contours.size(); i++ )
            {
                // approximate contour with accuracy proportional
                // to the contour perimeter

                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);

                // square contours should have 4 vertices after approximation
                // relatively large area (to filter out noisy contours)
                // and be convex.
                // Note: absolute value of an area is used because
                // area may be positive or negative - in accordance with the
                // contour orientation
                if( approx.size() == 4 &&
                        fabs(contourArea(Mat(approx))) > 1000 &&
                        isContourConvex(Mat(approx)) )
                {
                    double maxCosine = 0;

                    for( int j = 2; j < 5; j++ )
                    {
                        // find the maximum cosine of the angle between joint edges
                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                        maxCosine = MAX(maxCosine, cosine);
                    }

                    // if cosines of all angles are small
                    // (all angles are ~90 degree) then write quandrange
                    // vertices to resultant sequence
                    if( maxCosine < 0.3 )
                        squares.push_back(approx);
                }
            }
        }
    }
}