Example #1
0
static bool sumTemplate(InputArray _src, UMat & result)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
                  format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
                         ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
                         ocl::convertTypeStr(depth, wdepth, cn, cvt),
                         (int)wgs, wgs2_aligned));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    result.create(1, 1, CV_32FC1);

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            resarg = ocl::KernelArg::PtrWriteOnly(result);

    k.args(srcarg, src.cols, (int)src.total(), resarg);

    size_t globalsize = wgs;
    return k.run(1, &globalsize, &wgs, false);
}
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
                                      int & almostTemplateWindowSizeSqBinShift)
{
    const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
    int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
    int depth = DataType<FT>::depth;
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)
        return false;

    // precalc weight for every possible l2 dist between blocks
    // additional optimization of precalced weights to replace division(averaging) by binary shift
    CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
    int templateWindowSizeSq = templateWindowSize * templateWindowSize;
    almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
    FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;

    const FT WEIGHT_THRESHOLD = 1e-3f;
    int maxDist = 255 * 255 * cn;
    int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
    FT den = 1.0f / (h * h * cn);

    almostDist2Weight.create(1, almostMaxDist, CV_32SC1);

    ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
                  format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
           almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);

    size_t globalsize[1] = { almostMaxDist };
    return k.run(1, globalsize, NULL, false);
}
Example #3
0
void ConvolveBuf::create(Size image_size, Size templ_size)
{
    result_size = Size(image_size.width - templ_size.width + 1,
                       image_size.height - templ_size.height + 1);

    const double blockScale = 4.5;
    const int minBlockSize = 256;

    block_size.width = cvRound(result_size.width*blockScale);
    block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 );
    block_size.width = std::min( block_size.width, result_size.width );
    block_size.height = cvRound(templ_size.height*blockScale);
    block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 );
    block_size.height = std::min( block_size.height, result_size.height );

    dft_size.width = std::max(getOptimalDFTSize(block_size.width + templ_size.width - 1), 2);
    dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
    if( dft_size.width <= 0 || dft_size.height <= 0 )
        CV_Error( CV_StsOutOfRange, "the input arrays are too big" );

    // recompute block size
    block_size.width = dft_size.width - templ_size.width + 1;
    block_size.width = std::min( block_size.width, result_size.width);
    block_size.height = dft_size.height - templ_size.height + 1;
    block_size.height = std::min( block_size.height, result_size.height );

    image_block.create(dft_size, CV_32F);
    templ_block.create(dft_size, CV_32F);
    result_data.create(dft_size, CV_32F);

    image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);

    // Use maximum result matrix block size for the estimated DFT block size
    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
}
Example #4
0
    bool BTVL1_Base::ocl_process(InputArrayOfArrays _src, OutputArray _dst, InputArrayOfArrays _forwardMotions,
                                 InputArrayOfArrays _backwardMotions, int baseIdx)
    {
        std::vector<UMat> & src = *(std::vector<UMat> *)_src.getObj(),
                & forwardMotions = *(std::vector<UMat> *)_forwardMotions.getObj(),
                & backwardMotions = *(std::vector<UMat> *)_backwardMotions.getObj();

        // update blur filter and btv weights
        if (!filter_ || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
        {
            filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
            curBlurKernelSize_ = blurKernelSize_;
            curBlurSigma_ = blurSigma_;
            curSrcType_ = src[0].type();
        }

        if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)
        {
            calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);
            Mat(btvWeights_, true).copyTo(ubtvWeights_);

            curBtvKernelSize_ = btvKernelSize_;
            curAlpha_ = alpha_;
        }

        // calc high res motions
        calcRelativeMotions(forwardMotions, backwardMotions, ulowResForwardMotions_, ulowResBackwardMotions_, baseIdx, src[0].size());

        upscaleMotions(ulowResForwardMotions_, uhighResForwardMotions_, scale_);
        upscaleMotions(ulowResBackwardMotions_, uhighResBackwardMotions_, scale_);

        uforwardMaps_.resize(uhighResForwardMotions_.size());
        ubackwardMaps_.resize(uhighResForwardMotions_.size());
        for (size_t i = 0; i < uhighResForwardMotions_.size(); ++i)
            buildMotionMaps(uhighResForwardMotions_[i], uhighResBackwardMotions_[i], uforwardMaps_[i], ubackwardMaps_[i]);

        // initial estimation
        const Size lowResSize = src[0].size();
        const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);

        resize(src[baseIdx], uhighRes_, highResSize, 0, 0, INTER_LINEAR); // TODO

        // iterations
        udiffTerm_.create(highResSize, uhighRes_.type());
        ua_.create(highResSize, uhighRes_.type());
        ub_.create(highResSize, uhighRes_.type());
        uc_.create(lowResSize, uhighRes_.type());

        for (int i = 0; i < iterations_; ++i)
        {
            udiffTerm_.setTo(Scalar::all(0));

            for (size_t k = 0; k < src.size(); ++k)
            {
                // a = M * Ih
                remap(uhighRes_, ua_, ubackwardMaps_[k], noArray(), INTER_NEAREST);
                // b = HM * Ih
                GaussianBlur(ua_, ub_, Size(blurKernelSize_, blurKernelSize_), blurSigma_);
                // c = DHM * Ih
                resize(ub_, uc_, lowResSize, 0, 0, INTER_NEAREST);

                diffSign(src[k], uc_, uc_);

                // a = Dt * diff
                upscale(uc_, ua_, scale_);

                // b = HtDt * diff
                GaussianBlur(ua_, ub_, Size(blurKernelSize_, blurKernelSize_), blurSigma_);
                // a = MtHtDt * diff
                remap(ub_, ua_, uforwardMaps_[k], noArray(), INTER_NEAREST);

                add(udiffTerm_, ua_, udiffTerm_);
            }

            if (lambda_ > 0)
            {
                calcBtvRegularization(uhighRes_, uregTerm_, btvKernelSize_, btvWeights_, ubtvWeights_);
                addWeighted(udiffTerm_, 1.0, uregTerm_, -lambda_, 0.0, udiffTerm_);
            }

            addWeighted(uhighRes_, 1.0, udiffTerm_, tau_, 0.0, uhighRes_);
        }

        Rect inner(btvKernelSize_, btvKernelSize_, uhighRes_.cols - 2 * btvKernelSize_, uhighRes_.rows - 2 * btvKernelSize_);
        uhighRes_(inner).copyTo(_dst);

        return true;
    }
Example #5
0
bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptors)
{
    int dsize = params->descriptorSize();
    int nFeatures = keypoints.cols;
    if (nFeatures == 0)
    {
        _descriptors.release();
        return true;
    }
    _descriptors.create(nFeatures, dsize, CV_32F);
    UMat descriptors;
    if( _descriptors.isUMat() )
        descriptors = _descriptors.getUMat();
    else
        descriptors.create(nFeatures, dsize, CV_32F);

    ocl::Kernel kerCalcDesc, kerNormDesc;

    if( dsize == 64 )
    {
        kerCalcDesc.create("SURF_computeDescriptors64", ocl::xfeatures2d::surf_oclsrc, kerOpts);
        kerNormDesc.create("SURF_normalizeDescriptors64", ocl::xfeatures2d::surf_oclsrc, kerOpts);
    }
    else
    {
        CV_Assert(dsize == 128);
        kerCalcDesc.create("SURF_computeDescriptors128", ocl::xfeatures2d::surf_oclsrc, kerOpts);
        kerNormDesc.create("SURF_normalizeDescriptors128", ocl::xfeatures2d::surf_oclsrc, kerOpts);
    }

    size_t localThreads[] = {6, 6};
    size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]};

    if(haveImageSupport)
    {
        kerCalcDesc.args(imgTex,
                         img_rows, img_cols,
                         ocl::KernelArg::ReadOnlyNoSize(keypoints),
                         ocl::KernelArg::WriteOnlyNoSize(descriptors));
    }
    else
    {
        kerCalcDesc.args(ocl::KernelArg::ReadOnlyNoSize(img),
                         img_rows, img_cols,
                         ocl::KernelArg::ReadOnlyNoSize(keypoints),
                         ocl::KernelArg::WriteOnlyNoSize(descriptors));
    }

    if(!kerCalcDesc.run(2, globalThreads, localThreads, true))
        return false;

    size_t localThreads_n[] = {dsize, 1};
    size_t globalThreads_n[] = {nFeatures*localThreads_n[0], localThreads_n[1]};

    globalThreads[0] = nFeatures * localThreads[0];
    globalThreads[1] = localThreads[1];
    bool ok = kerNormDesc.args(ocl::KernelArg::ReadWriteNoSize(descriptors)).
                        run(2, globalThreads_n, localThreads_n, true);
    if(ok && !_descriptors.isUMat())
        descriptors.copyTo(_descriptors);
    return ok;
}
Example #6
0
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
                      int aperture_size, bool L2gradient, int cn, const Size & size)
{
    CV_INSTRUMENT_REGION_OPENCL()

    UMat map;

    const ocl::Device &dev = ocl::Device::getDefault();
    int max_wg_size = (int)dev.maxWorkGroupSize();

    int lSizeX = 32;
    int lSizeY = max_wg_size / 32;

    if (lSizeY == 0)
    {
        lSizeX = 16;
        lSizeY = max_wg_size / 16;
    }
    if (lSizeY == 0)
    {
        lSizeY = 1;
    }

    if (aperture_size == 7)
    {
        low_thresh = low_thresh / 16.0f;
        high_thresh = high_thresh / 16.0f;
    }

    if (L2gradient)
    {
        low_thresh = std::min(32767.0f, low_thresh);
        high_thresh = std::min(32767.0f, high_thresh);

        if (low_thresh > 0)
            low_thresh *= low_thresh;
        if (high_thresh > 0)
            high_thresh *= high_thresh;
    }
    int low = cvFloor(low_thresh), high = cvFloor(high_thresh);

    if (!useCustomDeriv &&
        aperture_size == 3 && !_src.isSubmatrix())
    {
        /*
            stage1_with_sobel:
                Sobel operator
                Calc magnitudes
                Non maxima suppression
                Double thresholding
        */
        char cvt[40];
        ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
                               format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
                                      cn, ocl::memopTypeToStr(_src.depth()),
                                      ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
                                      ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
                                      lSizeX, lSizeY,
                                      L2gradient ? " -D L2GRAD" : ""));
        if (with_sobel.empty())
            return false;

        UMat src = _src.getUMat();
        map.create(size, CV_32S);
        with_sobel.args(ocl::KernelArg::ReadOnly(src),
                        ocl::KernelArg::WriteOnlyNoSize(map),
                        (float) low, (float) high);

        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
                localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };

        if (!with_sobel.run(2, globalsize, localsize, false))
            return false;
    }
    else
    {
        /*
            stage1_without_sobel:
                Calc magnitudes
                Non maxima suppression
                Double thresholding
        */
        double scale = 1.0;
        if (aperture_size == 7)
        {
            scale = 1 / 16.0;
        }

        UMat dx, dy;
        if (!useCustomDeriv)
        {
            Sobel(_src, dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
            Sobel(_src, dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
        }
        else
        {
            dx = dx_;
            dy = dy_;
        }

        ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
                                    format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
                                           cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
        if (without_sobel.empty())
            return false;

        map.create(size, CV_32S);
        without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
                           ocl::KernelArg::WriteOnly(map),
                           low, high);

        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
                localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };

        if (!without_sobel.run(2, globalsize, localsize, false))
            return false;
    }

    int PIX_PER_WI = 8;
    /*
        stage2:
            hysteresis (add weak edges if they are connected with strong edges)
    */

    int sizey = lSizeY / PIX_PER_WI;
    if (sizey == 0)
        sizey = 1;

    size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };

    ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
                                format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
                                PIX_PER_WI, lSizeX, sizey));

    if (edgesHysteresis.empty())
        return false;

    edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
    if (!edgesHysteresis.run(2, globalsize, localsize, false))
        return false;

    // get edges

    ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
                                format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
    if (getEdgesKernel.empty())
        return false;

    _dst.create(size, CV_8UC1);
    UMat dst = _dst.getUMat();

    getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));

    return getEdgesKernel.run(2, globalsize, NULL, false);
}