Exemplo n.º 1
0
    // OpenCL forward pass of the layer.
    // NOTE: this snippet is a fragment; it ends before the function body closes, so only
    // the setup phase is visible: reading the layer/image extents from the inputs,
    // choosing the step sizes, and lazily uploading the parameter vectors to UMat members.
    //
    // inps      - input arrays; inputs[0] provides the layer extents, inputs[1] the image extents.
    // outs      - output arrays (only fetched in the visible part).
    // internals - not referenced in the visible part.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        // Dimension 2 is used as height and dimension 3 as width
        // (presumably NCHW blobs -- confirm against the callers).
        int _layerWidth = inputs[0].size[3];
        int _layerHeight = inputs[0].size[2];

        int _imageWidth = inputs[1].size[3];
        int _imageHeight = inputs[1].size[2];

        // When either configured step is zero, derive both steps from the
        // image-to-layer size ratio; otherwise use the configured values.
        float stepX, stepY;
        if (_stepX == 0 || _stepY == 0)
        {
            stepX = static_cast<float>(_imageWidth) / _layerWidth;
            stepY = static_cast<float>(_imageHeight) / _layerHeight;
        } else {
            stepX = _stepX;
            stepY = _stepY;
        }

        // The device-side copies are created only once: umat_offsetsX being empty
        // is used as the "not yet uploaded" marker for all of them.
        if (umat_offsetsX.empty())
        {
            // Mat headers wrapping the vectors' storage (no copy is made here).
            // Each header is sized by the vector it wraps; offsetsY previously used
            // _offsetsX.size(), which is wrong whenever the two lengths differ.
            Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
            Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
            Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]);
            Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);

            offsetsX.copyTo(umat_offsetsX);
            offsetsY.copyTo(umat_offsetsY);
            aspectRatios.copyTo(umat_aspectRatios);
            variance.copyTo(umat_variance);

            // NOTE(review): _numPriors is shifted right by (number of offsets - 1);
            // presumably this recovers the prior count for a single offset -- confirm.
            int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
            // One-dimensional CV_32F UMat of real_numPriors elements, filled with 1.0f.
            umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
        }
Exemplo n.º 2
0
    // OpenCL forward pass of the prior-box layer.
    //
    // Launches the "prior_box" kernel to fill outputs[0] with the box coordinates,
    // optionally clamps those coordinates to [0, 1] on the host, then launches the
    // "set_variance" kernel to write the variances after the coordinates.
    //
    // inps      - input arrays; inputs[0] provides the layer height/width (dims 2 and 3),
    //             inputs[1] provides the image height/width (dims 2 and 3).
    // outs      - output arrays; only outputs[0] is written.
    // internals - not used by this function.
    //
    // Returns false when either kernel fails to launch, true otherwise.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        // A CV_16S input depth selects the half-precision kernel build options below.
        const bool use_half = (inps.depth() == CV_16S);
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        int _layerWidth = inputs[0].size[3];
        int _layerHeight = inputs[0].size[2];

        int _imageWidth = inputs[1].size[3];
        int _imageHeight = inputs[1].size[2];

        // The device-side parameter buffers are uploaded once; an empty
        // umat_offsetsX marks them all as not yet created.
        if (umat_offsetsX.empty())
        {
            // Mat headers over the vectors' storage; copyTo performs the actual upload.
            Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
            Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
            Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
            Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
            Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);

            offsetsX.copyTo(umat_offsetsX);
            offsetsY.copyTo(umat_offsetsY);
            variance.copyTo(umat_variance);
            widths.copyTo(umat_widths);
            heights.copyTo(umat_heights);
        }

        String opts;
        if (use_half)
            opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
        else
            opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";

        // One work item per layer cell.
        size_t nthreads = _layerHeight * _layerWidth;
        ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);

        kernel.set(0, (int)nthreads);
        kernel.set(1, (float)_stepX);
        kernel.set(2, (float)_stepY);
        kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
        kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
        kernel.set(5, (int)_offsetsX.size());
        kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
        kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
        kernel.set(8, (int)_boxWidths.size());
        kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
        kernel.set(10, (int)_layerHeight);
        kernel.set(11, (int)_layerWidth);
        kernel.set(12, (int)_imageHeight);
        kernel.set(13, (int)_imageWidth);
        // A failed launch must be reported, exactly as for the variance kernel below;
        // previously the result was dropped and the function could return true anyway.
        if (!kernel.run(1, &nthreads, NULL, false))
            return false;

        // clip the prior's coordinates such that they are within [0, 1]
        if (_clip)
        {
            // The loop reads and writes the mapped data, so the buffer is mapped
            // read-write; a read-only mapping does not guarantee the clamped values
            // are written back to the UMat.
            // NOTE(review): the data is accessed as float here; when use_half is set
            // the buffer presumably holds half values -- confirm this path is valid.
            Mat mat = outputs[0].getMat(ACCESS_RW);
            // The prior_box kernel receives no offset for outputs[0], so the
            // coordinates start at element 0; the variances are written after
            // them (see the offset used below). Clip the coordinate range only.
            float* outputPtr = mat.ptr<float>();
            const size_t outChannelSize = static_cast<size_t>(_layerHeight) * _layerWidth * _numPriors * 4;
            for (size_t d = 0; d < outChannelSize; ++d)
            {
                outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
            }
        }

        // set the variance.
        {
            ocl::Kernel varianceKernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
            // Element offset in outputs[0] passed to the kernel: the product of the
            // output dimensions from index 2 onwards.
            int offset = total(shape(outputs[0]), 2);
            // One work item per prior.
            size_t varianceThreads = _layerHeight * _layerWidth * _numPriors;
            varianceKernel.set(0, (int)varianceThreads);
            varianceKernel.set(1, (int)offset);
            varianceKernel.set(2, (int)_variance.size());
            varianceKernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
            varianceKernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
            if (!varianceKernel.run(1, &varianceThreads, NULL, false))
                return false;
        }
        return true;
    }