Ejemplo n.º 1
0
    // Converts a Bayer-pattern device image (input socket 0) into a gray device
    // image (output socket 0) using the kernel that matches the configured
    // Bayer code.
    //
    // Returns Ok without doing anything when the input has a zero dimension,
    // and Error("Bad bayer code") when no kernel was obtained for the code.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        const clw::Image2D& srcImage = reader.readSocket(0).getDeviceImageMono();
        clw::Image2D& dstImage = writer.acquireSocket(0).getDeviceImageMono();

        int width = srcImage.width();
        int height = srcImage.height();

        // Empty input - nothing to convert
        if(width == 0 || height == 0)
            return ExecutionStatus(EStatus::Ok);

        // Select the kernel for the configured sensor layout; an unhandled
        // code leaves the kernel null, which is reported below
        clw::Kernel kernel;
        switch(_BayerCode.cast<Enum>().cast<cvu::EBayerCode>())
        {
        case cvu::EBayerCode::RG:
            kernel = _gpuComputeModule->acquireKernel(_kidConvertRG2Gray);
            break;
        case cvu::EBayerCode::GB:
            kernel = _gpuComputeModule->acquireKernel(_kidConvertGB2Gray);
            break;
        case cvu::EBayerCode::GR:
            kernel = _gpuComputeModule->acquireKernel(_kidConvertGR2Gray);
            break;
        case cvu::EBayerCode::BG:
            kernel = _gpuComputeModule->acquireKernel(_kidConvertBG2Gray);
            break;
        }

        if(kernel.isNull())
            return ExecutionStatus(EStatus::Error, "Bad bayer code");

        // (Re)allocate the destination only when it is missing or its size
        // differs from the source
        const bool dstMatchesSrc = !dstImage.isNull()
            && dstImage.width() == width
            && dstImage.height() == height;
        if(!dstMatchesSrc)
        {
            dstImage = _gpuComputeModule->context().createImage2D(
                clw::EAccess::ReadWrite, clw::EMemoryLocation::Device,
                clw::ImageFormat(clw::EChannelOrder::R, clw::EChannelType::Normalized_UInt8),
                width, height);
        }

        // Per-channel gains passed to the kernel as a single float3
        cl_float3 gains = {
            static_cast<float>(_redGain),
            static_cast<float>(_greenGain),
            static_cast<float>(_blueGain)
        };

        // Local memory tile: 16x16 work-group plus 2 extra elements per dimension
        int tileWidth = 16 + 2;
        int tileHeight = 16 + 2;

        kernel.setLocalWorkSize(16, 16);
        kernel.setRoundedGlobalWorkSize(width, height);
        kernel.setArg(0, srcImage);
        kernel.setArg(1, dstImage);
        kernel.setArg(2, gains);
        kernel.setArg(3, clw::LocalMemorySize(tileWidth*tileHeight*sizeof(float)));
        kernel.setArg(4, tileWidth);
        kernel.setArg(5, tileHeight);

        // Enqueue the kernel and wait for the queue to finish before returning
        _gpuComputeModule->queue().asyncRunKernel(kernel);
        _gpuComputeModule->queue().finish();

        return ExecutionStatus(EStatus::Ok);
    }
Ejemplo n.º 2
0
    // Detects keypoints and computes their descriptors for the mono image on
    // input socket 0 by invoking _brisk. Keypoints go to output socket 0,
    // descriptors to output socket 1.
    //
    // Returns Ok immediately (outputs untouched) when the input image is empty;
    // otherwise Ok with a message reporting the number of keypoints.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        // Input
        const cv::Mat& image = reader.readSocket(0).getImageMono();
        // Outputs
        KeyPoints& keypoints = writer.acquireSocket(0).getKeypoints();
        cv::Mat& descriptorsOut = writer.acquireSocket(1).getArray();

        // Nothing to process
        if(image.empty())
            return ExecutionStatus(EStatus::Ok);

        // Detection and description in a single call (no mask)
        (*_brisk)(image, cv::noArray(), keypoints.kpoints, descriptorsOut);
        // Remember which image the keypoints were found on
        keypoints.image = image;

        const int detectedCount = static_cast<int>(keypoints.kpoints.size());
        return ExecutionStatus(EStatus::Ok,
            string_format("Keypoints detected: %d", detectedCount));
    }
Ejemplo n.º 3
0
    // Computes descriptors for keypoints that were detected earlier (input
    // socket 0) by calling _brisk->compute on the image stored with them.
    // A copy of the keypoints goes to output socket 0, the descriptors to
    // output socket 1.
    //
    // Returns Ok without acquiring any output when there are no keypoints or
    // the associated image is empty.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        // Input
        const KeyPoints& inputKeypoints = reader.readSocket(0).getKeypoints();

        // Both the keypoints and their source image are required
        const bool hasWork = !inputKeypoints.kpoints.empty()
            && !inputKeypoints.image.empty();
        if(!hasWork)
            return ExecutionStatus(EStatus::Ok);

        // Outputs
        KeyPoints& resultKeypoints = writer.acquireSocket(0).getKeypoints();
        cv::Mat& resultDescriptors = writer.acquireSocket(1).getArray();

        // Start from a copy of the input; compute() receives the copied
        // keypoint list by reference
        resultKeypoints = inputKeypoints;

        _brisk->compute(inputKeypoints.image, resultKeypoints.kpoints, resultDescriptors);

        return ExecutionStatus(EStatus::Ok);
    }
Ejemplo n.º 4
0
    // Detects keypoints and computes descriptors with cv::SIFT on the mono
    // image from input socket 0. Keypoints go to output socket 0, descriptors
    // to output socket 1.
    //
    // Returns Ok immediately (outputs untouched) when the input image is empty;
    // otherwise Ok with a message reporting the number of keypoints.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        // Input
        const cv::Mat& image = reader.readSocket(0).getImageMono();
        // Outputs
        KeyPoints& keypoints = writer.acquireSocket(0).getKeypoints();
        cv::Mat& descriptorsOut = writer.acquireSocket(1).getArray();

        // Nothing to process
        if(image.empty())
            return ExecutionStatus(EStatus::Ok);

        // A detector is built on every call from the current parameter values
        cv::SIFT detector(_nFeatures, _nOctaveLayers,
            _contrastThreshold, _edgeThreshold, _sigma);
        // Detection and description in a single call (no mask)
        detector(image, cv::noArray(), keypoints.kpoints, descriptorsOut);
        // Remember which image the keypoints were found on
        keypoints.image = image;

        const int detectedCount = static_cast<int>(keypoints.kpoints.size());
        return ExecutionStatus(EStatus::Ok,
            string_format("Keypoints detected: %d", detectedCount));
    }
Ejemplo n.º 5
0
    // Detects keypoints (no descriptors) on the mono image from input socket 0
    // by calling _brisk->detect and writes them to output socket 0.
    //
    // Returns Ok immediately (output untouched) when the input image is empty;
    // otherwise Ok with a message reporting the number of keypoints.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        // Input
        const cv::Mat& image = reader.readSocket(0).getImageMono();
        // Output
        KeyPoints& keypoints = writer.acquireSocket(0).getKeypoints();

        // Nothing to process
        if(image.empty())
            return ExecutionStatus(EStatus::Ok);

        _brisk->detect(image, keypoints.kpoints);
        // Remember which image the keypoints were found on
        keypoints.image = image;

        const int detectedCount = static_cast<int>(keypoints.kpoints.size());
        return ExecutionStatus(EStatus::Ok,
            string_format("Keypoints detected: %d", detectedCount));
    }
Ejemplo n.º 6
0
    // Runs the GPU SURF pipeline on the device image from input socket 0.
    //
    // Outputs:
    //   socket 0 - keypoints, plus a host copy of the input image (kp.image)
    //   socket 1 - host descriptors (filled only when _downloadDescriptors is set)
    //   socket 2 - device descriptors, wrapping _descriptors_cl
    //
    // Returns Ok without touching the outputs when the input has a zero
    // dimension, Error when one of the pinned buffers cannot be mapped, and
    // otherwise Ok with a message reporting the number of keypoints.
    //
    // On the error paths taken after the image download has been enqueued the
    // data queue is finished first, so the asynchronous read into kp.image is
    // not left in flight, and an already opened perf marker is closed.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        const clw::Image2D& deviceImage = reader.readSocket(0).getDeviceImageMono();
        KeyPoints& kp = writer.acquireSocket(0).getKeypoints();
        cv::Mat& descriptors = writer.acquireSocket(1).getArray();
        DeviceArray& descriptors_dev = writer.acquireSocket(2).getDeviceArray();

        int imageWidth = deviceImage.width();
        int imageHeight = deviceImage.height();
        if(imageWidth == 0 || imageHeight == 0)
            return ExecutionStatus(EStatus::Ok);

        GpuPerformanceMarker marker(_gpuComputeModule->activityLogger(), "SURF");

        if(!_constantsUploaded)
            uploadSurfConstants();

        ensureKeypointsBufferIsEnough();

        // Zero keypoints counter (use pinned memory)
        // Another way would be to map pinned buffer (allocated on a host)
        // and read the device buffer using just obtained pointer. If data transfer is single-direction
        // we don't need to map and unmap every time - just after creation and before destruction of a buffer

        // From AMD APP OpenCL Programming Guide:
        // pinnedBuffer = clCreateBuffer(CL_MEM_ALLOC_HOST_PTR or CL_MEM_USE_HOST_PTR)
        // deviceBuffer = clCreateBuffer()

        // 1)
        // void* pinnedMemory = clEnqueueMapBuffer(pinnedBuffer) -> can be done only once
        // clEnqueueRead/WriteBuffer(deviceBuffer, pinnedMemory)
        // clEnqueueUnmapMemObject(pinnedBuffer, pinnedMemory) -> can be done only once
        //
        // 2)
        // void* pinnedMemory = clEnqueueMapBuffer(pinnedBuffer)
        // [Application writes or modifies memory (host memory bandwidth)]
        // clEnqueueUnmapMemObject(pinnedBuffer, pinnedMemory)
        // clEnqueueCopyBuffer(pinnedBuffer, deviceBuffer)
        //  - or -
        // clEnqueueCopyBuffer(deviceBuffer, pinnedBuffer)
        // void* pinnedMemory = clEnqueueMapBuffer(pinnedBuffer)
        // [Application reads memory (host memory bandwidth)]
        // clEnqueueUnmapMemObject(pinnedBuffer, pinnedMemory)


        // On AMD these are the same solutions
        int* intPtr = (int*) _gpuComputeModule->queue().mapBuffer(_pinnedKeypointsCount_cl, clw::EMapAccess::Write);
        if(!intPtr)
            return ExecutionStatus(EStatus::Error, "Couldn't mapped keypoints counter buffer to host address space");
        intPtr[0] = 0;
        _gpuComputeModule->queue().asyncUnmap(_pinnedKeypointsCount_cl, intPtr);
        _gpuComputeModule->queue().asyncCopyBuffer(_pinnedKeypointsCount_cl, _keypointsCount_cl);

        convertImageToIntegral(deviceImage, imageWidth, imageHeight);

        prepareScaleSpaceLayers(imageWidth, imageHeight);
        buildScaleSpace(imageWidth, imageHeight);
        findScaleSpaceMaxima();

        // Start downloading the input image on the data queue; it is waited
        // for (dataQueue().finish()) on every path below before returning
        kp.image.create(imageHeight, imageWidth, CV_8UC1);
        _gpuComputeModule->dataQueue().asyncReadImage2D(deviceImage, kp.image.data, (int) kp.image.step);
        _gpuComputeModule->dataQueue().flush();

        _gpuComputeModule->activityLogger().beginPerfMarker("Read number of keypoints", "SURF");

        // Read keypoints counter (use pinned memory)
        _gpuComputeModule->queue().asyncCopyBuffer(_keypointsCount_cl, _pinnedKeypointsCount_cl);
        intPtr = (int*) _gpuComputeModule->queue().mapBuffer(_pinnedKeypointsCount_cl, clw::EMapAccess::Read);
        if(!intPtr)
        {
            // Close the marker opened above and wait for the pending image
            // download before bailing out
            _gpuComputeModule->activityLogger().endPerfMarker();
            _gpuComputeModule->dataQueue().finish();
            return ExecutionStatus(EStatus::Error, "Couldn't mapped keypoints counter buffer to host address space");
        }
        // Never report more keypoints than kKeypointsMax
        int keypointsCount = min(intPtr[0], kKeypointsMax);
        _gpuComputeModule->queue().asyncUnmap(_pinnedKeypointsCount_cl, intPtr);

        _gpuComputeModule->activityLogger().endPerfMarker();

        if(keypointsCount > 0)
        {
            if(!_upright)
                findKeypointOrientation(keypointsCount);
            else
                uprightKeypointOrientation(keypointsCount);
            calculateDescriptors(keypointsCount);

            // Named differently from the function-level marker so it does not shadow it
            GpuPerformanceMarker readResultsMarker(_gpuComputeModule->activityLogger(), "Read results", "SURF");

            // Start copying descriptors to pinned buffer
            if(_downloadDescriptors)
                _gpuComputeModule->queue().asyncCopyBuffer(_descriptors_cl, _pinnedDescriptors_cl);

            vector<KeyPoint> kps = downloadKeypoints(keypointsCount);
            kp.kpoints = transformKeyPoint(kps);

            descriptors_dev = DeviceArray::createFromBuffer(_descriptors_cl, 64, keypointsCount, EDataType::Float);

            if(_downloadDescriptors)
            {
                descriptors.create(keypointsCount, 64, CV_32F);
                float* floatPtr = (float*) _gpuComputeModule->queue().mapBuffer(_pinnedDescriptors_cl, clw::EMapAccess::Read);
                if(!floatPtr)
                {
                    // Wait for the pending image download before bailing out
                    _gpuComputeModule->dataQueue().finish();
                    return ExecutionStatus(EStatus::Error, "Couldn't mapped descriptors buffer to host address space");
                }
                if(descriptors.step == 64*sizeof(float))
                {
                    // Rows are tightly packed - copy everything in one go
                    memcpy(descriptors.ptr<float>(), floatPtr, sizeof(float)*64 * keypointsCount);
                }
                else
                {
                    // Padded rows - copy one 64-float descriptor at a time
                    for(int row = 0; row < keypointsCount; ++row)
                        memcpy(descriptors.ptr<float>(row), floatPtr + 64*row, sizeof(float)*64);
                }

                _gpuComputeModule->queue().asyncUnmap(_pinnedDescriptors_cl, floatPtr);
            }

            // Finish downloading input image
            _gpuComputeModule->dataQueue().finish();
        }
        else
        {
            // Finish downloading input image
            _gpuComputeModule->dataQueue().finish();

            // No keypoints - reset all outputs
            kp.kpoints = vector<cv::KeyPoint>();
            descriptors = cv::Mat();
            descriptors_dev = DeviceArray();
        }

        return ExecutionStatus(EStatus::Ok,
            string_format("Keypoints detected: %d", (int) kp.kpoints.size()));
    }
Ejemplo n.º 7
0
    // Runs the Gaussian-mixture kernel on the device image from input socket 0
    // and writes its result image to output socket 0. When _showBackground is
    // set, a second kernel additionally fills the image on output socket 1
    // from the mixture buffers.
    //
    // Returns Ok without doing anything when the input has a zero dimension.
    ExecutionStatus execute(NodeSocketReader& reader, NodeSocketWriter& writer) override
    {
        const clw::Image2D& sourceImage = reader.readSocket(0).getDeviceImageMono();
        clw::Image2D& foregroundImage = writer.acquireSocket(0).getDeviceImageMono();

        int srcWidth = sourceImage.width();
        int srcHeight = sourceImage.height();

        // Empty input - nothing to process
        if(srcWidth == 0 || srcHeight == 0)
            return ExecutionStatus(EStatus::Ok);

        clw::Kernel mixtureKernel = _gpuComputeModule->acquireKernel(_kidGaussMix);

        // Create mixture data buffer
        resetMixturesState(srcWidth * srcHeight);

        // (Re)allocates a single-channel 8-bit device image when the given one
        // is missing or its size differs from the source
        auto ensureMatchesSource = [this, srcWidth, srcHeight](clw::Image2D& image)
        {
            const bool matches = !image.isNull()
                && image.width() == srcWidth
                && image.height() == srcHeight;
            if(matches)
                return;

            image = _gpuComputeModule->context().createImage2D(
                clw::EAccess::ReadWrite, clw::EMemoryLocation::Device,
                clw::ImageFormat(clw::EChannelOrder::R, clw::EChannelType::Normalized_UInt8),
                srcWidth, srcHeight);
        };

        // Foreground image (basically a mask)
        ensureMatchesSource(foregroundImage);

        // Calculate dynamic learning rate (if necessary): the configured rate
        // is used only when it is non-negative and this is not the first frame
        ++_nframe;
        const bool useConfiguredRate = _learningRate >= 0 && _nframe > 1;
        float alpha = useConfiguredRate
            ? _learningRate
            : 1.0f/std::min(_nframe, _history.cast<int>());

        mixtureKernel.setLocalWorkSize(16, 16);
        mixtureKernel.setRoundedGlobalWorkSize(srcWidth, srcHeight);
        mixtureKernel.setArg(0, sourceImage);
        mixtureKernel.setArg(1, foregroundImage);
        mixtureKernel.setArg(2, _mixtureDataBuffer);
        mixtureKernel.setArg(3, _mixtureParamsBuffer);
        mixtureKernel.setArg(4, alpha);
        _gpuComputeModule->queue().asyncRunKernel(mixtureKernel);

        if(_showBackground)
        {
            // Background image, produced from the mixture buffers
            clw::Image2D& backgroundImage = writer.acquireSocket(1).getDeviceImageMono();
            ensureMatchesSource(backgroundImage);

            clw::Kernel backgroundKernel = _gpuComputeModule->acquireKernel(_kidGaussBackground);

            backgroundKernel.setLocalWorkSize(16, 16);
            backgroundKernel.setRoundedGlobalWorkSize(srcWidth, srcHeight);
            backgroundKernel.setArg(0, backgroundImage);
            backgroundKernel.setArg(1, _mixtureDataBuffer);
            backgroundKernel.setArg(2, _mixtureParamsBuffer);
            _gpuComputeModule->queue().asyncRunKernel(backgroundKernel);
        }

        // Wait for the enqueued kernel(s) before returning
        _gpuComputeModule->queue().finish();
        return ExecutionStatus(EStatus::Ok);
    }