Example #1
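Dense optical flow with the Brox et al. algorithm from OpenCV's gpu module: the operator validates its CV_32FC1 input frames, wraps the GpuMat buffers in NCV matrix descriptors, sizes a reusable scratch buffer, and hands everything to NCVBroxOpticalFlow.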
void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
{
    ncvSetDebugOutputHandler(outputHandler);

    // Validate inputs: Brox flow expects two equal-size CV_32FC1 frames.
    CV_Assert(frame0.type() == CV_32FC1);
    CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());

    u.create(frame0.size(), CV_32FC1);
    v.create(frame0.size(), CV_32FC1);

    // Query device properties; textureAlignment drives the NCV pitch requirements below.
    cudaDeviceProp devProp;
    cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );

    NCVBroxOpticalFlowDescriptor desc;

    desc.alpha = alpha;
    desc.gamma = gamma;
    desc.scale_factor = scale_factor;
    desc.number_of_inner_iterations = inner_iterations;
    desc.number_of_outer_iterations = outer_iterations;
    desc.number_of_solver_iterations = solver_iterations;

    // Wrap the existing GpuMat device buffers in NCV memory segments (no copies are made).
    NCVMemSegment frame0MemSeg;
    frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
    frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
    frame0MemSeg.size = frame0.step * frame0.rows;

    NCVMemSegment frame1MemSeg;
    frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
    frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
    frame1MemSeg.size = frame1.step * frame1.rows;

    NCVMemSegment uMemSeg;
    uMemSeg.begin.memtype = NCVMemoryTypeDevice;
    uMemSeg.begin.ptr = u.ptr();
    uMemSeg.size = u.step * u.rows;

    NCVMemSegment vMemSeg;
    vMemSeg.begin.memtype = NCVMemoryTypeDevice;
    vMemSeg.begin.ptr = v.ptr();
    vMemSeg.size = v.step * v.rows;

    NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
    NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
    NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
    NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));

    cudaStream_t stream = StreamAccessor::getStream(s);

    // Ask NCV how much scratch memory this configuration needs, grow the
    // cached buffer accordingly, and expose it through a stack allocator.
    size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);

    ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);

    NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());

    // Run the Brox optical flow solver on the requested stream.
    ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
}
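
A minimal caller sketch for the operator above. The parameter values and the host-frame names are illustrative assumptions, not part of the original source; the values match the defaults used in OpenCV's Brox sample.

#include <opencv2/gpu/gpu.hpp>

// Hypothetical caller; parameter values are illustrative only.
void runBroxFlow(const cv::Mat& h_frame0, const cv::Mat& h_frame1)
{
    cv::gpu::BroxOpticalFlow flow(0.197f /*alpha*/, 50.0f /*gamma*/,
                                  0.8f /*scale_factor*/, 10 /*inner*/,
                                  77 /*outer*/, 10 /*solver iterations*/);

    cv::gpu::GpuMat d_frame0(h_frame0), d_frame1(h_frame1); // CV_32FC1 frames
    cv::gpu::GpuMat u, v;                                    // flow components

    cv::gpu::Stream stream;
    flow(d_frame0, d_frame1, u, v, stream);
    stream.waitForCompletion();
}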
Example #2
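Histogram equalization for CV_8UC1 images: calcHist builds the 256-bin histogram, NPP's nppsIntegral_32s turns it into a cumulative lookup table, and a CUDA kernel (hist::equalizeHist) remaps the pixels through that table. The hist and buf arguments let callers reuse allocations across frames.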
void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
{
    CV_Assert(src.type() == CV_8UC1);

    dst.create(src.size(), src.type());

    // Query the scratch size NPP needs for a 256-bin integral (cumulative sum).
    int intBufSize;
    nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );

    // One allocation holds both the NPP scratch area and the 256-entry LUT.
    ensureSizeIsEnough(1, intBufSize + 256 * sizeof(int), CV_8UC1, buf);

    GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());
    GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);

    calcHist(src, hist, s);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    // The cumulative histogram serves as the equalization lookup table.
    nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );

    hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
}
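
A possible caller, reusing the histogram and scratch buffers across frames; the surrounding names (equalizeFrame, h_gray, h_out) are illustrative assumptions.

#include <opencv2/gpu/gpu.hpp>

// Hypothetical per-frame helper; hist and buf persist between calls.
void equalizeFrame(const cv::Mat& h_gray, cv::Mat& h_out,
                   cv::gpu::GpuMat& hist, cv::gpu::GpuMat& buf)
{
    cv::gpu::GpuMat d_src(h_gray), d_dst;      // h_gray must be CV_8UC1
    cv::gpu::Stream stream;
    cv::gpu::equalizeHist(d_src, d_dst, hist, buf, stream);
    stream.waitForCompletion();
    d_dst.download(h_out);                     // copy the result back to the host
}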
Example #3
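Reads back FAST keypoints detected by a previous call into a two-row matrix (locations in row LOCATION_ROW, responses in row RESPONSE_ROW), applying GPU non-maximum suppression when enabled. A usage sketch follows Example #4, which extends this code with a capability check.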
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
    using namespace cv::gpu::cudev::fast;

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    if (nonmaxSupression)
        return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    // Without non-max suppression, copy the raw keypoint locations into the
    // location row and zero the response row.
    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}
Example #4
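The same getKeyPoints routine as Example #3, with an extra guard: it raises CV_StsNotImplemented if the device (or the build) lacks global atomics, which the keypoint-collection kernels depend on.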
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
    using namespace cv::gpu::device::fast;

    // The FAST implementation relies on global atomics to build the keypoint list.
    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    if (nonmaxSupression)
        return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}
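
getKeyPoints is normally driven through FAST_GPU::operator(); a minimal sketch follows, where the threshold value and the surrounding names are illustrative assumptions.

#include <opencv2/gpu/gpu.hpp>
#include <vector>

// Hypothetical driver; getKeyPoints runs inside operator().
void detectFast(const cv::Mat& h_gray, std::vector<cv::KeyPoint>& kps)
{
    cv::gpu::FAST_GPU fast(20 /*threshold*/, true /*nonmaxSupression*/);
    cv::gpu::GpuMat d_img(h_gray), d_keypoints;  // h_gray must be CV_8UC1

    fast(d_img, cv::gpu::GpuMat(), d_keypoints); // empty mask = whole image
    fast.downloadKeypoints(d_keypoints, kps);    // copy results to the host
}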
Example #5
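Sparse pyramidal Lucas-Kanade tracking: after validating the inputs and allocating the per-point outputs, the method builds 32-bit float image pyramids for both frames and refines the point positions level by level, from the coarsest level down to the original resolution.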
void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
{
    using namespace cv::gpu::device::pyrlk;

    if (prevPts.empty())
    {
        nextPts.release();
        status.release();
        if (err) err->release();
        return;
    }

    // Derive CUDA block and patch geometry from the search window size.
    dim3 block, patch;
    calcPatchSize(winSize, block, patch, isDeviceArch11_);

    CV_Assert(prevImg.type() == CV_8UC1 || prevImg.type() == CV_8UC3 || prevImg.type() == CV_8UC4);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
    CV_Assert(maxLevel >= 0);
    CV_Assert(winSize.width > 2 && winSize.height > 2);
    CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
    CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);

    if (useInitialFlow)
        CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
    else
        ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);

    // Seed nextPts with the starting points scaled down to the coarsest
    // pyramid level (the kernel doubles them again as it walks down the pyramid).
    GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
    GpuMat temp2 = nextPts.reshape(1);
    multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);

    ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
    status.setTo(Scalar::all(1));

    if (err)
        ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);

    // build the image pyramids.

    prevPyr_.resize(maxLevel + 1);
    nextPyr_.resize(maxLevel + 1);

    int cn = prevImg.channels();

    // 1- and 4-channel images convert to float directly; 3-channel input is
    // first expanded to BGRA, since the kernels handle 1- or 4-channel data.
    if (cn == 1 || cn == 4)
    {
        prevImg.convertTo(prevPyr_[0], CV_32F);
        nextImg.convertTo(nextPyr_[0], CV_32F);
    }
    else
    {
        cvtColor(prevImg, dx_calcBuf_, COLOR_BGR2BGRA);
        dx_calcBuf_.convertTo(prevPyr_[0], CV_32F);

        cvtColor(nextImg, dx_calcBuf_, COLOR_BGR2BGRA);
        dx_calcBuf_.convertTo(nextPyr_[0], CV_32F);
    }

    for (int level = 1; level <= maxLevel; ++level)
    {
        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
        pyrDown(nextPyr_[level - 1], nextPyr_[level]);
    }

    // Pass the window size and iteration count to the device-side kernels.
    loadConstants(make_int2(winSize.width, winSize.height), iters);

    // Coarse-to-fine: refine the tracked points from the coarsest level down to level 0.
    for (int level = maxLevel; level >= 0; level--)
    {
        if (cn == 1)
        {
            lkSparse1_gpu(prevPyr_[level], nextPyr_[level],
                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
                level, block, patch);
        }
        else
        {
            lkSparse4_gpu(prevPyr_[level], nextPyr_[level],
                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
                level, block, patch);
        }
    }
}
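
A sketch of how sparse might be called, with points from the GPU good-features detector; all surrounding names and parameter values are illustrative assumptions.

#include <opencv2/gpu/gpu.hpp>

// Hypothetical tracking step between two CV_8UC1 frames already on the GPU.
void trackPoints(const cv::gpu::GpuMat& d_prev, const cv::gpu::GpuMat& d_next)
{
    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(1000 /*maxCorners*/);
    cv::gpu::GpuMat prevPts;
    detector(d_prev, prevPts);                  // 1 x N, CV_32FC2

    cv::gpu::PyrLKOpticalFlow lk;
    lk.winSize  = cv::Size(21, 21);
    lk.maxLevel = 3;
    lk.iters    = 30;

    cv::gpu::GpuMat nextPts, status;
    lk.sparse(d_prev, d_next, prevPts, nextPts, status);
}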