void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
{
    ncvSetDebugOutputHandler(outputHandler);

    CV_Assert(frame0.type() == CV_32FC1);
    CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());

    u.create(frame0.size(), CV_32FC1);
    v.create(frame0.size(), CV_32FC1);

    cudaDeviceProp devProp;
    cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );

    // Pack the algorithm parameters into the NCV descriptor.
    NCVBroxOpticalFlowDescriptor desc;
    desc.alpha = alpha;
    desc.gamma = gamma;
    desc.scale_factor = scale_factor;
    desc.number_of_inner_iterations = inner_iterations;
    desc.number_of_outer_iterations = outer_iterations;
    desc.number_of_solver_iterations = solver_iterations;

    // Wrap the existing device buffers as NCV memory segments (no data is copied).
    NCVMemSegment frame0MemSeg;
    frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
    frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
    frame0MemSeg.size = frame0.step * frame0.rows;

    NCVMemSegment frame1MemSeg;
    frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
    frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
    frame1MemSeg.size = frame1.step * frame1.rows;

    NCVMemSegment uMemSeg;
    uMemSeg.begin.memtype = NCVMemoryTypeDevice;
    uMemSeg.begin.ptr = u.ptr();
    uMemSeg.size = u.step * u.rows;

    NCVMemSegment vMemSeg;
    vMemSeg.begin.memtype = NCVMemoryTypeDevice;
    vMemSeg.begin.ptr = v.ptr();
    vMemSeg.size = v.step * v.rows;

    NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
    NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
    NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
    NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));

    cudaStream_t stream = StreamAccessor::getStream(s);

    // Query the scratch size, grow the reusable buffer if needed and run the NCV solver.
    size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);
    ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);

    NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());

    ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
}
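// Usage sketch (illustrative, not part of the sources above): how the operator is typically
// driven; d_frame0/d_frame1 and the parameter values are assumptions for the example.
static void broxUsageExample(const cv::gpu::GpuMat& d_frame0, const cv::gpu::GpuMat& d_frame1)
{
    // d_frame0/d_frame1 are assumed to already be CV_32FC1 device images of equal size;
    // the constructor arguments are sample values, not required defaults.
    cv::gpu::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                  10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
    cv::gpu::GpuMat d_u, d_v;
    brox(d_frame0, d_frame1, d_u, d_v); // d_u/d_v receive the horizontal/vertical flow (CV_32FC1)
}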
void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
{
    CV_Assert(src.type() == CV_8UC1);

    dst.create(src.size(), src.type());

    // One scratch buffer holds both the NPP integral workspace and the 256-entry LUT.
    int intBufSize;
    nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );

    ensureSizeIsEnough(1, intBufSize + 256 * sizeof(int), CV_8UC1, buf);

    GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());
    GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);

    // Histogram -> cumulative histogram (LUT) -> per-pixel remapping.
    calcHist(src, hist, s);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );

    hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
}
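// Usage sketch (illustrative, not part of the sources above): calling the buffered overload;
// d_src is assumed to be a CV_8UC1 device image, and d_hist/d_buf are caller-owned scratch
// GpuMats that can be reused across frames to avoid reallocation.
static void equalizeHistUsageExample(const cv::gpu::GpuMat& d_src, cv::gpu::GpuMat& d_dst,
                                     cv::gpu::GpuMat& d_hist, cv::gpu::GpuMat& d_buf)
{
    cv::gpu::Stream stream;
    cv::gpu::equalizeHist(d_src, d_dst, d_hist, d_buf, stream); // enqueued on 'stream'
    stream.waitForCompletion();                                 // block until d_dst is ready
}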
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
    using namespace cv::gpu::cudev::fast;

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    // With non-maximum suppression the kernel compacts the locations and writes the responses.
    if (nonmaxSupression)
        return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    // Otherwise copy the raw locations and zero the response row.
    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
    using namespace cv::gpu::device::fast;

    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    // With non-maximum suppression the kernel compacts the locations and writes the responses.
    if (nonmaxSupression)
        return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    // Otherwise copy the raw locations and zero the response row.
    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}
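// Usage sketch (illustrative, not part of the sources above): getKeyPoints() is normally reached
// through FAST_GPU::operator(); d_img is assumed to be a CV_8UC1 device image and the threshold
// value 20 is arbitrary.
static void fastUsageExample(const cv::gpu::GpuMat& d_img, std::vector<cv::KeyPoint>& keypoints)
{
    cv::gpu::FAST_GPU fast(20 /*threshold*/, true /*nonmaxSupression*/);
    cv::gpu::GpuMat d_keypoints;
    fast(d_img, cv::gpu::GpuMat(), d_keypoints);    // detect on the whole image (empty mask)
    fast.downloadKeypoints(d_keypoints, keypoints); // copy location/response rows to the host
}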
void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
{
    using namespace cv::gpu::device::pyrlk;

    if (prevPts.empty())
    {
        nextPts.release();
        status.release();
        if (err) err->release();
        return;
    }

    dim3 block, patch;
    calcPatchSize(winSize, block, patch, isDeviceArch11_);

    CV_Assert(prevImg.type() == CV_8UC1 || prevImg.type() == CV_8UC3 || prevImg.type() == CV_8UC4);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
    CV_Assert(maxLevel >= 0);
    CV_Assert(winSize.width > 2 && winSize.height > 2);
    CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
    CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);

    if (useInitialFlow)
        CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
    else
        ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);

    // Seed nextPts with the initial guess, scaled down to the coarsest pyramid level.
    GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
    GpuMat temp2 = nextPts.reshape(1);
    multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);

    ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
    status.setTo(Scalar::all(1));

    if (err)
        ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);

    // Build the image pyramids.
    prevPyr_.resize(maxLevel + 1);
    nextPyr_.resize(maxLevel + 1);

    int cn = prevImg.channels();

    if (cn == 1 || cn == 4)
    {
        prevImg.convertTo(prevPyr_[0], CV_32F);
        nextImg.convertTo(nextPyr_[0], CV_32F);
    }
    else
    {
        // 3-channel input is expanded to 4 channels before conversion to float.
        cvtColor(prevImg, dx_calcBuf_, COLOR_BGR2BGRA);
        dx_calcBuf_.convertTo(prevPyr_[0], CV_32F);

        cvtColor(nextImg, dx_calcBuf_, COLOR_BGR2BGRA);
        dx_calcBuf_.convertTo(nextPyr_[0], CV_32F);
    }

    for (int level = 1; level <= maxLevel; ++level)
    {
        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
        pyrDown(nextPyr_[level - 1], nextPyr_[level]);
    }

    loadConstants(make_int2(winSize.width, winSize.height), iters);

    // Track from the coarsest level down to the original resolution.
    for (int level = maxLevel; level >= 0; level--)
    {
        if (cn == 1)
        {
            lkSparse1_gpu(prevPyr_[level], nextPyr_[level],
                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
                level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
                level, block, patch);
        }
        else
        {
            lkSparse4_gpu(prevPyr_[level], nextPyr_[level],
                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
                level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
                level, block, patch);
        }
    }
}
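// Usage sketch (illustrative, not part of the sources above): d_prev/d_next are assumed to be
// CV_8UC1 frames and d_prevPts a 1xN CV_32FC2 row of points to track (for example produced by
// cv::gpu::GoodFeaturesToTrackDetector_GPU); the parameter values are arbitrary.
static void pyrlkUsageExample(const cv::gpu::GpuMat& d_prev, const cv::gpu::GpuMat& d_next,
                              const cv::gpu::GpuMat& d_prevPts)
{
    cv::gpu::PyrLKOpticalFlow pyrlk;
    pyrlk.winSize  = cv::Size(21, 21);
    pyrlk.maxLevel = 3;
    pyrlk.iters    = 30;
    cv::gpu::GpuMat d_nextPts, d_status;
    pyrlk.sparse(d_prev, d_next, d_prevPts, d_nextPts, d_status); // d_status flags points that were tracked
}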