void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) { #if (CUDA_VERSION < 5000) CV_Assert(terminals.type() == CV_32S); #else CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); #endif Size src_size = terminals.size(); CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); CV_Assert(leftTransp.type() == terminals.type()); CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); CV_Assert(rightTransp.type() == terminals.type()); CV_Assert(top.size() == src_size); CV_Assert(top.type() == terminals.type()); CV_Assert(bottom.size() == src_size); CV_Assert(bottom.type() == terminals.type()); labels.create(src_size, CV_8U); NppiSize sznpp; sznpp.width = src_size.width; sznpp.height = src_size.height; int bufsz; nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); ensureSizeIsEnough(1, bufsz, CV_8U, buf); cudaStream_t stream = StreamAccessor::getStream(s); NppStreamHandler h(stream); NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc); #if (CUDA_VERSION < 5000) nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) ); #else if (terminals.type() == CV_32S) { nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) ); } else { nppSafeCall( nppiGraphcut_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(), top.ptr<Npp32f>(), bottom.ptr<Npp32f>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, 
labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) ); } #endif if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); }
void SurfFeaturesFinderGpu::find(const Mat &image, ImageFeatures &features) { CV_Assert(image.depth() == CV_8U); ensureSizeIsEnough(image.size(), image.type(), image_); image_.upload(image); ensureSizeIsEnough(image.size(), CV_8UC1, gray_image_); cvtColor(image_, gray_image_, CV_BGR2GRAY); surf_.nOctaves = num_octaves_; surf_.nOctaveLayers = num_layers_; surf_.upright = false; surf_(gray_image_, GpuMat(), keypoints_); surf_.nOctaves = num_octaves_descr_; surf_.nOctaveLayers = num_layers_descr_; surf_.upright = true; surf_(gray_image_, GpuMat(), keypoints_, descriptors_, true); surf_.downloadKeypoints(keypoints_, features.keypoints); descriptors_.download(features.descriptors); }
/**
 * Fills `*pcvgmKeyPoints_` with the detected key points and returns how many
 * there are.
 *
 * Raises CV_StsNotImplemented when global atomics are unavailable (either not
 * built in or not supported by the current device). Returns 0 without touching
 * the output when `_uCount` is 0.
 *
 * With non-maximum suppression enabled the result (and the return value) comes
 * from btl::device::fast::cudaNonMaxSupression. Otherwise the first `_uCount`
 * locations are copied into row 0 of the output and row 1 is zeroed.
 *
 * @param pcvgmKeyPoints_ Output matrix; dereferenced whenever `_uCount` != 0.
 * @return Number of key points written.
 */
int CFast::getKeyPoints(cv::gpu::GpuMat* pcvgmKeyPoints_)
{
    const bool atomicsUsable = cv::gpu::TargetArchs::builtWith(cv::gpu::GLOBAL_ATOMICS)
                            && cv::gpu::DeviceInfo().supports(cv::gpu::GLOBAL_ATOMICS);
    if (!atomicsUsable)
    {
        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
    }

    if (_uCount == 0)
    {
        return 0;
    }

    cv::gpu::GpuMat& outKeyPoints = *pcvgmKeyPoints_;
    ensureSizeIsEnough(ROWS_COUNT, _uCount, CV_32FC1, outKeyPoints);

    if (_bNonMaxSupression)
    {
        return btl::device::fast::cudaNonMaxSupression(_cvgmKeyPointLocation.ptr<short2>(),
                                                       _uCount,
                                                       _cvgmScore,
                                                       outKeyPoints.ptr<short2>(LOCATION_ROW),
                                                       outKeyPoints.ptr<float>(RESPONSE_ROW));
    }

    // No suppression requested: wrap row 0 of the output in a header typed like
    // the location buffer so the locations can be copied over verbatim.
    cv::gpu::GpuMat cvgmLocRow(1, _uCount, _cvgmKeyPointLocation.type(), outKeyPoints.ptr(0));
    _cvgmKeyPointLocation.colRange(0, _uCount).copyTo(cvgmLocRow);

    // Row 1 is cleared on this path.
    outKeyPoints.row(1).setTo(cv::Scalar::all(0));

    return _uCount;
}
/**
 * Clears the `overlaps` and `suppressed` buffers and launches
 * device::suppress on `objects`.
 *
 * `overlaps` is grown to at least the dimensions of `objects` (as CV_8UC1);
 * `suppressed` is not resized here. When `s` converts to true the clears are
 * enqueued on it, otherwise they are done with setTo.
 *
 * @param objects Detection matrix; its first row, columns
 *                [0, sizeof(Detection)), is also passed separately to the
 *                device routine as `ndetections`.
 * @param s       Stream used for the clears (if non-null) and for the launch.
 */
void suppress(cv::gpu::GpuMat& objects, cv::gpu::Stream& s)
{
    // One-row view over the leading sizeof(Detection) columns of `objects`.
    const cv::Rect headerRegion(0, 0, sizeof(Detection), 1);
    cv::gpu::GpuMat ndetections(objects, headerRegion);

    ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);

    if (s)
    {
        s.enqueueMemSet(overlaps, 0);
        s.enqueueMemSet(suppressed, 0);
    }
    else
    {
        overlaps.setTo(0);
        suppressed.setTo(0);
    }

    cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
    device::suppress(objects, overlaps, ndetections, suppressed, stream);
}
/**
 * Runs the DoG / extrema / localization pipeline and writes the results into
 * the rows of `keypoints`, then prints the counter value to stdout.
 *
 * `keypoints` is sized to at least SIFT_GPU::ROWS_COUNT x MAXEXTREMAS
 * (CV_32FC1) and zero-filled first. Only octave 0 is processed: the loop bound
 * is the literal 1.
 *
 * @param keypoints Output matrix; the rows X_ROW, Y_ROW, OCTAVE_ROW, SIZE_ROW,
 *                  ANGLE_ROW and RESPONSE_ROW are handed to localization().
 * @param scales    Forwarded unchanged to every pipeline stage.
 */
void detectKeypoints(cv::gpu::GpuMat& keypoints, int scales)
{
    ensureSizeIsEnough(SIFT_GPU::ROWS_COUNT, MAXEXTREMAS, CV_32FC1, keypoints);
    keypoints.setTo(cv::Scalar::all(0));

    // Output rows are fixed for the whole run, so resolve them once up front.
    float* const xOut        = keypoints.ptr<float>(SIFT_GPU::X_ROW);
    float* const yOut        = keypoints.ptr<float>(SIFT_GPU::Y_ROW);
    float* const octaveOut   = keypoints.ptr<float>(SIFT_GPU::OCTAVE_ROW);
    float* const sizeOut     = keypoints.ptr<float>(SIFT_GPU::SIZE_ROW);
    float* const angleOut    = keypoints.ptr<float>(SIFT_GPU::ANGLE_ROW);
    float* const responseOut = keypoints.ptr<float>(SIFT_GPU::RESPONSE_ROW);

    const int octavesToProcess = 1;
    for (int octave = 0; octave < octavesToProcess; ++octave)
    {
        // Each octave halves the image dimensions.
        const int scaleRows = rows >> octave;
        const int scaleCols = cols >> octave;

        createDoGSpace(inImage.data, &deviceDoGData, scales, scaleRows, scaleCols);

        findExtremas(deviceDoGData, &sift_.extremaBuffer, &maxCounter, octave, scales, scaleRows, scaleCols);

        localization(deviceDoGData, scaleRows, scaleCols, scales, octave, sift_.nOctaves,
                     sift_.extremaBuffer, maxCounter,
                     xOut, yOut, octaveOut, sizeOut, angleOut, responseOut);
    }

    std::cout << "Number of keypoints: " << maxCounter[0] << std::endl;
}
/**
 * Convenience overload taking a Size instead of separate dimensions.
 *
 * Forwards to ensureSizeIsEnough(rows, cols, type, arr) with
 * rows = size.height and cols = size.width.
 */
static inline void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
    const int rows = size.height;
    const int cols = size.width;
    ensureSizeIsEnough(rows, cols, type, arr);
}
/**
 * Convenience overload for oclMat taking a Size instead of separate dimensions.
 *
 * Forwards to ensureSizeIsEnough(rows, cols, type, m) with
 * rows = size.height and cols = size.width.
 */
inline void ensureSizeIsEnough(Size size, int type, oclMat &m)
{
    const int rows = size.height;
    const int cols = size.width;
    ensureSizeIsEnough(rows, cols, type, m);
}