void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert((src1.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);

    static const func_t funcs[] =
    {
        min_caller<uchar>, min_caller<schar>, min_caller<ushort>, min_caller<short>,
        min_caller<int>, min_caller<float>, min_caller<double>
    };

    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
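// Usage sketch (illustrative, not part of the module): element-wise minimum of two
// same-sized, same-type images via the overload above. The name example_gpu_min is
// hypothetical; assumes the OpenCV 2.4-style gpu headers.
#include <opencv2/gpu/gpu.hpp>

static void example_gpu_min(const cv::Mat& a, const cv::Mat& b, cv::Mat& dst)
{
    CV_Assert(a.size() == b.size() && a.type() == b.type());

    cv::gpu::GpuMat d_a(a), d_b(b);             // upload inputs
    cv::gpu::GpuMat d_dst(a.size(), a.type());  // pre-allocate the result

    cv::gpu::Stream stream;
    cv::gpu::min(d_a, d_b, d_dst, stream);      // runs asynchronously on 'stream'
    stream.waitForCompletion();

    d_dst.download(dst);                        // copy the result back to the host
}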
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
    typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask,
                           double* minval, double* maxval,
                           int* minloc, int* maxloc,
                           PtrStepb valbuf, PtrStep<unsigned int> locbuf);

#ifdef OPENCV_TINY_GPU_MODULE
    static const func_t funcs[] =
    {
        ::minMaxLoc::run<uchar>,
        0/*::minMaxLoc::run<schar>*/,
        0/*::minMaxLoc::run<ushort>*/,
        0/*::minMaxLoc::run<short>*/,
        ::minMaxLoc::run<int>,
        ::minMaxLoc::run<float>,
        0/*::minMaxLoc::run<double>*/,
    };
#else
    static const func_t funcs[] =
    {
        ::minMaxLoc::run<uchar>,
        ::minMaxLoc::run<schar>,
        ::minMaxLoc::run<ushort>,
        ::minMaxLoc::run<short>,
        ::minMaxLoc::run<int>,
        ::minMaxLoc::run<float>,
        ::minMaxLoc::run<double>,
    };
#endif

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size valbuf_size, locbuf_size;
    ::minMaxLoc::getBufSize(src.cols, src.rows, src.elemSize(), valbuf_size.width, valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);

    const func_t func = funcs[src.depth()];
    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

    double temp1, temp2;
    Point temp3, temp4;
    func(src, mask,
         minVal ? minVal : &temp1, maxVal ? maxVal : &temp2,
         minLoc ? &minLoc->x : &temp3.x, maxLoc ? &maxLoc->x : &temp4.x,
         valBuf, locBuf);
}
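// Usage sketch (illustrative): locate the global extrema of a single-channel GpuMat,
// reusing caller-owned buffers across calls as the buffered overload above expects.
// example_gpu_minMaxLoc is a hypothetical name.
#include <opencv2/gpu/gpu.hpp>

static void example_gpu_minMaxLoc(const cv::gpu::GpuMat& d_src)
{
    double minVal = 0, maxVal = 0;
    cv::Point minLoc, maxLoc;
    cv::gpu::GpuMat valBuf, locBuf;   // grown on demand by ensureSizeIsEnough above

    cv::gpu::minMaxLoc(d_src, &minVal, &maxVal, &minLoc, &maxLoc,
                       cv::gpu::GpuMat() /*no mask*/, valBuf, locBuf);
}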
void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat& query, const GpuMat& train,
    GpuMat& trainIdx, GpuMat& distance,
    const GpuMat& mask, Stream& stream)
{
    if (query.empty() || train.empty())
        return;

    using namespace cv::cuda::device::bf_match;

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.cols == query.cols && train.type() == query.type());
    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);

    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32F, distance);

    caller_t func = callers[query.depth()];
    CV_Assert(func != 0);

    func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
}
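// Usage sketch (illustrative): brute-force L2 matching of CV_32F descriptors through
// matchSingle, converting the 1 x nQuery trainIdx/distance results to DMatch by hand.
// example_bf_match is a hypothetical name, and the header location of BFMatcher_CUDA
// varied across the 3.x transition; adjust the include to your build.
#include <vector>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>

static void example_bf_match(const cv::cuda::GpuMat& d_query, const cv::cuda::GpuMat& d_train,
                             std::vector<cv::DMatch>& matches)
{
    cv::cuda::BFMatcher_CUDA matcher(cv::NORM_L2);

    cv::cuda::GpuMat d_trainIdx, d_distance;
    matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance, cv::cuda::GpuMat());

    cv::Mat trainIdx, distance;
    d_trainIdx.download(trainIdx);
    d_distance.download(distance);

    matches.clear();
    for (int i = 0; i < trainIdx.cols; ++i)
        matches.push_back(cv::DMatch(i, trainIdx.at<int>(0, i), distance.at<float>(0, i)));
}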
Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
{
    typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);

#ifdef OPENCV_TINY_GPU_MODULE
    static const func_t funcs[7][5] =
    {
        {0, ::sum::runSqr<uchar , 1>, 0, 0, 0},
        {0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0},
        {0, ::sum::runSqr<float , 1>, 0, 0, 0},
        {0, 0, 0, 0, 0},
    };
#else
    static const func_t funcs[7][5] =
    {
        {0, ::sum::runSqr<uchar , 1>, ::sum::runSqr<uchar , 2>, ::sum::runSqr<uchar , 3>, ::sum::runSqr<uchar , 4>},
        {0, ::sum::runSqr<schar , 1>, ::sum::runSqr<schar , 2>, ::sum::runSqr<schar , 3>, ::sum::runSqr<schar , 4>},
        {0, ::sum::runSqr<ushort, 1>, ::sum::runSqr<ushort, 2>, ::sum::runSqr<ushort, 3>, ::sum::runSqr<ushort, 4>},
        {0, ::sum::runSqr<short , 1>, ::sum::runSqr<short , 2>, ::sum::runSqr<short , 3>, ::sum::runSqr<short , 4>},
        {0, ::sum::runSqr<int   , 1>, ::sum::runSqr<int   , 2>, ::sum::runSqr<int   , 3>, ::sum::runSqr<int   , 4>},
        {0, ::sum::runSqr<float , 1>, ::sum::runSqr<float , 2>, ::sum::runSqr<float , 3>, ::sum::runSqr<float , 4>},
        {0, ::sum::runSqr<double, 1>, ::sum::runSqr<double, 2>, ::sum::runSqr<double, 3>, ::sum::runSqr<double, 4>}
    };
#endif

    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );

    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size buf_size;
    ::sum::getBufSize(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);
    buf.setTo(Scalar::all(0));

    const func_t func = funcs[src.depth()][src.channels()];
    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

    double result[4];
    func(src, buf.data, result, mask);

    return Scalar(result[0], result[1], result[2], result[3]);
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
    CV_Assert(mask.type() == CV_8UC1);

    typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream);

    static const set_caller_t set_callers[] =
    {
        kernelSetMask<uchar>, kernelSetMask<schar>, kernelSetMask<ushort>, kernelSetMask<short>,
        kernelSetMask<int>, kernelSetMask<float>, kernelSetMask<double>
    };

    set_callers[src.depth()](src, val, mask, impl->stream);
}
void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
    GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
    const GpuMat& masks, Stream& stream)
{
    if (query.empty() || trainCollection.empty())
        return;

    using namespace ::cv::gpu::device::bf_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                             int cc, cudaStream_t stream);

    static const caller_t callers[3][6] =
    {
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        },
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        },
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        }
    };

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
    ensureSizeIsEnough(1, nQuery, CV_32F, distance);

    caller_t func = callers[distType][query.depth()];
    CV_Assert(func != 0);

    DeviceInfo info;
    int cc = info.majorVersion() * 10 + info.minorVersion();

    func(query, trainCollection, masks, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
}
void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
{
    CV_Assert(!image.empty());

    int ch = image.channels();
    CV_Assert(ch <= 4);

    int depth = image.depth();

    typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);

    static const func_t supportLookup[8][4] =
    {
        //                                1, 2,                                  3,                                  4
        { device::ccl::computeEdges<uchar> , 0, device::ccl::computeEdges<uchar3> , device::ccl::computeEdges<uchar4>  },// CV_8U
        { 0                                , 0, 0                                 , 0                                  },// CV_8S
        { device::ccl::computeEdges<ushort>, 0, device::ccl::computeEdges<ushort3>, device::ccl::computeEdges<ushort4> },// CV_16U
        { 0                                , 0, 0                                 , 0                                  },// CV_16S
        { device::ccl::computeEdges<int>   , 0, 0                                 , 0                                  },// CV_32S
        { device::ccl::computeEdges<float> , 0, 0                                 , 0                                  },// CV_32F
        { 0                                , 0, 0                                 , 0                                  },// CV_64F
        { 0                                , 0, 0                                 , 0                                  } // CV_USRTYPE1
    };

    func_t f = supportLookup[depth][ch - 1];
    CV_Assert(f);

    if (image.size() != mask.size() || mask.type() != CV_8UC1)
        mask.create(image.size(), CV_8UC1);

    cudaStream_t stream = StreamAccessor::getStream(s);
    float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi);
    f(image, mask, culo, cuhi, stream);
}
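// Usage sketch (illustrative): build the connectivity mask for an image and feed it to
// cv::gpu::labelComponents, which consumes exactly the CV_8UC1 mask produced above.
// example_connectivity is a hypothetical name; assumes the 2.4 gpu module.
#include <opencv2/gpu/gpu.hpp>

static void example_connectivity(const cv::gpu::GpuMat& d_image)
{
    cv::gpu::GpuMat d_mask, d_components;

    // Neighbouring pixels whose difference falls inside [lo, hi] are treated as connected.
    cv::gpu::connectivityMask(d_image, d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
    cv::gpu::labelComponents(d_mask, d_components);
}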
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
{
    using namespace mathfunc;

    typedef void (*Caller)(const DevMem2D, PtrStep, double*, int);

    static Caller multipass_callers[7] =
    {
        sqrSumMultipassCaller<unsigned char>, sqrSumMultipassCaller<char>,
        sqrSumMultipassCaller<unsigned short>, sqrSumMultipassCaller<short>,
        sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[7] =
    {
        sqrSumCaller<unsigned char>, sqrSumCaller<char>,
        sqrSumCaller<unsigned short>, sqrSumCaller<short>,
        sqrSumCaller<int>, sqrSumCaller<float>, 0
    };

    Caller* callers = multipass_callers;
    if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
        callers = singlepass_callers;

    Size buf_size;
    sums::getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    Caller caller = callers[src.depth()];
    if (!caller)
        CV_Error(CV_StsBadArg, "sqrSum: unsupported type");

    double result[4];
    caller(src, buf, result, src.channels());

    return Scalar(result[0], result[1], result[2], result[3]);
}
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
{
    CV_Assert( src.depth() != CV_64F );

    dst.create(src.size(), src.type());

    typedef void (*caller_t)(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);

    static const caller_t callers[] =
    {
        mathfunc::pow_caller<unsigned char>, mathfunc::pow_caller<signed char>,
        mathfunc::pow_caller<unsigned short>, mathfunc::pow_caller<short>,
        mathfunc::pow_caller<int>, mathfunc::pow_caller<float>
    };

    callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream));
}
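// Usage sketch (illustrative): element-wise power. Note that the implementation above
// narrows 'power' to float and rejects CV_64F sources. example_gpu_pow is hypothetical.
#include <opencv2/gpu/gpu.hpp>

static void example_gpu_pow(const cv::gpu::GpuMat& d_src)
{
    cv::gpu::GpuMat d_dst;
    cv::gpu::pow(d_src, 2.0, d_dst, cv::gpu::Stream::Null());   // element-wise square
}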
void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream)
{
    const GpuMat src = _src.getGpuMat();

    CV_Assert(src.depth() == CV_8U && src.channels() < 4);

    int border_size = search_window/2 + block_window/2;
    Size esize = src.size() + Size(border_size, border_size) * 2;

    BufferPool pool(stream);

    GpuMat extended_src = pool.getBuffer(esize, src.type());
    cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
    GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));

    int bcols, brows;
    device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
    GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S);

    using namespace cv::cuda::device::imgproc;
    typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0 };

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream));
}
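// Usage sketch (illustrative): denoise an 8-bit image. Per the assertion above the
// input must be CV_8U with 1-3 channels; h trades noise suppression against detail.
// example_fast_nlm is a hypothetical name; the include is the 3.x photo CUDA interface.
#include <opencv2/core/cuda.hpp>
#include <opencv2/photo/cuda.hpp>

static void example_fast_nlm(const cv::cuda::GpuMat& d_noisy)
{
    cv::cuda::GpuMat d_denoised;
    cv::cuda::fastNlMeansDenoising(d_noisy, d_denoised,
                                   10.0f /*h*/, 21 /*search_window*/, 7 /*block_window*/);
}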
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
{
    using namespace cv::gpu::mathfunc;

    typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst);

    static const func_t funcs[] =
    {
        transpose_gpu<uchar4>, transpose_gpu<char4>, transpose_gpu<ushort2>,
        transpose_gpu<short2>, transpose_gpu<int>, transpose_gpu<float>
    };

    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 ||
              src.type() == CV_16UC2 || src.type() == CV_16SC2 ||
              src.type() == CV_32SC1 || src.type() == CV_32FC1);

    dst.create( src.cols, src.rows, src.type() );

    if (src.type() == CV_8UC1)
    {
        NppiSize sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
                                          dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
    }
    else
    {
        funcs[src.depth()](src, dst);
    }
}
int cv::gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
{
    CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
    CV_Assert( !this->empty());

    const int defaultObjSearchNum = 100;
    if (objectsBuf.empty())
    {
        objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
    }

    NcvSize32u ncvMinSize = impl->getClassifierSize();

    if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height)
    {
        ncvMinSize.width = minSize.width;
        ncvMinSize.height = minSize.height;
    }

    unsigned int numDetections;
    NCVStatus ncvStat = impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections);

    if (ncvStat != NCV_SUCCESS)
    {
        CV_Error(CV_GpuApiCallError, "Error in face detection");
    }

    return numDetections;
}
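// Usage sketch (illustrative): GPU cascade detection on a grayscale image. The XML
// path is a placeholder, example_cascade is a hypothetical name, and only the first
// 'count' entries of the objects buffer are meaningful.
#include <vector>
#include <opencv2/gpu/gpu.hpp>

static void example_cascade(const cv::gpu::GpuMat& d_gray, std::vector<cv::Rect>& faces)
{
    cv::gpu::CascadeClassifier_GPU cascade;
    if (!cascade.load("haarcascade_frontalface_alt.xml"))   // placeholder path
        return;

    cv::gpu::GpuMat d_objects;
    int count = cascade.detectMultiScale(d_gray, d_objects, 1.2, 4, cv::Size(24, 24));

    faces.clear();
    if (count > 0)
    {
        cv::Mat objectsHost;
        d_objects.colRange(0, count).download(objectsHost);
        const cv::Rect* ptr = objectsHost.ptr<cv::Rect>();
        faces.assign(ptr, ptr + count);
    }
}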
void cv::gpu::matchTemplate(
    const GpuMat& image, const GpuMat& templ, GpuMat& result, int method,
    MatchTemplateBuf& buf, Stream& stream)
{
    CV_Assert(image.type() == templ.type());
    CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);

    typedef void (*Caller)(const GpuMat&, const GpuMat&, GpuMat&, MatchTemplateBuf&, Stream& stream);

    static const Caller callers8U[] =
    {
        ::matchTemplate_SQDIFF_8U, ::matchTemplate_SQDIFF_NORMED_8U,
        ::matchTemplate_CCORR_8U, ::matchTemplate_CCORR_NORMED_8U,
        ::matchTemplate_CCOFF_8U, ::matchTemplate_CCOFF_NORMED_8U
    };
    static const Caller callers32F[] =
    {
        ::matchTemplate_SQDIFF_32F, 0,
        ::matchTemplate_CCORR_32F, 0, 0, 0
    };

    const Caller* callers = 0;
    switch (image.depth())
    {
        case CV_8U: callers = callers8U; break;
        case CV_32F: callers = callers32F; break;
        default: CV_Error(CV_StsBadArg, "matchTemplate: unsupported data type");
    }

    Caller caller = callers[method];
    CV_Assert(caller);
    caller(image, templ, result, buf, stream);
}
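// Usage sketch (illustrative): normalized cross-correlation matching, then locating
// the best match with the unbuffered minMaxLoc overload. example_match_template is a
// hypothetical name; the MatchTemplateBuf is reusable across calls.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/imgproc/imgproc.hpp>

static cv::Point example_match_template(const cv::gpu::GpuMat& d_image, const cv::gpu::GpuMat& d_templ)
{
    cv::gpu::GpuMat d_result;
    cv::gpu::MatchTemplateBuf buf;
    cv::gpu::matchTemplate(d_image, d_templ, d_result, CV_TM_CCORR_NORMED, buf, cv::gpu::Stream::Null());

    double maxVal = 0;
    cv::Point maxLoc;
    cv::gpu::minMaxLoc(d_result, 0, &maxVal, 0, &maxLoc);
    return maxLoc;   // top-left corner of the best match
}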
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
                          GpuMat& result, Stream& stream)
{
    CV_Assert(img1.size() == img2.size());
    CV_Assert(img1.type() == img2.type());
    CV_Assert(weights1.size() == img1.size());
    CV_Assert(weights2.size() == img2.size());
    CV_Assert(weights1.type() == CV_32F);
    CV_Assert(weights2.type() == CV_32F);

    const Size size = img1.size();
    const int depth = img1.depth();
    const int cn = img1.channels();

    result.create(size, CV_MAKE_TYPE(depth, cn));

    switch (depth)
    {
    case CV_8U:
        if (cn != 4)
            blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        else
            blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        break;
    case CV_32F:
        blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        break;
    default:
        CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function");
    }
}
void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream)
{
    using namespace cv::gpu::device::pyramid;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[6][4] =
    {
        {kernelDownsampleX2_gpu<uchar1>      , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3>      , kernelDownsampleX2_gpu<uchar4>      },
        {0 /*kernelDownsampleX2_gpu<char1>*/ , 0 /*kernelDownsampleX2_gpu<char2>*/  , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
        {kernelDownsampleX2_gpu<ushort1>     , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3>     , kernelDownsampleX2_gpu<ushort4>     },
        {0 /*kernelDownsampleX2_gpu<short1>*/, 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
        {0 /*kernelDownsampleX2_gpu<int1>*/  , 0 /*kernelDownsampleX2_gpu<int2>*/   , 0 /*kernelDownsampleX2_gpu<int3>*/  , 0 /*kernelDownsampleX2_gpu<int4>*/  },
        {kernelDownsampleX2_gpu<float1>      , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3>      , kernelDownsampleX2_gpu<float4>      }
    };

    CV_Assert(img.depth() <= CV_32F && img.channels() <= 4);

    const func_t func = funcs[img.depth()][img.channels() - 1];
    CV_Assert(func != 0);

    layer0_ = img;
    Size szLastLayer = img.size();
    nLayers_ = 1;

    if (numLayers <= 0)
        numLayers = 255; // effectively unbounded: the loop below stops once either dimension would reach 0

    pyramid_.resize(numLayers);

    for (int i = 0; i < numLayers - 1; ++i)
    {
        Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);

        if (szCurLayer.width == 0 || szCurLayer.height == 0)
            break;

        ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
        nLayers_++;

        const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];

        func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));

        szLastLayer = szCurLayer;
    }
}
void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const
{
    using namespace cv::gpu::device::pyramid;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[6][4] =
    {
        {kernelInterpolateFrom1_gpu<uchar1>      , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3>      , kernelInterpolateFrom1_gpu<uchar4>      },
        {0 /*kernelInterpolateFrom1_gpu<char1>*/ , 0 /*kernelInterpolateFrom1_gpu<char2>*/  , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
        {kernelInterpolateFrom1_gpu<ushort1>     , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3>     , kernelInterpolateFrom1_gpu<ushort4>     },
        {0 /*kernelInterpolateFrom1_gpu<short1>*/, 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
        {0 /*kernelInterpolateFrom1_gpu<int1>*/  , 0 /*kernelInterpolateFrom1_gpu<int2>*/   , 0 /*kernelInterpolateFrom1_gpu<int3>*/  , 0 /*kernelInterpolateFrom1_gpu<int4>*/  },
        {kernelInterpolateFrom1_gpu<float1>      , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3>      , kernelInterpolateFrom1_gpu<float4>      }
    };

    CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);

    ensureSizeIsEnough(outRoi, layer0_.type(), outImg);

    const func_t func = funcs[outImg.depth()][outImg.channels() - 1];
    CV_Assert(func != 0);

    if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
    {
        if (stream)
            stream.enqueueCopy(layer0_, outImg);
        else
            layer0_.copyTo(outImg);

        return; // exact size match: the copy is the result, skip interpolation
    }

    float lastScale = 1.0f;
    float curScale;
    GpuMat lastLayer = layer0_;
    GpuMat curLayer;

    for (int i = 0; i < nLayers_ - 1; ++i)
    {
        curScale = lastScale * 0.5f;
        curLayer = pyramid_[i];

        if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
        {
            if (stream)
                stream.enqueueCopy(curLayer, outImg);
            else
                curLayer.copyTo(outImg);

            return; // exact size match: avoid re-interpolating over the copied layer
        }

        if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
            break;

        lastScale = curScale;
        lastLayer = curLayer;
    }

    func(lastLayer, outImg, StreamAccessor::getStream(stream));
}
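// Usage sketch (illustrative): build the pyramid once, then extract layers of
// arbitrary size; getLayer copies an exactly-matching stored layer or interpolates
// from the nearest larger one. example_image_pyramid is a hypothetical name.
#include <opencv2/gpu/gpu.hpp>

static void example_image_pyramid(const cv::gpu::GpuMat& d_img)
{
    cv::gpu::ImagePyramid pyr;
    pyr.build(d_img, 4, cv::gpu::Stream::Null());   // layer0 plus up to 3 halved layers

    cv::gpu::GpuMat d_small;
    pyr.getLayer(d_small, cv::Size(d_img.cols / 3, d_img.rows / 3), cv::gpu::Stream::Null());
}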
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
{
    typedef int (*func_t)(const PtrStepSzb src, PtrStep<unsigned int> buf);

#ifdef OPENCV_TINY_GPU_MODULE
    static const func_t funcs[] =
    {
        ::countNonZero::run<uchar>,
        0/*::countNonZero::run<schar>*/,
        0/*::countNonZero::run<ushort>*/,
        0/*::countNonZero::run<short>*/,
        0/*::countNonZero::run<int>*/,
        ::countNonZero::run<float>,
        0/*::countNonZero::run<double>*/,
    };
#else
    static const func_t funcs[] =
    {
        ::countNonZero::run<uchar>,
        ::countNonZero::run<schar>,
        ::countNonZero::run<ushort>,
        ::countNonZero::run<short>,
        ::countNonZero::run<int>,
        ::countNonZero::run<float>,
        ::countNonZero::run<double>,
    };
#endif

    CV_Assert(src.channels() == 1);

    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size buf_size;
    ::countNonZero::getBufSize(src.cols, src.rows, buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    const func_t func = funcs[src.depth()];
    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

    return func(src, buf);
}
void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const GpuMat& train,
    GpuMat& trainIdx, GpuMat& distance,
    const GpuMat& mask, Stream& stream)
{
    if (query.empty() || train.empty())
        return;

    using namespace cv::gpu::device::bf_match;

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                             cudaStream_t stream);

    static const caller_t callers[3][6] =
    {
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        },
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        },
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        }
    };

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.cols == query.cols && train.type() == query.type());

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32F, distance);

    caller_t func = callers[distType][query.depth()];
    CV_Assert(func != 0);

    func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
    CV_Assert(mask.type() == CV_8UC1);

    setTo(src, val, mask, Impl::getStream(impl));
}
void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor)
{
    if( ddepth < 0 )
        ddepth = src.depth();

    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor);
    f->apply(src, dst);
}
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, Stream& stream)
{
    if( ddepth < 0 )
        ddepth = src.depth();

    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

    Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);
    f->apply(src, dst, Rect(0, 0, -1, -1), stream);
}
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
                    int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
    using namespace cv::gpu::device::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
        PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

    static const func_t funcs[6][4] =
    {
        {remap_gpu<uchar>      , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3>     , remap_gpu<uchar4>     },
        {0 /*remap_gpu<schar>*/, 0 /*remap_gpu<char2>*/  , 0 /*remap_gpu<char3>*/, 0 /*remap_gpu<char4>*/},
        {remap_gpu<ushort>     , 0 /*remap_gpu<ushort2>*/, remap_gpu<ushort3>    , remap_gpu<ushort4>    },
        {remap_gpu<short>      , 0 /*remap_gpu<short2>*/ , remap_gpu<short3>     , remap_gpu<short4>     },
        {0 /*remap_gpu<int>*/  , 0 /*remap_gpu<int2>*/   , 0 /*remap_gpu<int3>*/ , 0 /*remap_gpu<int4>*/ },
        {remap_gpu<float>      , 0 /*remap_gpu<float2>*/ , remap_gpu<float3>     , remap_gpu<float4>     }
    };

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert(func != 0);

    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

    dst.create(xmap.size(), src.type());

    Scalar_<float> borderValueFloat;
    borderValueFloat = borderValue;

    DeviceInfo info;
    int cc = info.majorVersion() * 10 + info.minorVersion();

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
        dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
}
void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap,
                     int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
    using namespace cv::cuda::device::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
        PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

    static const func_t funcs[6][4] =
    {
        {remap_gpu<uchar>      , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3>     , remap_gpu<uchar4>     },
        {0 /*remap_gpu<schar>*/, 0 /*remap_gpu<char2>*/  , 0 /*remap_gpu<char3>*/, 0 /*remap_gpu<char4>*/},
        {remap_gpu<ushort>     , 0 /*remap_gpu<ushort2>*/, remap_gpu<ushort3>    , remap_gpu<ushort4>    },
        {remap_gpu<short>      , 0 /*remap_gpu<short2>*/ , remap_gpu<short3>     , remap_gpu<short4>     },
        {0 /*remap_gpu<int>*/  , 0 /*remap_gpu<int2>*/   , 0 /*remap_gpu<int3>*/ , 0 /*remap_gpu<int4>*/ },
        {remap_gpu<float>      , 0 /*remap_gpu<float2>*/ , remap_gpu<float3>     , remap_gpu<float4>     }
    };

    GpuMat src = _src.getGpuMat();
    GpuMat xmap = _xmap.getGpuMat();
    GpuMat ymap = _ymap.getGpuMat();

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size() );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );

    const func_t func = funcs[src.depth()][src.channels() - 1];
    if (!func)
        CV_Error(Error::StsUnsupportedFormat, "Unsupported input type");

    _dst.create(xmap.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    Scalar_<float> borderValueFloat;
    borderValueFloat = borderValue;

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
        dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream),
        deviceSupports(FEATURE_SET_COMPUTE_20));
}
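// Usage sketch (illustrative): a horizontal mirror expressed as a remap. For every
// destination pixel the CV_32F maps give the source coordinate to sample.
// example_cuda_remap is a hypothetical name; assumes the 3.x cudawarping module.
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudawarping.hpp>

static void example_cuda_remap(const cv::cuda::GpuMat& d_src)
{
    cv::Mat xmap(d_src.size(), CV_32F), ymap(d_src.size(), CV_32F);
    for (int y = 0; y < xmap.rows; ++y)
    {
        for (int x = 0; x < xmap.cols; ++x)
        {
            xmap.at<float>(y, x) = static_cast<float>(xmap.cols - 1 - x);
            ymap.at<float>(y, x) = static_cast<float>(y);
        }
    }

    cv::cuda::GpuMat d_xmap(xmap), d_ymap(ymap), d_dst;   // upload the maps
    cv::cuda::remap(d_src, d_dst, d_xmap, d_ymap, cv::INTER_LINEAR, cv::BORDER_CONSTANT);
}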
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);

    static const func_t funcs[6][4] =
    {
        {NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
        {0,0,0,0},
        {NppMirror<CV_16U, nppiMirror_16u_C1R>::call, 0, NppMirror<CV_16U, nppiMirror_16u_C3R>::call, NppMirror<CV_16U, nppiMirror_16u_C4R>::call},
        {0,0,0,0},
        {NppMirror<CV_32S, nppiMirror_32s_C1R>::call, 0, NppMirror<CV_32S, nppiMirror_32s_C3R>::call, NppMirror<CV_32S, nppiMirror_32s_C4R>::call},
        {NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
    };

    CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
    CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);

    dst.create(src.size(), src.type());

    funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
}
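// Usage sketch (illustrative): flipCode follows cv::flip — 0 flips around the x-axis,
// a positive value around the y-axis, a negative value around both.
// example_gpu_flip is a hypothetical name.
#include <opencv2/gpu/gpu.hpp>

static void example_gpu_flip(const cv::gpu::GpuMat& d_src)
{
    cv::gpu::GpuMat d_dst;
    cv::gpu::flip(d_src, d_dst, 1, cv::gpu::Stream::Null());   // mirror horizontally
}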
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)
{
    int sdepth = src.depth(), cn = src.channels();
    if( ddepth < 0 )
        ddepth = sdepth;

    dst.create(src.size(), CV_MAKETYPE(ddepth, cn));

    Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor);
    f->apply(src, dst);
}
void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
                          Point anchor, int rowBorderType, int columnBorderType, Stream& stream)
{
    if( ddepth < 0 )
        ddepth = src.depth();

    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor,
                                                              rowBorderType, columnBorderType);
    f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
}
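// Usage sketch (illustrative): a separable Gaussian blur through sepFilter2D. When the
// same kernels are applied repeatedly, creating the FilterEngine_GPU once (as the
// wrappers above do per call) amortizes the setup cost. example_sep_filter is a
// hypothetical name.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/imgproc/imgproc.hpp>

static void example_sep_filter(const cv::gpu::GpuMat& d_src)
{
    cv::Mat k = cv::getGaussianKernel(7, 1.5, CV_32F);   // 7x1 column kernel

    cv::gpu::GpuMat d_dst;
    cv::gpu::sepFilter2D(d_src, d_dst, -1 /*same depth as src*/, k, k.t(), cv::Point(-1, -1));
}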
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
{
    using namespace ::cv::gpu::device::matrix_reductions::countnonzero;

    typedef int (*Caller)(const PtrStepSzb src, PtrStepb buf);

    static Caller multipass_callers[7] =
    {
        countNonZeroMultipassCaller<unsigned char>, countNonZeroMultipassCaller<char>,
        countNonZeroMultipassCaller<unsigned short>, countNonZeroMultipassCaller<short>,
        countNonZeroMultipassCaller<int>, countNonZeroMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[7] =
    {
        countNonZeroCaller<unsigned char>, countNonZeroCaller<char>,
        countNonZeroCaller<unsigned short>, countNonZeroCaller<short>,
        countNonZeroCaller<int>, countNonZeroCaller<float>, countNonZeroCaller<double>
    };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    Caller* callers = multipass_callers;
    if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
        callers = singlepass_callers;

    Caller caller = callers[src.type()];
    CV_Assert(caller != 0);

    return caller(src, buf);
}
void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);

    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiLShiftC_8u_C1R>::call , 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R>::call , NppShift<CV_8U , 4, nppiLShiftC_8u_C4R>::call },
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_16U, 1, nppiLShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiLShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiLShiftC_16u_C4R>::call},
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);

    static const hist_t hist_callers[] =
    {
        NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
        0,
        NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,
        NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
    };

    GpuMat src = _src.getGpuMat();

    if (src.depth() == CV_8U && deviceSupports(FEATURE_SET_COMPUTE_30))
    {
        histEven8u(src, hist.getGpuMatRef(), histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
        return;
    }

    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );

    hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
}
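// Usage sketch (illustrative): a 256-bin histogram of an 8-bit single-channel image;
// bin i counts pixels with value i, and hist comes back as a 1 x histSize CV_32S
// matrix. example_hist_even is a hypothetical name; both buffers are reusable.
#include <opencv2/gpu/gpu.hpp>

static void example_hist_even(const cv::gpu::GpuMat& d_src8u)
{
    cv::gpu::GpuMat d_hist, d_buf;
    cv::gpu::histEven(d_src8u, d_hist, d_buf, 256, 0, 256, cv::gpu::Stream::Null());
}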
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
        return;
    }

    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
            return;
        }
    }

    setTo(src, s, Impl::getStream(impl));
}