void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
#ifndef HAVE_CUFFT
    (void) _src1;
    (void) _src2;
    (void) _dst;
    (void) flags;
    (void) scale;
    (void) conjB;
    (void) stream;
    throw_no_cuda();
#else
    (void) flags;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
    static Caller callers[] = { device::mulAndScaleSpectrums, device::mulAndScaleSpectrums_CONJ };

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    Caller caller = callers[(int)conjB];
    caller(src1, src2, scale, dst, StreamAccessor::getStream(stream));
#endif
}
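// Usage sketch (an assumption, not part of the implementation above):
// mulAndScaleSpectrums is typically sandwiched between a forward and an inverse
// cv::cuda::dft to perform frequency-domain correlation. The function and
// variable names below are hypothetical.
static void correlateSpectrumsSketch(const cv::cuda::GpuMat& imageSpec,
                                     const cv::cuda::GpuMat& templSpec,
                                     cv::cuda::GpuMat& resultSpec,
                                     cv::cuda::Stream& stream)
{
    // Both inputs must be CV_32FC2 packed complex spectra of equal size.
    // conjB = true multiplies by the conjugate of templSpec (correlation);
    // the scale factor normalizes the subsequent inverse DFT.
    const float scale = 1.f / static_cast<float>(imageSpec.rows * imageSpec.cols);
    cv::cuda::mulAndScaleSpectrums(imageSpec, templSpec, resultSpec,
                                   0 /*flags, currently ignored*/, scale,
                                   true /*conjB*/, stream);
}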
void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
    if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = _src1.getGpuMat();
        GpuMat src2 = _src2.getGpuMat();

        CV_Assert( src1.size() == src2.size() );

        _dst.create(src1.size(), src1.type());
        GpuMat dst = _dst.getGpuMat();

        divMat_8uc4_32f(src1, src2, dst, stream);
    }
    else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = _src1.getGpuMat();
        GpuMat src2 = _src2.getGpuMat();

        CV_Assert( src1.size() == src2.size() );

        _dst.create(src1.size(), src1.type());
        GpuMat dst = _dst.getGpuMat();

        divMat_16sc4_32f(src1, src2, dst, stream);
    }
    else
    {
        arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, divMat, divScalar);
    }
}
/**
 * Allocate memory for all the buffers on the GPU.
 */
void DepthmapDenoiseWeightedHuberImpl::allocate(int _rows, int _cols, InputArray _gxin, InputArray _gyin)
{
    const GpuMat& gxin = _gxin.getGpuMat();
    const GpuMat& gyin = _gyin.getGpuMat();

    rows = _rows;
    cols = _cols;

    if (!(rows % 32 == 0 && cols % 32 == 0 && cols >= 64))
    {
        CV_Assert(!"For performance reasons, DepthmapDenoiseWeightedHuber currently only supports image sizes that are multiples of 32 with cols >= 64. Pad the image to achieve this.");
    }

    if (!_a.data)
    {
        _a.create(1, rows*cols, CV_32FC1);
        _a = _a.reshape(0, rows);
    }
    FLATALLOC(_d, _a);

    cachedG = 1;
    if (gxin.empty() || gyin.empty())
    {
        if (gxin.empty())
        {
            FLATALLOC(_gx, _d);
            cachedG = 0;
        }
        else
        {
            _gx = gxin;
        }
        if (gyin.empty())
        {
            FLATALLOC(_gy, _d);
            cachedG = 0;
        }
        else
        {
            _gy = gyin;
        }
    }
    else
    {
        if (!gxin.isContinuous())
        {
            FLATALLOC(_gx, _d);
            cvStream.enqueueCopy(gxin, _gx); // gxin.copyTo(_gx, cvStream);
        }
        if (!gyin.isContinuous())
        {
            FLATALLOC(_gy, _d);
            cvStream.enqueueCopy(gyin, _gy); // gyin.copyTo(_gy, cvStream);
        }
    }

    FLATALLOC(_qx, _d);
    FLATALLOC(_qy, _d);
    FLATALLOC(_g1, _d);
    FLATALLOC(stableDepth, _d);

    memZero(_qx, cvStream);
    memZero(_qy, cvStream);
    alloced = 1;
}
void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, OutputArray _objects, cv::gpu::Stream& s) const
{
    CV_Assert(fields);

    // only color images and precomputed integrals are supported
    int type = _image.type();
    CV_Assert(type == CV_8UC3 || type == CV_32SC1 || (!_rois.empty()));

    const cv::gpu::GpuMat image = _image.getGpuMat();

    if (_objects.empty()) _objects.create(1, 4096 * sizeof(Detection), CV_8UC1);

    cv::gpu::GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();

    /// roi
    Fields& flds = *fields;
    int shr = flds.shrinkage;

    flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());

    device::shrink(rois, flds.mask);
    //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);

    if (type == CV_8UC3)
    {
        flds.update(image.rows, image.cols, flds.shrinkage);

        if (flds.check((float)minScale, (float)maxScale, scales))
            flds.createLevels(image.rows, image.cols);

        flds.preprocessor->apply(image, flds.shrunk);
        integral(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
    }
    else
    {
        if (s)
            s.enqueueCopy(image, flds.hogluv);
        else
            image.copyTo(flds.hogluv);
    }

    flds.detect(objects, s);

    if ( (flags & NMS_MASK) != NO_REJECT)   // bitwise test of the NMS flag bits (was a logical &&)
    {
        cv::gpu::GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
        flds.suppress(objects, s);
        flds.suppressed.copyTo(spr);
    }
}
void cv::cuda::bilateralFilter(InputArray _src, OutputArray _dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& stream)
{
    using cv::cuda::device::imgproc::bilateral_filter_gpu;

    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);
    static const func_t funcs[6][4] =
    {
        {bilateral_filter_gpu<uchar>      , 0 /*bilateral_filter_gpu<uchar2>*/ , bilateral_filter_gpu<uchar3>      , bilateral_filter_gpu<uchar4>      },
        {0 /*bilateral_filter_gpu<schar>*/, 0 /*bilateral_filter_gpu<schar2>*/ , 0 /*bilateral_filter_gpu<schar3>*/, 0 /*bilateral_filter_gpu<schar4>*/},
        {bilateral_filter_gpu<ushort>     , 0 /*bilateral_filter_gpu<ushort2>*/, bilateral_filter_gpu<ushort3>     , bilateral_filter_gpu<ushort4>     },
        {bilateral_filter_gpu<short>      , 0 /*bilateral_filter_gpu<short2>*/ , bilateral_filter_gpu<short3>      , bilateral_filter_gpu<short4>      },
        {0 /*bilateral_filter_gpu<int>*/  , 0 /*bilateral_filter_gpu<int2>*/   , 0 /*bilateral_filter_gpu<int3>*/  , 0 /*bilateral_filter_gpu<int4>*/  },
        {bilateral_filter_gpu<float>      , 0 /*bilateral_filter_gpu<float2>*/ , bilateral_filter_gpu<float3>      , bilateral_filter_gpu<float4>      }
    };

    sigma_color = (sigma_color <= 0) ? 1 : sigma_color;
    sigma_spatial = (sigma_spatial <= 0) ? 1 : sigma_spatial;

    int radius = (kernel_size <= 0) ? cvRound(sigma_spatial * 1.5) : kernel_size / 2;
    kernel_size = std::max(radius, 1) * 2 + 1;

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert( func != 0 );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    func(src, dst, kernel_size, sigma_spatial, sigma_color, borderMode, StreamAccessor::getStream(stream));
}
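// Usage sketch (assumption): a typical call on an 8-bit BGR image. Passing a
// non-positive kernel_size lets the implementation above derive the window
// radius from sigma_spatial. The helper name is hypothetical.
static void bilateralSketch(const cv::cuda::GpuMat& src8uc3, cv::cuda::GpuMat& dst,
                            cv::cuda::Stream& stream)
{
    cv::cuda::bilateralFilter(src8uc3, dst,
                              -1 /*kernel_size: derived from sigma_spatial*/,
                              30.f /*sigma_color*/, 7.f /*sigma_spatial*/,
                              cv::BORDER_DEFAULT, stream);
}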
void cv::cuda::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
        {0, 0, 0, 0},
        {NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
        {0, 0, 0, 0},
        {0, 0, 0, 0},
        {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );

    _dst.create(dsize, src.type());
    GpuMat dst = _dst.getGpuMat();

    dst.setTo(Scalar::all(0), stream);

    funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
}
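// Usage sketch (assumption): rotating by 30 degrees into a same-sized canvas.
// Note the implementation above zeroes dst first, so pixels the rotated image
// does not cover stay black. The helper name is hypothetical.
static void rotateSketch(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst,
                         cv::cuda::Stream& stream)
{
    cv::cuda::rotate(src, dst, src.size(), 30.0 /*degrees*/,
                     0.0 /*xShift*/, 0.0 /*yShift*/, cv::INTER_LINEAR, stream);
}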
void cv::gpu::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
{
    GpuMat src = _src.getGpuMat();

    dst.resize(src.channels());
    if (src.channels() > 0)
        split_caller(src, &dst[0], stream);
}
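// Usage sketch (assumption): splitting a BGR GpuMat into per-channel planes.
// The helper name is hypothetical.
static void splitSketch(const cv::gpu::GpuMat& bgr, cv::gpu::Stream& stream)
{
    std::vector<cv::gpu::GpuMat> planes;
    cv::gpu::split(bgr, planes, stream);   // planes.size() == bgr.channels()
    // planes[0], planes[1], planes[2] now hold the B, G and R channels.
}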
void cv::gpu::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {pyrDown_gpu<uchar>      , 0 /*pyrDown_gpu<uchar2>*/ , pyrDown_gpu<uchar3>      , pyrDown_gpu<uchar4>      },
        {0 /*pyrDown_gpu<schar>*/, 0 /*pyrDown_gpu<schar2>*/ , 0 /*pyrDown_gpu<schar3>*/, 0 /*pyrDown_gpu<schar4>*/},
        {pyrDown_gpu<ushort>     , 0 /*pyrDown_gpu<ushort2>*/, pyrDown_gpu<ushort3>     , pyrDown_gpu<ushort4>     },
        {pyrDown_gpu<short>      , 0 /*pyrDown_gpu<short2>*/ , pyrDown_gpu<short3>      , pyrDown_gpu<short4>      },
        {0 /*pyrDown_gpu<int>*/  , 0 /*pyrDown_gpu<int2>*/   , 0 /*pyrDown_gpu<int3>*/  , 0 /*pyrDown_gpu<int4>*/  },
        {pyrDown_gpu<float>      , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3>      , pyrDown_gpu<float4>      }
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert( func != 0 );

    _dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
    GpuMat dst = _dst.getGpuMat();

    func(src, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
{
    GpuMat src = _src.getGpuMat();

    CV_Assert( src.type() == CV_8UC1 );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    int intBufSize;
    nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );

    size_t bufSize = intBufSize + 2 * 256 * sizeof(int);

    ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, _buf);
    GpuMat buf = _buf.getGpuMat();

    // The single scratch buffer is partitioned into histogram, LUT and NPP workspace.
    GpuMat hist(1, 256, CV_32SC1, buf.data);
    GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
    GpuMat intBuf(1, intBufSize, CV_8UC1, buf.data + 2 * 256 * sizeof(int));

    gpu::calcHist(src, hist, _stream);

    cudaStream_t stream = StreamAccessor::getStream(_stream);

    NppStreamHandler h(stream);

    nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );

    hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
}
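// Usage sketch (assumption): the buffer overload above lets callers reuse scratch
// memory across frames; passing a persistent GpuMat avoids per-call allocation.
// The helper name is hypothetical.
static void equalizeSketch(const cv::gpu::GpuMat& gray, cv::gpu::GpuMat& dst,
                           cv::gpu::GpuMat& scratch, cv::gpu::Stream& stream)
{
    CV_Assert(gray.type() == CV_8UC1);  // the implementation only accepts 8-bit single-channel
    cv::gpu::equalizeHist(gray, dst, scratch, stream);
}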
cv::GlBuffer::GlBuffer(InputArray mat_, Usage _usage) : rows_(0), cols_(0), type_(0), usage_(_usage)
{
#ifndef HAVE_OPENGL
    (void)mat_;
    (void)_usage;
    throw_nogl;
#else
    int kind = mat_.kind();
    Size _size = mat_.size();
    int _type = mat_.type();

    if (kind == _InputArray::GPU_MAT)
    {
    #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
        throw_nocuda;
    #else
        GpuMat d_mat = mat_.getGpuMat();
        impl_ = new Impl(d_mat.rows, d_mat.cols, d_mat.type(), _usage);
        impl_->copyFrom(d_mat);
    #endif
    }
    else
    {
        Mat mat = mat_.getMat();
        impl_ = new Impl(mat, _usage);
    }

    rows_ = _size.height;
    cols_ = _size.width;
    type_ = _type;
#endif
}
void DepthmapDenoiseWeightedHuberImpl::cacheGValues(InputArray _visibleLightImage)
{
    using namespace cv::cuda::device::dtam_denoise;
    localStream = cv::cuda::StreamAccessor::getStream(cvStream);

    if (!_visibleLightImage.empty())
    {
        visibleLightImage = _visibleLightImage.getGpuMat();
        cachedG = 0;
    }

    if (cachedG)
        return; // already cached

    if (!alloced)
        allocate(rows, cols);

    // Call the GPU function for caching the g values
    loadConstants(rows, cols, 0, 0, 0, 0, 0, 0, 0, 0);

    CV_Assert(_g1.isContinuous());
    float* pp = (float*)visibleLightImage.data; // TODO: write a color version.
    float* g1p = (float*)_g1.data;
    float* gxp = (float*)_gx.data;
    float* gyp = (float*)_gy.data;
    computeGCaller(pp, g1p, gxp, gyp, cols);
    cachedG = 1;
}
void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream)
{
    const GpuMat src = _src.getGpuMat();

    CV_Assert(src.depth() == CV_8U && src.channels() < 4);

    int border_size = search_window/2 + block_window/2;
    Size esize = src.size() + Size(border_size, border_size) * 2;

    BufferPool pool(stream);

    GpuMat extended_src = pool.getBuffer(esize, src.type());
    cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
    GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));

    int bcols, brows;
    device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
    GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S);

    using namespace cv::cuda::device::imgproc;
    typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0 };

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream));
}
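// Usage sketch (assumption): common parameter choices on an 8-bit image. The
// implementation above pads the source internally, so callers pass the raw
// image. The helper name is hypothetical.
static void fastNlmSketch(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst,
                          cv::cuda::Stream& stream)
{
    cv::cuda::fastNlMeansDenoising(src, dst, 10.f /*h*/,
                                   21 /*search_window*/, 7 /*block_window*/, stream);
}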
void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
{
    GpuMat src = _src.getGpuMat();

    _dst.create(src.size(), CV_32FC1);
    GpuMat dst = _dst.getGpuMat();

    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
}
GpuMat DepthmapDenoiseWeightedHuberImpl::operator()(InputArray _ain, float epsilon, float theta)
{
    const GpuMat& ain = _ain.getGpuMat();

    using namespace cv::cuda::device::dtam_denoise;
    localStream = cv::cuda::StreamAccessor::getStream(cvStream);

    CV_Assert(ain.cols > 0);
    if (!(ain.rows % 32 == 0 && ain.cols % 32 == 0 && ain.cols >= 64))
    {
        CV_Assert(!"For performance reasons, DepthmapDenoiseWeightedHuber currently only supports image sizes that are multiples of 32 with cols >= 64. Pad the image to achieve this.");
    }
    rows = ain.rows;
    cols = ain.cols;

    if (!ain.isContinuous())
    {
        _a.create(1, rows*cols, CV_32FC1);
        _a = _a.reshape(0, rows);
        ain.copyTo(_a, cvStream);
    }
    else
    {
        _a = ain;
    }

    if (!alloced)
        allocate(rows, cols);

    if (!visibleLightImage.empty())
        cacheGValues();
    if (!cachedG)
    {
        _gx.setTo(1, cvStream);
        _gy.setTo(1, cvStream);
    }
    if (!dInited)
    {
        _a.copyTo(_d, cvStream);
        dInited = 1;
    }

    computeSigmas(epsilon, theta);

    float* d = (float*)_d.data;
    float* a = (float*)_a.data;
    float* gxpt = (float*)_gx.data;
    float* gypt = (float*)_gy.data;
    float* gqxpt = (float*)_qx.data;
    float* gqypt = (float*)_qy.data;

    loadConstants(rows, cols, 0, 0, 0, 0, 0, 0, 0, 0);
    updateQDCaller(gqxpt, gqypt, d, a, gxpt, gypt, cols, sigma_q, sigma_d, epsilon, theta);
    cudaSafeCall(cudaGetLastError());

    return _d;
}
void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
    using namespace cv::cuda::device::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    static const func_t funcs[6][4] =
    {
        {remap_gpu<uchar>      , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3>     , remap_gpu<uchar4>     },
        {0 /*remap_gpu<schar>*/, 0 /*remap_gpu<char2>*/  , 0 /*remap_gpu<char3>*/, 0 /*remap_gpu<char4>*/},
        {remap_gpu<ushort>     , 0 /*remap_gpu<ushort2>*/, remap_gpu<ushort3>    , remap_gpu<ushort4>    },
        {remap_gpu<short>      , 0 /*remap_gpu<short2>*/ , remap_gpu<short3>     , remap_gpu<short4>     },
        {0 /*remap_gpu<int>*/  , 0 /*remap_gpu<int2>*/   , 0 /*remap_gpu<int3>*/ , 0 /*remap_gpu<int4>*/ },
        {remap_gpu<float>      , 0 /*remap_gpu<float2>*/ , remap_gpu<float3>     , remap_gpu<float4>     }
    };

    GpuMat src = _src.getGpuMat();
    GpuMat xmap = _xmap.getGpuMat();
    GpuMat ymap = _ymap.getGpuMat();

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size() );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );

    const func_t func = funcs[src.depth()][src.channels() - 1];
    if (!func)
        CV_Error(Error::StsUnsupportedFormat, "Unsupported input type");

    _dst.create(xmap.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    Scalar_<float> borderValueFloat;
    borderValueFloat = borderValue;

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
         dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
}
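// Usage sketch (assumption): building a horizontal-flip map on the host and
// uploading it. Both maps must be CV_32FC1 and of equal size, per the asserts
// above. The helper name is hypothetical.
static void remapFlipSketch(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst,
                            cv::cuda::Stream& stream)
{
    cv::Mat mapx(src.size(), CV_32FC1), mapy(src.size(), CV_32FC1);
    for (int y = 0; y < src.rows; ++y)
        for (int x = 0; x < src.cols; ++x)
        {
            mapx.at<float>(y, x) = static_cast<float>(src.cols - 1 - x); // mirror columns
            mapy.at<float>(y, x) = static_cast<float>(y);                // keep rows
        }
    cv::cuda::GpuMat d_mapx(mapx), d_mapy(mapy); // upload the maps
    cv::cuda::remap(src, dst, d_mapx, d_mapy, cv::INTER_LINEAR, cv::BORDER_CONSTANT,
                    cv::Scalar(), stream);
}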
void cv::gpu::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream);
    static const hist_t hist_callers[] =
    {
        NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
        0,
        NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,
        NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,
        0,
        NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist
    };

    GpuMat src = _src.getGpuMat();
    GpuMat levels = _levels.getGpuMat();

    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );

    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
}
Mat getMat(InputArray arr)
{
    if (arr.kind() == _InputArray::GPU_MAT)
    {
        Mat m;
        arr.getGpuMat().download(m);
        return m;
    }

    return arr.getMat();
}
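// Usage sketch (assumption): this helper lets host-side verification code accept
// either a Mat or a GpuMat transparently, e.g. in tests that compare a GPU
// result against a CPU reference. The helper name is hypothetical.
static double maxAbsDiff(cv::InputArray a, cv::InputArray b)
{
    // Both arguments end up as host Mats regardless of where they started.
    return cv::norm(getMat(a), getMat(b), cv::NORM_INF);
}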
void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) target;
    (void) autoRelease;
    throw_nogl();
#else
    const int kind = arr.kind();

    if (kind == _InputArray::OPENGL_TEXTURE)
    {
        ogl::Texture2D tex = arr.getOGlTexture2D();
        tex.copyTo(*this);
        setAutoRelease(autoRelease);
        return;
    }

    const Size asize = arr.size();
    const int atype = arr.type();
    create(asize, atype, target, autoRelease);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            ogl::Buffer buf = arr.getOGlBuffer();
            impl_->copyFrom(buf.bufId(), asize.area() * CV_ELEM_SIZE(atype));
            break;
        }

    case _InputArray::GPU_MAT:
        {
        #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
            throw_nocuda();
        #else
            GpuMat dmat = arr.getGpuMat();
            impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
        #endif
            break;
        }

    default:
        {
            Mat mat = arr.getMat();
            CV_Assert( mat.isContinuous() );
            impl_->copyFrom(asize.area() * CV_ELEM_SIZE(atype), mat.data);
        }
    }
#endif
}
void cv::gpu::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
{
    GpuMat src = _src.getGpuMat();

    CV_Assert( src.type() == CV_8UC1 );

    _hist.create(1, 256, CV_32SC1);
    GpuMat hist = _hist.getGpuMat();

    hist.setTo(Scalar::all(0), stream);

    hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
}
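// Usage sketch (assumption): the 256-bin histogram lands in a 1x256 CV_32SC1
// GpuMat that can be downloaded for host-side inspection. The helper name is
// hypothetical.
static void calcHistSketch(const cv::gpu::GpuMat& gray, cv::gpu::Stream& stream)
{
    cv::gpu::GpuMat d_hist;
    cv::gpu::calcHist(gray, d_hist, stream);
    stream.waitForCompletion();  // make sure the async histogram kernel finished
    cv::Mat hist;
    d_hist.download(hist);       // hist.at<int>(0, i) is the count of intensity i
}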
void cv::cuda::resize(InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
{
    GpuMat src = _src.getGpuMat();

    typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {device::resize<uchar>      , 0 /*device::resize<uchar2>*/ , device::resize<uchar3>     , device::resize<uchar4>     },
        {0 /*device::resize<schar>*/, 0 /*device::resize<char2>*/  , 0 /*device::resize<char3>*/, 0 /*device::resize<char4>*/},
        {device::resize<ushort>     , 0 /*device::resize<ushort2>*/, device::resize<ushort3>    , device::resize<ushort4>    },
        {device::resize<short>      , 0 /*device::resize<short2>*/ , device::resize<short3>     , device::resize<short4>     },
        {0 /*device::resize<int>*/  , 0 /*device::resize<int2>*/   , 0 /*device::resize<int3>*/ , 0 /*device::resize<int4>*/ },
        {device::resize<float>      , 0 /*device::resize<float2>*/ , device::resize<float3>     , device::resize<float4>     }
    };

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );

    if (dsize == Size())
    {
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }

    _dst.create(dsize, src.type());
    GpuMat dst = _dst.getGpuMat();

    if (dsize == src.size())
    {
        src.copyTo(dst, stream);
        return;
    }

    const func_t func = funcs[src.depth()][src.channels() - 1];
    if (!func)
        CV_Error(Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);
    PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);

    func(src, wholeSrc, ofs.y, ofs.x, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), interpolation, StreamAccessor::getStream(stream));
}
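// Usage sketch (assumption): downscaling by a factor with dsize left empty,
// which makes the implementation above derive the output size from fx/fy.
// The helper name is hypothetical.
static void resizeHalfSketch(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst,
                             cv::cuda::Stream& stream)
{
    cv::cuda::resize(src, dst, cv::Size() /*derived from fx, fy*/, 0.5, 0.5,
                     cv::INTER_AREA, stream);
}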
GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf)
{
    switch (arr.kind())
    {
    case _InputArray::GPU_MAT:
        return arr.getGpuMat();

    case _InputArray::OPENGL_BUFFER:
        arr.getOGlBuffer().copyTo(buf);
        return buf;

    default:
        buf.upload(arr.getMat());
        return buf;
    }
}
UMat cv::superres::arrGetUMat(InputArray arr, UMat& buf)
{
    switch (arr.kind())
    {
    case _InputArray::CUDA_GPU_MAT:
        arr.getGpuMat().download(buf);
        return buf;

    case _InputArray::OPENGL_BUFFER:
        arr.getOGlBuffer().copyTo(buf);
        return buf;

    default:
        return arr.getUMat();
    }
}
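// Usage sketch (assumption): these two helpers normalize heterogeneous inputs so
// the rest of the superres pipeline can work on one container type; the
// persistent buffer argument avoids reallocating whenever a copy or download is
// required. The helper name is hypothetical.
static void normalizeInputSketch(cv::InputArray frame)
{
    cv::UMat ubuf;
    cv::UMat u = cv::superres::arrGetUMat(frame, ubuf); // u aliases frame or ubuf
    (void)u; // hand u to the processing pipeline from here
}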
void cv::GlTexture::copyFrom(InputArray mat_, bool bgra)
{
#ifndef HAVE_OPENGL
    (void)mat_;
    (void)bgra;
    throw_nogl;
#else
    int kind = mat_.kind();
    Size _size = mat_.size();
    int _type = mat_.type();

    create(_size, _type);

    switch (kind)
    {
    case _InputArray::OPENGL_TEXTURE:
        {
            GlTexture tex = mat_.getGlTexture();
            *this = tex;
            break;
        }
    case _InputArray::OPENGL_BUFFER:
        {
            GlBuffer buf = mat_.getGlBuffer();
            impl_->copyFrom(buf, bgra);
            break;
        }
    case _InputArray::GPU_MAT:
        {
        #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
            throw_nocuda;
        #else
            GpuMat d_mat = mat_.getGpuMat();
            buf_.copyFrom(d_mat);
            impl_->copyFrom(buf_, bgra);
        #endif
            break;
        }
    default:
        {
            Mat mat = mat_.getMat();
            impl_->copyFrom(mat, bgra);
        }
    }
#endif
}
void cv::gpu::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
    static const hist_t hist_callers[] =
    {
        NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
        0,
        NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,
        NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );

    hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
}
cv::GlTexture::GlTexture(InputArray mat_, bool bgra) : rows_(0), cols_(0), type_(0), buf_(GlBuffer::TEXTURE_BUFFER)
{
#ifndef HAVE_OPENGL
    (void)mat_;
    (void)bgra;
    throw_nogl;
#else
    int kind = mat_.kind();
    Size _size = mat_.size();
    int _type = mat_.type();

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            GlBuffer buf = mat_.getGlBuffer();
            impl_ = new Impl(buf, bgra);
            break;
        }
    case _InputArray::GPU_MAT:
        {
        #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
            throw_nocuda;
        #else
            GpuMat d_mat = mat_.getGpuMat();
            GlBuffer buf(d_mat, GlBuffer::TEXTURE_BUFFER);
            impl_ = new Impl(buf, bgra);
        #endif
            break;
        }
    default:
        {
            Mat mat = mat_.getMat();
            impl_ = new Impl(mat, bgra);
            break;
        }
    }

    rows_ = _size.height;
    cols_ = _size.width;
    type_ = _type;
#endif
}
void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) target;
    (void) autoRelease;
    throw_no_ogl();
#else
    const int kind = arr.kind();

    const Size asize = arr.size();
    const int atype = arr.type();
    create(asize, atype, target, autoRelease);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            ogl::Buffer buf = arr.getOGlBuffer();
            impl_->copyFrom(buf.bufId(), asize.area() * CV_ELEM_SIZE(atype));
            break;
        }

    case _InputArray::CUDA_GPU_MAT:
        {
        #ifndef HAVE_CUDA
            throw_no_cuda();
        #else
            GpuMat dmat = arr.getGpuMat();
            impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
        #endif
            break;
        }

    default:
        {
            Mat mat = arr.getMat();
            CV_Assert( mat.isContinuous() );
            impl_->copyFrom(asize.area() * CV_ELEM_SIZE(atype), mat.data);
        }
    }
#endif
}
void cv::cuda::fastNlMeansDenoisingColored(InputArray _src, OutputArray _dst, float h_luminance, float h_color, int search_window, int block_window, Stream& stream)
{
    const GpuMat src = _src.getGpuMat();

    CV_Assert(src.type() == CV_8UC3);

    BufferPool pool(stream);

    GpuMat lab = pool.getBuffer(src.size(), src.type());
    cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, stream);

    GpuMat l = pool.getBuffer(src.size(), CV_8U);
    GpuMat ab = pool.getBuffer(src.size(), CV_8UC2);
    device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(stream));

    // Denoise luminance and chrominance separately, with their own strengths.
    fastNlMeansDenoising(l, l, h_luminance, search_window, block_window, stream);
    fastNlMeansDenoising(ab, ab, h_color, search_window, block_window, stream);

    device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(stream));
    cv::cuda::cvtColor(lab, _dst, cv::COLOR_Lab2BGR, 0, stream);
}
void cv::cuda::nonLocalMeans(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, int borderMode, Stream& stream)
{
    using cv::cuda::device::imgproc::nlm_bruteforce_gpu;

    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
    static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0 /*nlm_bruteforce_gpu<uchar4>*/ };

    const GpuMat src = _src.getGpuMat();

    CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);

    const func_t func = funcs[src.channels() - 1];
    CV_Assert(func != 0);

    int b = borderMode;
    CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(stream));
}
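// Usage sketch (assumption): the brute-force NLM path; slower but simpler than
// the fastNlMeansDenoising variant above, and it exposes the border mode
// directly. The helper name is hypothetical.
static void nlmBruteSketch(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst,
                           cv::cuda::Stream& stream)
{
    cv::cuda::nonLocalMeans(src, dst, 10.f /*h*/, 21 /*search_window*/,
                            7 /*block_window*/, cv::BORDER_DEFAULT, stream);
}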
void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiLShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiLShiftC_8u_C4R >::call},
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_16U, 1, nppiLShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiLShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiLShiftC_16u_C4R>::call},
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
}
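// Usage sketch (assumption): shifting every pixel of an 8-bit single-channel
// image left by 2 bits, i.e. multiplying by 4 modulo the type range, which is
// how the NPP LShiftC primitives behave. The helper name is hypothetical.
static void lshiftSketch(const cv::cuda::GpuMat& src8u, cv::cuda::GpuMat& dst,
                         cv::cuda::Stream& stream)
{
    cv::cuda::lshift(src8u, cv::Scalar_<int>(2), dst, stream);
}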
void cv::GlBuffer::copyFrom(InputArray mat_)
{
#ifndef HAVE_OPENGL
    (void)mat_;
    throw_nogl;
#else
    int kind = mat_.kind();
    Size _size = mat_.size();
    int _type = mat_.type();

    create(_size, _type);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            GlBuffer buf = mat_.getGlBuffer();
            *this = buf;
            break;
        }
    case _InputArray::GPU_MAT:
        {
        #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
            throw_nocuda;
        #else
            GpuMat d_mat = mat_.getGpuMat();
            impl_->copyFrom(d_mat);
        #endif
            break;
        }
    default:
        {
            Mat mat = mat_.getMat();
            impl_->copyFrom(mat, usage_);
        }
    }
#endif
}