void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
{
    // Supported element sizes: 1 byte (NPP), 4 and 8 bytes (custom kernels).
    CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 );

    dst.create( src.cols, src.rows, src.type() );

    cudaStream_t stream = StreamAccessor::getStream(s);

    if (src.elemSize() == 1)
    {
        NppStreamHandler h(stream);

        NppiSize sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );

        // On the default stream, synchronize explicitly after the NPP call.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else if (src.elemSize() == 4)
    {
        arithm::transpose<int>(src, dst, stream);
    }
    else // if (src.elemSize() == 8)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");

        arithm::transpose<double>(src, dst, stream);
    }
}
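// Usage sketch (illustrative, not part of the module): a minimal round trip that
// transposes a single-channel 32-bit matrix on a user-owned stream. The helpers
// used (upload/download/waitForCompletion) are the standard cv::gpu API of this
// era; the 4-byte element size routes through the asynchronous kernel above.
static void transposeExample()
{
    cv::Mat host = cv::Mat::eye(480, 640, CV_32SC1);

    cv::gpu::GpuMat d_src, d_dst;
    d_src.upload(host);

    cv::gpu::Stream stream;
    cv::gpu::transpose(d_src, d_dst, stream); // enqueued on 'stream', returns immediately

    stream.waitForCompletion(); // make d_dst safe to read

    cv::Mat result;
    d_dst.download(result); // result is 640 x 480
}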
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    // Fast path: an all-zero scalar can be written with a plain 2D memset.
    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) );
        return;
    }

    // Fast path: CV_8U data whose channels all share one value is also a memset.
    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) );
            return;
        }
    }

    // General case: dispatch a per-element set kernel on the element depth.
    typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
    static const set_caller_t set_callers[] =
    {
        kernelSet<uchar>, kernelSet<schar>, kernelSet<ushort>, kernelSet<short>,
        kernelSet<int>, kernelSet<float>, kernelSet<double>
    };

    set_callers[src.depth()](src, s, impl->stream);
}
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
{
    CV_Assert(src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8);

    dst.create( src.cols, src.rows, src.type() );

    cudaStream_t stream = StreamAccessor::getStream(s);

    if (src.elemSize() == 1)
    {
        NppStreamHandler h(stream);

        NppiSize sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
    }
    else if (src.elemSize() == 4)
    {
        NppStStreamHandler h(stream);

        NcvSize32u sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        ncvSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
            dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
    }
    else // if (src.elemSize() == 8)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");

        NppStStreamHandler h(stream);

        NcvSize32u sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        ncvSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
            dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
    }

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
    typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval,
                           int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
    static const func_t funcs[] =
    {
        ::minMaxLoc::run<uchar>,
        ::minMaxLoc::run<schar>,
        ::minMaxLoc::run<ushort>,
        ::minMaxLoc::run<short>,
        ::minMaxLoc::run<int>,
        ::minMaxLoc::run<float>,
        ::minMaxLoc::run<double>
    };

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
    }

    Size valbuf_size, locbuf_size;
    ::minMaxLoc::getBufSize(src.cols, src.rows, src.elemSize(), valbuf_size.width, valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);

    const func_t func = funcs[src.depth()];

    // The kernel always writes all four outputs; unused ones are routed into
    // local temporaries. Locations are written as (x, y) int pairs, which is
    // why &minLoc->x / &maxLoc->x are passed.
    double temp1, temp2;
    Point temp3, temp4;
    func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, minLoc ? &minLoc->x : &temp3.x, maxLoc ? &maxLoc->x : &temp4.x, valBuf, locBuf);
}
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom,
                       GpuMat& labels, GpuMat& buf, Stream& s)
{
    Size src_size = terminals.size();

    CV_Assert(terminals.type() == CV_32S);

    // Horizontal edge weights are supplied transposed, hence the swapped size.
    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == CV_32S);

    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == CV_32S);

    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == CV_32S);

    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == CV_32S);

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    // Grow the scratch buffer only when the current one is too small.
    int bufsz;
    nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );

    if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
        buf.create(1, bufsz, CV_8U);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
        top.ptr<Npp32s>(), bottom.ptr<Npp32s>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step),
        sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
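// Usage sketch (illustrative): constant edge weights on a W x H grid, purely to
// show the expected shapes and types. Real callers derive 'terminals' and the four
// edge matrices from a data/smoothness model; note the horizontal terms must be
// supplied transposed.
static void graphcutExample(int W, int H)
{
    cv::gpu::GpuMat terminals(H, W, CV_32S, cv::Scalar::all(0));
    cv::gpu::GpuMat top(H, W, CV_32S, cv::Scalar::all(1));
    cv::gpu::GpuMat bottom(H, W, CV_32S, cv::Scalar::all(1));
    cv::gpu::GpuMat leftTransp(W, H, CV_32S, cv::Scalar::all(1));  // W rows x H cols
    cv::gpu::GpuMat rightTransp(W, H, CV_32S, cv::Scalar::all(1));

    cv::gpu::GpuMat labels, buf; // 'buf' is reused across calls once grown
    cv::gpu::Stream stream;
    cv::gpu::graphcut(terminals, leftTransp, rightTransp, top, bottom, labels, buf, stream);
    stream.waitForCompletion(); // labels: CV_8U, one label byte per pixel
}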
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf)
{
    using namespace mathfunc::minmaxloc;

    typedef void (*Caller)(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep);
    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);

    // Row 0: multi-pass kernels for devices without global atomics; row 1: single-pass.
    // The multi-pass path has no double specialization, hence the trailing 0.
    static const Caller callers[2][7] =
    {
        { min_max_loc_multipass_caller<unsigned char>, min_max_loc_multipass_caller<char>,
          min_max_loc_multipass_caller<unsigned short>, min_max_loc_multipass_caller<short>,
          min_max_loc_multipass_caller<int>, min_max_loc_multipass_caller<float>, 0 },
        { min_max_loc_caller<unsigned char>, min_max_loc_caller<char>,
          min_max_loc_caller<unsigned short>, min_max_loc_caller<short>,
          min_max_loc_caller<int>, min_max_loc_caller<float>, min_max_loc_caller<double> }
    };

    static const MaskedCaller masked_callers[2][7] =
    {
        { min_max_loc_mask_multipass_caller<unsigned char>, min_max_loc_mask_multipass_caller<char>,
          min_max_loc_mask_multipass_caller<unsigned short>, min_max_loc_mask_multipass_caller<short>,
          min_max_loc_mask_multipass_caller<int>, min_max_loc_mask_multipass_caller<float>, 0 },
        { min_max_loc_mask_caller<unsigned char>, min_max_loc_mask_caller<char>,
          min_max_loc_mask_caller<unsigned short>, min_max_loc_mask_caller<short>,
          min_max_loc_mask_caller<int>, min_max_loc_mask_caller<float>, min_max_loc_mask_caller<double> }
    };

    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
    CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    int minLoc_[2];
    int maxLoc_[2];

    Size valbuf_size, locbuf_size;
    get_buf_size_required(src.cols, src.rows, src.elemSize(), valbuf_size.width, valbuf_size.height, locbuf_size.width, locbuf_size.height);
    valbuf.create(valbuf_size, CV_8U);
    locbuf.create(locbuf_size, CV_8U);

    if (mask.empty())
    {
        Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
        caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
    }
    else
    {
        MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
    }

    if (minLoc) { minLoc->x = minLoc_[0]; minLoc->y = minLoc_[1]; }
    if (maxLoc) { maxLoc->x = maxLoc_[0]; maxLoc->y = maxLoc_[1]; }
}
void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) target;
    (void) autoRelease;
    throw_nogl();
#else
    const int kind = arr.kind();

    // Texture sources delegate to the texture's own copyTo.
    if (kind == _InputArray::OPENGL_TEXTURE)
    {
        ogl::Texture2D tex = arr.getOGlTexture2D();
        tex.copyTo(*this);
        setAutoRelease(autoRelease);
        return;
    }

    const Size asize = arr.size();
    const int atype = arr.type();
    create(asize, atype, target, autoRelease);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            ogl::Buffer buf = arr.getOGlBuffer();
            impl_->copyFrom(buf.bufId(), asize.area() * CV_ELEM_SIZE(atype));
            break;
        }

    case _InputArray::GPU_MAT:
        {
        #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
            throw_nocuda();
        #else
            GpuMat dmat = arr.getGpuMat();
            impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
        #endif

            break;
        }

    default:
        {
            // Host fallback: copy from a continuous Mat.
            Mat mat = arr.getMat();
            CV_Assert( mat.isContinuous() );
            impl_->copyFrom(asize.area() * CV_ELEM_SIZE(atype), mat.data);
        }
    }
#endif
}
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
{
    using namespace mathfunc::minmax;

    typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep);
    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);

    // Row 0: multi-pass kernels for devices without global atomics; row 1: single-pass.
    // The multi-pass path has no double specialization, hence the trailing 0.
    static const Caller callers[2][7] =
    {
        { min_max_multipass_caller<unsigned char>, min_max_multipass_caller<char>,
          min_max_multipass_caller<unsigned short>, min_max_multipass_caller<short>,
          min_max_multipass_caller<int>, min_max_multipass_caller<float>, 0 },
        { min_max_caller<unsigned char>, min_max_caller<char>,
          min_max_caller<unsigned short>, min_max_caller<short>,
          min_max_caller<int>, min_max_caller<float>, min_max_caller<double> }
    };

    static const MaskedCaller masked_callers[2][7] =
    {
        { min_max_mask_multipass_caller<unsigned char>, min_max_mask_multipass_caller<char>,
          min_max_mask_multipass_caller<unsigned short>, min_max_mask_multipass_caller<short>,
          min_max_mask_multipass_caller<int>, min_max_mask_multipass_caller<float>, 0 },
        { min_max_mask_caller<unsigned char>, min_max_mask_caller<char>,
          min_max_mask_caller<unsigned short>, min_max_mask_caller<short>,
          min_max_mask_caller<int>, min_max_mask_caller<float>, min_max_mask_caller<double> }
    };

    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
    CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;

    Size bufSize;
    get_buf_size_required(src.cols, src.rows, src.elemSize(), bufSize.width, bufSize.height);
    buf.create(bufSize, CV_8U);

    if (mask.empty())
    {
        Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, minVal, maxVal, buf);
    }
    else
    {
        MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, mask, minVal, maxVal, buf);
    }
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    // Fast path: an all-zero scalar can be written with a plain 2D memset.
    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
        return;
    }

    // Fast path: CV_8U data whose channels all share one value is also a memset.
    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
            return;
        }
    }

    // General case: per-element set kernel on this stream.
    setTo(src, s, Impl::getStream(impl));
}
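// Usage sketch (illustrative): queuing an asynchronous fill. A CV_8UC3 image whose
// channels share one value takes the cudaMemset2DAsync fast path above.
static void memsetExample()
{
    cv::gpu::GpuMat img(480, 640, CV_8UC3);

    cv::gpu::Stream stream;
    stream.enqueueMemSet(img, cv::Scalar(128, 128, 128)); // returns immediately
    stream.waitForCompletion();
}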
void cv::ogl::Buffer::copyTo(OutputArray arr, cuda::Stream& stream) const
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) stream;
    throw_no_ogl();
#else
#ifndef HAVE_CUDA
    (void) arr;
    (void) stream;
    throw_no_cuda();
#else
    arr.create(rows_, cols_, type_);
    GpuMat dmat = arr.getGpuMat();
    impl_->copyTo(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
#endif
#endif
}
void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) target;
    (void) autoRelease;
    throw_no_ogl();
#else
    const int kind = arr.kind();

    const Size asize = arr.size();
    const int atype = arr.type();
    create(asize, atype, target, autoRelease);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            ogl::Buffer buf = arr.getOGlBuffer();
            impl_->copyFrom(buf.bufId(), asize.area() * CV_ELEM_SIZE(atype));
            break;
        }

    case _InputArray::CUDA_GPU_MAT:
        {
        #ifndef HAVE_CUDA
            throw_no_cuda();
        #else
            GpuMat dmat = arr.getGpuMat();
            impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
        #endif

            break;
        }

    default:
        {
            Mat mat = arr.getMat();
            CV_Assert( mat.isContinuous() );
            impl_->copyFrom(asize.area() * CV_ELEM_SIZE(atype), mat.data);
        }
    }
#endif
}
void cv::ogl::Buffer::copyFrom(InputArray arr, cuda::Stream& stream, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) stream;
    (void) target;
    (void) autoRelease;
    throw_no_ogl();
#else
#ifndef HAVE_CUDA
    (void) arr;
    (void) stream;
    (void) target;
    (void) autoRelease;
    throw_no_cuda();
#else
    GpuMat dmat = arr.getGpuMat();

    create(dmat.size(), dmat.type(), target, autoRelease);

    impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
#endif
#endif
}
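// Usage sketch (illustrative): streaming a GpuMat into an OpenGL pixel-unpack
// buffer without a host round trip. Assumes a current OpenGL context and a build
// with both OpenGL and CUDA; synchronizing before GL consumes the buffer is the
// caller's responsibility.
static void uploadToGL(const cv::cuda::GpuMat& frame, cv::ogl::Buffer& pbo, cv::cuda::Stream& stream)
{
    // Device-to-device copy enqueued on 'stream'.
    pbo.copyFrom(frame, stream, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER);

    stream.waitForCompletion(); // ensure the copy finished before GL reads 'pbo'
}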
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
    using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;

    typedef void (*Caller)(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
    typedef void (*MaskedCaller)(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);

    // The multi-pass kernels have no double specialization, hence the trailing 0.
    static Caller multipass_callers[] =
    {
        minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
        minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
        minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[] =
    {
        minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
        minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
        minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double>
    };

    static MaskedCaller masked_multipass_callers[] =
    {
        minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
        minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
        minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0
    };

    static MaskedCaller masked_singlepass_callers[] =
    {
        minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
        minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
        minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>, minMaxLocMaskCaller<double>
    };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    int minLoc_[2];
    int maxLoc_[2];

    Size valbuf_size, locbuf_size;
    getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), valbuf_size.width, valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);

    if (mask.empty())
    {
        // Prefer the single-pass kernels when global atomics are available.
        Caller* callers = multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = singlepass_callers;

        Caller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
    }
    else
    {
        MaskedCaller* callers = masked_multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = masked_singlepass_callers;

        MaskedCaller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
    }

    if (minLoc) { minLoc->x = minLoc_[0]; minLoc->y = minLoc_[1]; }
    if (maxLoc) { maxLoc->x = maxLoc_[0]; maxLoc->y = maxLoc_[1]; }
}
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
{
    using namespace mathfunc::minmax;

    typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep);
    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);

    // The multi-pass kernels have no double specialization, hence the trailing 0.
    static Caller multipass_callers[7] =
    {
        minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>,
        minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>,
        minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[7] =
    {
        minMaxCaller<unsigned char>, minMaxCaller<char>,
        minMaxCaller<unsigned short>, minMaxCaller<short>,
        minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double>
    };

    static MaskedCaller masked_multipass_callers[7] =
    {
        minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>,
        minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
        minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0
    };

    static MaskedCaller masked_singlepass_callers[7] =
    {
        minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>,
        minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>,
        minMaxMaskCaller<int>, minMaxMaskCaller<float>, minMaxMaskCaller<double>
    };

    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
    CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    if (mask.empty())
    {
        // Prefer the single-pass kernels when global atomics are available.
        Caller* callers = multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = singlepass_callers;

        Caller caller = callers[src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, minVal, maxVal, buf);
    }
    else
    {
        MaskedCaller* callers = masked_multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = masked_singlepass_callers;

        MaskedCaller caller = callers[src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, mask, minVal, maxVal, buf);
    }
}
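// Usage sketch (illustrative): the buffered overload lets the caller keep 'buf'
// alive across frames so the reduction scratch space is allocated once rather than
// on every call. Frames are assumed single-channel, as the assertion above requires.
static void minMaxExample(const std::vector<cv::Mat>& frames)
{
    cv::gpu::GpuMat d_src, buf;

    for (size_t i = 0; i < frames.size(); ++i)
    {
        d_src.upload(frames[i]);

        double minVal = 0, maxVal = 0;
        cv::gpu::minMax(d_src, &minVal, &maxVal, cv::gpu::GpuMat(), buf); // empty mask

        // ... use minVal / maxVal ...
    }
}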