int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
    using namespace cv::gpu::device::fast;

    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    if (nonmaxSupression)
        return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_,
                                    keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}
void cv::gpu::ORB_GPU::downloadKeyPoints(GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
    {
        keypoints.clear();
        return;
    }

    Mat h_keypoints(d_keypoints);

    convertKeyPoints(h_keypoints, keypoints);
}
void cv::gpu::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {pyrDown_gpu<uchar>      , 0 /*pyrDown_gpu<uchar2>*/ , pyrDown_gpu<uchar3>      , pyrDown_gpu<uchar4>      },
        {0 /*pyrDown_gpu<schar>*/, 0 /*pyrDown_gpu<schar2>*/ , 0 /*pyrDown_gpu<schar3>*/, 0 /*pyrDown_gpu<schar4>*/},
        {pyrDown_gpu<ushort>     , 0 /*pyrDown_gpu<ushort2>*/, pyrDown_gpu<ushort3>     , pyrDown_gpu<ushort4>     },
        {pyrDown_gpu<short>      , 0 /*pyrDown_gpu<short2>*/ , pyrDown_gpu<short3>      , pyrDown_gpu<short4>      },
        {0 /*pyrDown_gpu<int>*/  , 0 /*pyrDown_gpu<int2>*/   , 0 /*pyrDown_gpu<int3>*/  , 0 /*pyrDown_gpu<int4>*/  },
        {pyrDown_gpu<float>      , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3>      , pyrDown_gpu<float4>      }
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert( func != 0 );

    _dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
    GpuMat dst = _dst.getGpuMat();

    func(src, dst, StreamAccessor::getStream(stream));
}
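For reference, a minimal usage sketch of the wrapper above (assumes an OpenCV build with CUDA support and that the header declares a default null stream for the last parameter; "input.png" is a placeholder path):

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat host = cv::imread("input.png");   // placeholder path, loads an 8UC3 image
    cv::gpu::GpuMat src(host), dst;
    cv::gpu::pyrDown(src, dst);               // dst is roughly half the size of src
    cv::Mat result(dst);                      // download the result back to the host
    return 0;
}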
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
{
    using namespace ::cv::gpu::device::matrix_reductions::countnonzero;

    typedef int (*Caller)(const PtrStepSzb src, PtrStepb buf);

    static Caller multipass_callers[7] =
    {
        countNonZeroMultipassCaller<unsigned char>, countNonZeroMultipassCaller<char>,
        countNonZeroMultipassCaller<unsigned short>, countNonZeroMultipassCaller<short>,
        countNonZeroMultipassCaller<int>, countNonZeroMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[7] =
    {
        countNonZeroCaller<unsigned char>, countNonZeroCaller<char>,
        countNonZeroCaller<unsigned short>, countNonZeroCaller<short>,
        countNonZeroCaller<int>, countNonZeroCaller<float>, countNonZeroCaller<double>
    };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    Caller* callers = multipass_callers;
    if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
        callers = singlepass_callers;

    Caller caller = callers[src.type()];
    CV_Assert(caller != 0);

    return caller(src, buf);
}
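A hedged usage sketch: the buffered overload above is designed to be called with a persistent scratch buffer so repeated calls avoid device reallocation (the mask dimensions and contents here are illustrative only):

cv::gpu::GpuMat mask(480, 640, CV_8UC1);
mask.setTo(cv::Scalar::all(0));

cv::gpu::GpuMat buf;                       // scratch buffer, reused across calls
int n = cv::gpu::countNonZero(mask, buf);  // n == 0 for an all-zero mask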
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) );
        return;
    }

    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) );
            return;
        }
    }

    typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
    static const set_caller_t set_callers[] =
    {
        kernelSet<uchar>, kernelSet<schar>, kernelSet<ushort>, kernelSet<short>,
        kernelSet<int>, kernelSet<float>, kernelSet<double>
    };

    set_callers[src.depth()](src, s, impl->stream);
}
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
{
    using namespace cv::gpu::device::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {pyrUp_gpu<uchar>      , 0 /*pyrUp_gpu<uchar2>*/ , pyrUp_gpu<uchar3>      , pyrUp_gpu<uchar4>      },
        {0 /*pyrUp_gpu<schar>*/, 0 /*pyrUp_gpu<schar2>*/ , 0 /*pyrUp_gpu<schar3>*/, 0 /*pyrUp_gpu<schar4>*/},
        {pyrUp_gpu<ushort>     , 0 /*pyrUp_gpu<ushort2>*/, pyrUp_gpu<ushort3>     , pyrUp_gpu<ushort4>     },
        {pyrUp_gpu<short>      , 0 /*pyrUp_gpu<short2>*/ , pyrUp_gpu<short3>      , pyrUp_gpu<short4>      },
        {0 /*pyrUp_gpu<int>*/  , 0 /*pyrUp_gpu<int2>*/   , 0 /*pyrUp_gpu<int3>*/  , 0 /*pyrUp_gpu<int4>*/  },
        {pyrUp_gpu<float>      , 0 /*pyrUp_gpu<float2>*/ , pyrUp_gpu<float3>      , pyrUp_gpu<float4>      }
    };

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert(func != 0);

    dst.create(src.rows * 2, src.cols * 2, src.type());

    func(src, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
    typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask,
                           double* minval, double* maxval,
                           int* minloc, int* maxloc,
                           PtrStepb valbuf, PtrStep<unsigned int> locbuf);
#ifdef OPENCV_TINY_GPU_MODULE
    static const func_t funcs[] =
    {
        ::minMaxLoc::run<uchar>,
        0/*::minMaxLoc::run<schar>*/,
        0/*::minMaxLoc::run<ushort>*/,
        0/*::minMaxLoc::run<short>*/,
        ::minMaxLoc::run<int>,
        ::minMaxLoc::run<float>,
        0/*::minMaxLoc::run<double>*/,
    };
#else
    static const func_t funcs[] =
    {
        ::minMaxLoc::run<uchar>,
        ::minMaxLoc::run<schar>,
        ::minMaxLoc::run<ushort>,
        ::minMaxLoc::run<short>,
        ::minMaxLoc::run<int>,
        ::minMaxLoc::run<float>,
        ::minMaxLoc::run<double>,
    };
#endif

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size valbuf_size, locbuf_size;
    ::minMaxLoc::getBufSize(src.cols, src.rows, src.elemSize(), valbuf_size.width, valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);

    const func_t func = funcs[src.depth()];
    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

    double temp1, temp2;
    Point temp3, temp4;
    func(src, mask,
         minVal ? minVal : &temp1, maxVal ? maxVal : &temp2,
         minLoc ? &minLoc->x : &temp3.x, maxLoc ? &maxLoc->x : &temp4.x,
         valBuf, locBuf);
}
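A minimal usage sketch for the buffered reduction above (the image dimensions are illustrative; passing an empty GpuMat as the mask scans the whole image, and the two scratch buffers can be reused across calls):

cv::gpu::GpuMat img(480, 640, CV_32FC1);
img.setTo(cv::Scalar::all(1.0));

double minVal, maxVal;
cv::Point minLoc, maxLoc;
cv::gpu::GpuMat valBuf, locBuf;               // reusable scratch buffers
cv::gpu::minMaxLoc(img, &minVal, &maxVal, &minLoc, &maxLoc,
                   cv::gpu::GpuMat(), valBuf, locBuf);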
NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
                  bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize,
                  /*out*/unsigned int& numDetections)
{
    calculateMemReqsAndAllocate(src.size());

    NCVMemPtr src_beg;
    src_beg.ptr = (void*)src.ptr<Ncv8u>();
    src_beg.memtype = NCVMemoryTypeDevice;

    NCVMemSegment src_seg;
    src_seg.begin = src_beg;
    src_seg.size  = src.step * src.rows;

    NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
    ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

    CV_Assert(objects.rows == 1);

    NCVMemPtr objects_beg;
    objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
    objects_beg.memtype = NCVMemoryTypeDevice;

    NCVMemSegment objects_seg;
    objects_seg.begin = objects_beg;
    objects_seg.size = objects.step * objects.rows;
    NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
    ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

    NcvSize32u roi;
    roi.width = d_src.width();
    roi.height = d_src.height();

    NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);

    Ncv32u flags = 0;
    flags |= findLargestObject ? NCVPipeObjDet_FindLargestObject : 0;
    flags |= visualizeInPlace  ? NCVPipeObjDet_VisualizeInPlace  : 0;

    ncvStat = ncvDetectObjectsMultiScale_device(
        d_src, roi, d_rects, numDetections, haar, *h_haarStages, *d_haarStages,
        *d_haarNodes, *d_haarFeatures,
        winMinSize,
        minNeighbors,
        scaleStep, 1,
        flags,
        *gpuAllocator, *cpuAllocator, devProp, 0);
    ncvAssertReturnNcvStat(ncvStat);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

    return NCV_SUCCESS;
}
void cv::gpu::ORB_GPU::computeDescriptors(GpuMat& descriptors)
{
    using namespace cv::gpu::device::orb;

    int nAllkeypoints = 0;

    for (int level = 0; level < nLevels_; ++level)
        nAllkeypoints += keyPointsCount_[level];

    if (nAllkeypoints == 0)
    {
        descriptors.release();
        return;
    }

    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);

    int offset = 0;

    for (int level = 0; level < nLevels_; ++level)
    {
        if (keyPointsCount_[level] == 0)
            continue;

        GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);

        if (blurForDescriptor)
        {
            // preprocess the resized image
            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
            blurFilter->apply(imagePyr_[level], buf_, Rect(0, 0, imagePyr_[level].cols, imagePyr_[level].rows));
        }

        computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level],
            keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
            keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1),
            descRange, descriptorSize(), WTA_K_, 0);

        offset += keyPointsCount_[level];
    }
}
void cv::gpu::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream)
{
    using namespace cv::gpu::device::imgproc;

    CV_Assert(M.rows == 3 && M.cols == 3);

    xmap.create(dsize, CV_32FC1);
    ymap.create(dsize, CV_32FC1);

    float coeffs[3 * 3];
    Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);

    if (inverse)
        M.convertTo(coeffsMat, coeffsMat.type());
    else
    {
        cv::Mat iM;
        invert(M, iM);
        iM.convertTo(coeffsMat, coeffsMat.type());
    }

    buildWarpPerspectiveMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}
GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
{
    GpuMat src;

#ifndef HAVE_CUDA
    (void) _src;
    (void) stream;
    throw_no_cuda();
#else
    if (_src.kind() == _InputArray::CUDA_GPU_MAT)
    {
        src = _src.getGpuMat();
    }
    else if (!_src.empty())
    {
        BufferPool pool(stream);
        src = pool.getBuffer(_src.size(), _src.type());
        src.upload(_src, stream);
    }
#endif

    return src;
}
LevelsInit()
{
    nValues3[0] = nValues3[1] = nValues3[2] = 256;

    for (int i = 0; i < 256; ++i)
        pLevels[i] = i;

#if (CUDA_VERSION <= 4020)
    pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
    d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
    pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
}
void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& train,
                                         GpuMat& trainIdx, GpuMat& distance,
                                         const GpuMat& mask, Stream& stream)
{
    if (query.empty() || train.empty())
        return;

    using namespace cv::gpu::device::bf_match;

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                             int cc, cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.cols == query.cols && train.type() == query.type());
    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);

    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32F, distance);

    caller_t func = callers[query.depth()];
    CV_Assert(func != 0);

    DeviceInfo info;
    int cc = info.majorVersion() * 10 + info.minorVersion();

    func(query, train, mask, trainIdx, distance, cc, StreamAccessor::getStream(stream));
}
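A hedged usage sketch of the single-train path above (assumes CV_32F descriptors with one row per keypoint, filled elsewhere, and that BFMatcher_GPU exposes a static matchDownload helper for converting the device results to DMatch, as in the 2.4-era gpu module):

cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
cv::gpu::GpuMat query, train;                 // CV_32F descriptor matrices, filled elsewhere
cv::gpu::GpuMat trainIdx, distance;
matcher.matchSingle(query, train, trainIdx, distance, cv::gpu::GpuMat());

std::vector<cv::DMatch> matches;
cv::gpu::BFMatcher_GPU::matchDownload(trainIdx, distance, matches);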
int faceDetect_GPU(Mat& image, vector<Mat>& faces, int init)
{
    static CascadeClassifier_GPU cascade_gpu;
    static HANDLE init_mutex;

    if (init == 1)
    {
        init_mutex = CreateMutex(NULL, FALSE, NULL);
        cascade_gpu.load(string(CASCADE_PATH));
        return 0;
    }

    Mat gray;
    cvtColor(image, gray, CV_BGR2GRAY);
    equalizeHist(gray, gray);

    GpuMat image_gpu(gray);
    GpuMat objbuf;

    WaitForSingleObject(init_mutex, INFINITE);
    double start = GetTickCount();
    int detections_number = cascade_gpu.detectMultiScale(image_gpu, objbuf, 1.1, 3);
    cout << "Face Detect GPU Time : " << GetTickCount() - start << "ms" << endl;
    ReleaseMutex(init_mutex);

    Mat obj_host;
    objbuf.colRange(0, detections_number).download(obj_host);

    Rect* det_faces = obj_host.ptr<Rect>();
    for (int i = 0; i < detections_number; ++i)
    {
        Mat face = gray(det_faces[i]);
        faces.push_back(face);
    }

    return 0;
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
        return;
    }

    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
            return;
        }
    }

    setTo(src, s, Impl::getStream(impl));
}
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
{
    using namespace cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const PtrStepSzb, PtrStepb, double*, int);

    static Caller multipass_callers[] =
    {
        sqrSumMultipassCaller<unsigned char>, sqrSumMultipassCaller<char>,
        sqrSumMultipassCaller<unsigned short>, sqrSumMultipassCaller<short>,
        sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>
    };

    static Caller singlepass_callers[7] =
    {
        sqrSumCaller<unsigned char>, sqrSumCaller<char>,
        sqrSumCaller<unsigned short>, sqrSumCaller<short>,
        sqrSumCaller<int>, sqrSumCaller<float>
    };

    CV_Assert(src.depth() <= CV_32F);

    Caller* callers = multipass_callers;
    if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
        callers = singlepass_callers;

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    Caller caller = callers[src.depth()];

    double result[4];
    caller(src, buf, result, src.channels());
    return Scalar(result[0], result[1], result[2], result[3]);
}
void _Flow::detect(void)
{
    Frame* pGray;
    Frame* pNextFrame;
    Frame* pPrevFrame;
    GpuMat* pPrev;
    GpuMat* pNext;
    GpuMat GMat;
    GpuMat pGMat[2];

    if (m_pStream == NULL)
        return;

    pGray = m_pStream->getGrayFrame();
    if (pGray->empty())
        return;

    pNextFrame = m_pGrayFrames->getLastFrame();
    if (pGray->getFrameID() <= pNextFrame->getFrameID())
        return;

    m_pGrayFrames->updateFrameIndex();
    pNextFrame = m_pGrayFrames->getLastFrame();
    pPrevFrame = m_pGrayFrames->getPrevFrame();

    pNextFrame->getResizedOf(pGray, m_width, m_height);

    pPrev = pPrevFrame->getGMat();
    pNext = pNextFrame->getGMat();

    if (pPrev->empty())
        return;
    if (pNext->empty())
        return;
    if (pPrev->size() != pNext->size())
        return;

    m_pFarn->calc(*pPrev, *pNext, m_GFlowMat);

    // Generate depth map from flow
    if (m_bDepth == 0)
        return;

    cuda::abs(m_GFlowMat, GMat);
    cuda::split(GMat, pGMat);
    cuda::add(pGMat[0], pGMat[1], GMat);
    cuda::multiply(GMat, Scalar(100), pGMat[1]);
    pGMat[1].convertTo(*(m_pDepth->getGMat()), CV_8UC1);
    m_pDepth->updatedGMat();

//  m_flowMax = cuda::sum(fGMat)[0] / (fGMat.cols * fGMat.rows);
//  fInterval = 1.0 / m_flowMax;
//  fInterval *= 50.0;
//  cuda::min(fGMat, Scalar(m_flowMax), pGMat[0]);
//  cuda::multiply(pGMat[0], Scalar(fInterval), fGMat);
//  cv::cuda::cvtColor(depthGMat, idxGMat, CV_GRAY2BGR);
//  m_pGpuLUT->transform(idxGMat, segGMat);
}
void cv::gpu::GeneralizedHough_GPU::download(const GpuMat& d_positions, OutputArray h_positions_, OutputArray h_votes_)
{
    if (d_positions.empty())
    {
        h_positions_.release();
        if (h_votes_.needed())
            h_votes_.release();
        return;
    }

    CV_Assert(d_positions.rows == 2 && d_positions.type() == CV_32FC4);

    h_positions_.create(1, d_positions.cols, CV_32FC4);
    Mat h_positions = h_positions_.getMat();
    d_positions.row(0).download(h_positions);

    if (h_votes_.needed())
    {
        h_votes_.create(1, d_positions.cols, CV_32SC3);
        Mat h_votes = h_votes_.getMat();
        GpuMat d_votes(1, d_positions.cols, CV_32SC3, const_cast<int3*>(d_positions.ptr<int3>(1)));
        d_votes.download(h_votes);
    }
}
// CUDA version
void LKTracker::normCrossCorrelation(const GpuMat& img1, const GpuMat& img2,
                                     const GpuMat& gPoints1, const GpuMat& gPoints2,
                                     const vector<Point2f>& points1, const vector<Point2f>& points2)
{
    GpuMat res;
    GpuMat rec0;
    GpuMat rec1;

    similarity.clear();

    for (size_t i = 0; i < points1.size(); i++)
    {
        if (status[i] == 1)
        {
            // 10x10 patches around each tracked point pair
            Rect loc0(points1[i].x, points1[i].y, 10, 10);
            Rect loc1(points2[i].x, points2[i].y, 10, 10);
            rec0 = GpuMat(img1, loc0);
            rec1 = GpuMat(img2, loc1);

            gpu::matchTemplate(rec0, rec1, res, CV_TM_CCOEFF_NORMED);
            similarity.push_back(((float*)(res.data))[0]);
        }
        else
        {
            similarity.push_back(0.0);
        }
    }

    rec0.release();
    rec1.release();
    res.release();
}
void cv::ogl::Buffer::copyFrom(InputArray arr, cuda::Stream& stream, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) stream;
    (void) target;
    (void) autoRelease;
    throw_no_ogl();
#else
    #ifndef HAVE_CUDA
        (void) arr;
        (void) stream;
        (void) target;
        (void) autoRelease;
        throw_no_cuda();
    #else
        GpuMat dmat = arr.getGpuMat();

        create(dmat.size(), dmat.type(), target, autoRelease);

        impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
    #endif
#endif
}
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(src1.type() == CV_8UC4 || src1.type() == CV_32FC1);

    dst.create( src1.size(), CV_8UC1 );

    static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };

    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    if (src1.type() == CV_8UC4)
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
                src2.ptr<Npp8u>(), src2.step,
                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
        }
        else
        {
            mathfunc::compare_ne_8uc4(src1, src2, dst);
        }
    }
    else
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step,
                src2.ptr<Npp32f>(), src2.step,
                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
        }
        else
        {
            mathfunc::compare_ne_32f(src1, src2, dst);
        }
    }
}
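A minimal usage sketch for the comparison above (dimensions and fill values are illustrative; the NPP comparison writes 255 where the predicate holds and 0 elsewhere):

cv::gpu::GpuMat a(480, 640, CV_32FC1), b(480, 640, CV_32FC1), mask;
a.setTo(cv::Scalar::all(2.0));
b.setTo(cv::Scalar::all(1.0));
cv::gpu::compare(a, b, mask, cv::CMP_GT);   // mask is CV_8UC1, 255 where a > b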
void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
                                                 GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
                                                 const GpuMat& maskCollection, Stream& stream)
{
    if (query.empty() || trainCollection.empty())
        return;

    using namespace cv::gpu::device::bf_knnmatch;

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                             const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                             int cc, cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
        match2L1_gpu<unsigned short>, match2L1_gpu<short>,
        match2L1_gpu<int>, match2L1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
        0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
        0/*match2L2_gpu<int>*/, match2L2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
        match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
        match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
    };

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);

    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
    ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);

    if (stream)
        stream.enqueueMemSet(trainIdx, Scalar::all(-1));
    else
        trainIdx.setTo(Scalar::all(-1));

    caller_t func = callers[query.depth()];
    CV_Assert(func != 0);

    DeviceInfo info;
    int cc = info.majorVersion() * 10 + info.minorVersion();

    func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
}
unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
                     bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
{
    CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);

    const int defaultObjSearchNum = 100;
    if (objectsBuf.empty())
    {
        objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
    }

    cv::Size ncvMinSize = this->getClassifierCvSize();

    if (ncvMinSize.width < minSize.width && ncvMinSize.height < minSize.height)
    {
        ncvMinSize.width = minSize.width;
        ncvMinSize.height = minSize.height;
    }

    unsigned int numDetections;
    ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections));

    return numDetections;
}
GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, GpuMat& buf1)
{
    if (src.type() == type)
        return src;

    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);

    if (src.depth() == depth)
    {
        convertToCn(src, buf0, cn);
        return buf0;
    }

    if (src.channels() == cn)
    {
        convertToDepth(src, buf1, depth);
        return buf1;
    }

    convertToCn(src, buf0, cn);
    convertToDepth(buf0, buf1, depth);
    return buf1;
}
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, Stream& stream)
{
    if (ddepth < 0)
        ddepth = src.depth();

    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

    Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);
    f->apply(src, dst, Rect(0, 0, -1, -1), stream);
}
void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor)
{
    if (ddepth < 0)
        ddepth = src.depth();

    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

    Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor);
    f->apply(src, dst);
}
GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf)
{
    switch (arr.kind())
    {
    case _InputArray::GPU_MAT:
        return arr.getGpuMat();

    case _InputArray::OPENGL_BUFFER:
        arr.getOGlBuffer().copyTo(buf);
        return buf;

    default:
        buf.upload(arr.getMat());
        return buf;
    }
}
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
{
    class LevelsInit
    {
    public:
        Npp32s pLevels[256];
        const Npp32s* pLevels3[3];
        int nValues3[3];

        LevelsInit()
        {
            nValues3[0] = nValues3[1] = nValues3[2] = 256;
            for (int i = 0; i < 256; ++i)
                pLevels[i] = i;
            pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
        }
    };
    static LevelsInit lvls;

    int cn = src.channels();

    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
    CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous());

    dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));

    NppiSize sz;
    sz.height = src.rows;
    sz.width  = src.cols;

    Mat nppLut;
    lut.convertTo(nppLut, CV_32S);

    if (src.type() == CV_8UC1)
    {
        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
            nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
    }
    else
    {
        Mat nppLut3[3];
        const Npp32s* pValues3[3];

        if (nppLut.channels() == 1)
            pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
        else
        {
            cv::split(nppLut, nppLut3);
            pValues3[0] = nppLut3[0].ptr<Npp32s>();
            pValues3[1] = nppLut3[1].ptr<Npp32s>();
            pValues3[2] = nppLut3[2].ptr<Npp32s>();
        }

        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
            pValues3, lvls.pLevels3, lvls.nValues3) );
    }
}
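A minimal usage sketch of the table lookup above, with an illustrative intensity-inverting table (the image size and fill value are placeholders):

cv::Mat lut(1, 256, CV_8UC1);
for (int i = 0; i < 256; ++i)
    lut.at<uchar>(i) = (uchar)(255 - i);      // invert intensities

cv::gpu::GpuMat src(480, 640, CV_8UC1), dst;
src.setTo(cv::Scalar::all(10));
cv::gpu::LUT(src, lut, dst);                  // every pixel becomes 245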
void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiRShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiRShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiRShiftC_8u_C4R>::call },
        {NppShift<CV_8S , 1, nppiRShiftC_8s_C1R >::call, 0, NppShift<CV_8S , 3, nppiRShiftC_8s_C3R >::call, NppShift<CV_8S , 4, nppiRShiftC_8s_C4R>::call },
        {NppShift<CV_16U, 1, nppiRShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiRShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiRShiftC_16u_C4R>::call},
        {NppShift<CV_16S, 1, nppiRShiftC_16s_C1R>::call, 0, NppShift<CV_16S, 3, nppiRShiftC_16s_C3R>::call, NppShift<CV_16S, 4, nppiRShiftC_16s_C4R>::call},
        {NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() < CV_32F );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
}
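A minimal usage sketch of the per-pixel shift above (assumes the header declares a default null stream; the dimensions and fill value are placeholders):

cv::cuda::GpuMat src(480, 640, CV_16UC1), dst;
src.setTo(cv::Scalar::all(1024));
cv::cuda::rshift(src, cv::Scalar_<int>(2), dst);   // per-pixel >> 2, i.e. 1024 -> 256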
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)
{
    int sdepth = src.depth(), cn = src.channels();
    if (ddepth < 0)
        ddepth = sdepth;

    dst.create(src.size(), CV_MAKETYPE(ddepth, cn));

    Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor);
    f->apply(src, dst);
}