void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
{
    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                && fabs(beta) < std::numeric_limits<double>::epsilon();

    if (rtype < 0)
        rtype = src.type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());

    int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
    if (sdepth == ddepth && noScale)
    {
        // Same depth and no scaling: a plain copy suffices. Enqueue it on this
        // stream (rather than a synchronous copyTo) so the method keeps its
        // asynchronous semantics.
        enqueueCopy(src, dst);
        return;
    }

    GpuMat temp;
    const GpuMat* psrc = &src;
    if (sdepth != ddepth && psrc == &dst)
        psrc = &(temp = src);

    dst.create(src.size(), rtype);
    matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta, impl->stream);
}
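// Usage sketch (editor's addition, not part of the library source): converting a
// CV_8UC1 matrix to CV_32F asynchronously on a user-created stream, assuming the
// OpenCV 2.4-era gpu module API (#include <opencv2/gpu/gpu.hpp>).
static void example_enqueueConvert(const cv::gpu::GpuMat& src8u)
{
    cv::gpu::Stream stream;
    cv::gpu::GpuMat dst32f;

    // Scale to [0,1] while converting; the call returns immediately and the
    // work is ordered on 'stream'.
    stream.enqueueConvert(src8u, dst32f, CV_32F, 1.0 / 255.0, 0.0);
    stream.waitForCompletion();
}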
unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
                     bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
{
    CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);

    const int defaultObjSearchNum = 100;
    if (objectsBuf.empty())
    {
        objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
    }

    cv::Size ncvMinSize = this->getClassifierCvSize();

    if (ncvMinSize.width < minSize.width && ncvMinSize.height < minSize.height)
    {
        ncvMinSize.width = minSize.width;
        ncvMinSize.height = minSize.height;
    }

    unsigned int numDetections;
    ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors,
                              findLargestObject, visualizeInPlace, ncvMinSize, numDetections));

    return numDetections;
}
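// Usage sketch (editor's addition): the method above is the internal worker behind
// cv::gpu::CascadeClassifier_GPU::detectMultiScale. A typical caller, assuming the
// OpenCV 2.4 API and a hypothetical cascade file path:
static void example_cascade(const cv::gpu::GpuMat& grayFrame)
{
    cv::gpu::CascadeClassifier_GPU cascade;
    if (!cascade.load("haarcascade_frontalface_alt.xml")) // hypothetical path
        return;

    cv::gpu::GpuMat objbuf;
    int ndetections = cascade.detectMultiScale(grayFrame, objbuf, 1.2, 4);

    cv::Mat obj_host;
    objbuf.colRange(0, ndetections).download(obj_host);
    const cv::Rect* faces = obj_host.ptr<cv::Rect>(); // one Rect per detection
    (void)faces;
}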
void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
{
    CV_Assert(src.type() == CV_8UC1);

    dst.create(src.size(), src.type());

    int intBufSize;
    nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );

    // One shared byte buffer: the NPP integral scratch area followed by the
    // 256-entry 32-bit LUT.
    ensureSizeIsEnough(1, intBufSize + 256 * sizeof(int), CV_8UC1, buf);

    GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());
    GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);

    calcHist(src, hist, s);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );

    hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
}
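// Usage sketch (editor's addition): equalizing a grayscale image with caller-owned
// scratch buffers so repeated calls do not reallocate (OpenCV 2.4 gpu API assumed).
static void example_equalizeHist(const cv::gpu::GpuMat& gray8u)
{
    cv::gpu::GpuMat dst, hist, buf;
    cv::gpu::equalizeHist(gray8u, dst, hist, buf); // default stream
}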
void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h,
                                                       int search_window, int block_window, Stream& s)
{
    CV_Assert(src.depth() == CV_8U && src.channels() < 4);

    int border_size = search_window/2 + block_window/2;
    Size esize = src.size() + Size(border_size, border_size) * 2;

    cv::gpu::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
    GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);

    cv::gpu::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
    GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));

    int bcols, brows;
    cudev::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
    buffer.create(brows, bcols, CV_32S);

    using namespace cv::gpu::cudev::imgproc;
    typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0 };

    dst.create(src.size(), src.type());
    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
}
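// Usage sketch (editor's addition): denoising an 8-bit color image with the fast
// non-local means path (OpenCV 2.4 gpu API assumed; h controls filter strength).
static void example_fastNlm(const cv::gpu::GpuMat& noisy8uc3)
{
    cv::gpu::FastNonLocalMeansDenoising denoiser;
    cv::gpu::GpuMat clean;
    denoiser.simpleMethod(noisy8uc3, clean, 10.0f /*h*/, 21 /*search*/, 7 /*block*/);
}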
template<class T>
static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2], GpuMat l[2], GpuMat r[2],
                          GpuMat disp_selected_pyr[2], GpuMat& data_cost, GpuMat& data_cost_selected,
                          GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp,
                          cudaStream_t stream)
{
    CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
                 && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());

    CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3));

    const Scalar zero = Scalar::all(0);

    ////////////////////////////////////////////////////////////////////////////////////////////
    // Init

    int rows = left.rows;
    int cols = left.cols;

    rthis.levels = min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
    int levels = rthis.levels;

    AutoBuffer<int> buf(levels * 4);

    int* cols_pyr = buf;
    int* rows_pyr = cols_pyr + levels;
    int* nr_plane_pyr = rows_pyr + levels;
    int* step_pyr = nr_plane_pyr + levels;

    cols_pyr[0] = cols;
    rows_pyr[0] = rows;
    nr_plane_pyr[0] = rthis.nr_plane;

    const int n = 64;
    step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T);
    for (int i = 1; i < levels; i++)
    {
        cols_pyr[i] = (cols_pyr[i-1] + 1) / 2;
        rows_pyr[i] = (rows_pyr[i-1] + 1) / 2;

        nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;

        step_pyr[i] = alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T);
    }

    Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]);
    Size data_cost_size(step_pyr[0], rows * nr_plane_pyr[0] * 2);

    u[0].create(msg_size, DataType<T>::type);
    d[0].create(msg_size, DataType<T>::type);
    l[0].create(msg_size, DataType<T>::type);
    r[0].create(msg_size, DataType<T>::type);

    u[1].create(msg_size, DataType<T>::type);
    d[1].create(msg_size, DataType<T>::type);
    l[1].create(msg_size, DataType<T>::type);
    r[1].create(msg_size, DataType<T>::type);

    disp_selected_pyr[0].create(msg_size, DataType<T>::type);
    disp_selected_pyr[1].create(msg_size, DataType<T>::type);

    data_cost.create(data_cost_size, DataType<T>::type);
    data_cost_selected.create(msg_size, DataType<T>::type);

    step_pyr[0] = data_cost.step / sizeof(T);

    Size temp_size = data_cost_size;
    if (data_cost_size.width * data_cost_size.height < step_pyr[levels - 1] * rows_pyr[levels - 1] * rthis.ndisp)
        temp_size = Size(step_pyr[levels - 1], rows_pyr[levels - 1] * rthis.ndisp);

    temp.create(temp_size, DataType<T>::type);

    ////////////////////////////////////////////////////////////////////////////
    // Compute

    csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
                         rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);

    l[0] = zero;
    d[0] = zero;
    r[0] = zero;
    u[0] = zero;

    l[1] = zero;
    d[1] = zero;
    r[1] = zero;
    u[1] = zero;

    data_cost = zero;
    data_cost_selected = zero;

    int cur_idx = 0;

    for (int i = levels - 1; i >= 0; i--)
    {
        if (i == levels - 1)
        {
            csbp::init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
                                 step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(),
                                 rthis.use_local_init_data_cost, stream);
        }
        else
        {
            csbp::compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1],
                                    left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1],
                                    left.channels(), stream);

            int new_idx = (cur_idx + 1) & 1;

            csbp::init_message(u[new_idx].ptr<T>(), d[new_idx].ptr<T>(), l[new_idx].ptr<T>(), r[new_idx].ptr<T>(),
                               u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                               disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
                               data_cost_selected.ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1],
                               rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1],
                               nr_plane_pyr[i+1], stream);

            cur_idx = new_idx;
        }

        csbp::calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                                  data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[i],
                                  rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, stream);
    }

    if (disp.empty())
        disp.create(rows, cols, CV_16S);

    out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
    out = zero;

    csbp::compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                       data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[0], out,
                       nr_plane_pyr[0], stream);

    if (disp.type() != CV_16S)
        out.convertTo(disp, disp.type());
}
void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags, Stream& stream)
{
#ifndef HAVE_CUBLAS
    (void)src1;
    (void)src2;
    (void)alpha;
    (void)src3;
    (void)beta;
    (void)dst;
    (void)flags;
    (void)stream;
    CV_Error(CV_StsNotImplemented, "The library was built without CUBLAS");
#else
    // CUBLAS works with column-major matrices

    CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
    CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()));

    if (src1.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    bool tr1 = (flags & GEMM_1_T) != 0;
    bool tr2 = (flags & GEMM_2_T) != 0;
    bool tr3 = (flags & GEMM_3_T) != 0;

    if (src1.type() == CV_64FC2)
    {
        if (tr1 || tr2 || tr3)
            CV_Error(CV_StsNotImplemented, "transpose operation is not implemented for CV_64FC2 type");
    }

    Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size();
    Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size();
    Size src3Size = tr3 ? Size(src3.rows, src3.cols) : src3.size();
    Size dstSize(src2Size.width, src1Size.height);

    CV_Assert(src1Size.width == src2Size.height);
    CV_Assert(src3.empty() || src3Size == dstSize);

    dst.create(dstSize, src1.type());

    if (beta != 0)
    {
        if (src3.empty())
        {
            if (stream)
                stream.enqueueMemSet(dst, Scalar::all(0));
            else
                dst.setTo(Scalar::all(0));
        }
        else
        {
            if (tr3)
            {
                transpose(src3, dst, stream);
            }
            else
            {
                if (stream)
                    stream.enqueueCopy(src3, dst);
                else
                    src3.copyTo(dst);
            }
        }
    }

    cublasHandle_t handle;
    cublasSafeCall( cublasCreate_v2(&handle) );

    cublasSafeCall( cublasSetStream_v2(handle, StreamAccessor::getStream(stream)) );

    cublasSafeCall( cublasSetPointerMode_v2(handle, CUBLAS_POINTER_MODE_HOST) );

    const float alphaf = static_cast<float>(alpha);
    const float betaf = static_cast<float>(beta);

    const cuComplex alphacf = make_cuComplex(alphaf, 0);
    const cuComplex betacf = make_cuComplex(betaf, 0);

    const cuDoubleComplex alphac = make_cuDoubleComplex(alpha, 0);
    const cuDoubleComplex betac = make_cuDoubleComplex(beta, 0);

    // Row-major OpenCV matrices are fed to column-major CUBLAS by computing
    // dst^T = src2^T * src1^T, hence the swapped operands below.
    cublasOperation_t transa = tr2 ? CUBLAS_OP_T : CUBLAS_OP_N;
    cublasOperation_t transb = tr1 ? CUBLAS_OP_T : CUBLAS_OP_N;

    switch (src1.type())
    {
    case CV_32FC1:
        cublasSafeCall( cublasSgemm_v2(handle, transa, transb,
                                       tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
                                       &alphaf,
                                       src2.ptr<float>(), static_cast<int>(src2.step / sizeof(float)),
                                       src1.ptr<float>(), static_cast<int>(src1.step / sizeof(float)),
                                       &betaf,
                                       dst.ptr<float>(), static_cast<int>(dst.step / sizeof(float))) );
        break;

    case CV_64FC1:
        cublasSafeCall( cublasDgemm_v2(handle, transa, transb,
                                       tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
                                       &alpha,
                                       src2.ptr<double>(), static_cast<int>(src2.step / sizeof(double)),
                                       src1.ptr<double>(), static_cast<int>(src1.step / sizeof(double)),
                                       &beta,
                                       dst.ptr<double>(), static_cast<int>(dst.step / sizeof(double))) );
        break;

    case CV_32FC2:
        cublasSafeCall( cublasCgemm_v2(handle, transa, transb,
                                       tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
                                       &alphacf,
                                       src2.ptr<cuComplex>(), static_cast<int>(src2.step / sizeof(cuComplex)),
                                       src1.ptr<cuComplex>(), static_cast<int>(src1.step / sizeof(cuComplex)),
                                       &betacf,
                                       dst.ptr<cuComplex>(), static_cast<int>(dst.step / sizeof(cuComplex))) );
        break;

    case CV_64FC2:
        cublasSafeCall( cublasZgemm_v2(handle, transa, transb,
                                       tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
                                       &alphac,
                                       src2.ptr<cuDoubleComplex>(), static_cast<int>(src2.step / sizeof(cuDoubleComplex)),
                                       src1.ptr<cuDoubleComplex>(), static_cast<int>(src1.step / sizeof(cuDoubleComplex)),
                                       &betac,
                                       dst.ptr<cuDoubleComplex>(), static_cast<int>(dst.step / sizeof(cuDoubleComplex))) );
        break;
    }

    cublasSafeCall( cublasDestroy_v2(handle) );
#endif
}
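// Usage sketch (editor's addition): dst = 2*A^T*B + 0.5*C on the default stream
// (OpenCV 2.4 gpu API; requires a CUBLAS-enabled build and CV_32FC1 inputs here).
static void example_gemm(const cv::gpu::GpuMat& A, const cv::gpu::GpuMat& B, const cv::gpu::GpuMat& C)
{
    cv::gpu::GpuMat dst;
    cv::gpu::gemm(A, B, 2.0, C, 0.5, dst, cv::GEMM_1_T);
}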
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
{
    class LevelsInit
    {
    public:
        Npp32s pLevels[256];
        const Npp32s* pLevels3[3];
        int nValues3[3];
#if (CUDA_VERSION > 4020)
        GpuMat d_pLevels;
#endif

        LevelsInit()
        {
            nValues3[0] = nValues3[1] = nValues3[2] = 256;
            for (int i = 0; i < 256; ++i)
                pLevels[i] = i;

#if (CUDA_VERSION <= 4020)
            pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
            d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
            pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
        }
    };
    static LevelsInit lvls;

    int cn = src.channels();

    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
    CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous());

    dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));

    NppiSize sz;
    sz.height = src.rows;
    sz.width = src.cols;

    Mat nppLut;
    lut.convertTo(nppLut, CV_32S);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    if (src.type() == CV_8UC1)
    {
#if (CUDA_VERSION <= 4020)
        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
#else
        GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), lvls.d_pLevels.ptr<Npp32s>(), 256) );
#endif
    }
    else
    {
        const Npp32s* pValues3[3];

        Mat nppLut3[3];
#if (CUDA_VERSION > 4020)
        // Device copies of the LUT channels. They are declared at this scope so
        // they stay alive until the nppiLUT_Linear_8u_C3R call below; declaring
        // them inside the branches would free the device memory before use.
        GpuMat d_nppLut3[3];
#endif
        if (nppLut.channels() == 1)
        {
#if (CUDA_VERSION <= 4020)
            pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
#else
            d_nppLut3[0].upload(Mat(1, 256, CV_32S, nppLut.data));
            pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut3[0].ptr<Npp32s>();
#endif
        }
        else
        {
            cv::split(nppLut, nppLut3);

#if (CUDA_VERSION <= 4020)
            pValues3[0] = nppLut3[0].ptr<Npp32s>();
            pValues3[1] = nppLut3[1].ptr<Npp32s>();
            pValues3[2] = nppLut3[2].ptr<Npp32s>();
#else
            d_nppLut3[0].upload(Mat(1, 256, CV_32S, nppLut3[0].data));
            d_nppLut3[1].upload(Mat(1, 256, CV_32S, nppLut3[1].data));
            d_nppLut3[2].upload(Mat(1, 256, CV_32S, nppLut3[2].data));

            pValues3[0] = d_nppLut3[0].ptr<Npp32s>();
            pValues3[1] = d_nppLut3[1].ptr<Npp32s>();
            pValues3[2] = d_nppLut3[2].ptr<Npp32s>();
#endif
        }

        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) );
    }

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
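// Usage sketch (editor's addition): applying an inversion LUT to an 8-bit image
// (OpenCV 2.4 gpu API assumed; the table must hold exactly 256 entries).
static void example_LUT(const cv::gpu::GpuMat& src8uc1)
{
    cv::Mat table(1, 256, CV_8U);
    for (int i = 0; i < 256; ++i)
        table.at<uchar>(i) = static_cast<uchar>(255 - i); // invert intensities

    cv::gpu::GpuMat dst;
    cv::gpu::LUT(src8uc1, table, dst);
}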
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
{
    typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst,
                           float fy, float fx, int interpolation, cudaStream_t stream);
#ifdef OPENCV_TINY_GPU_MODULE
    static const func_t funcs[6][4] =
    {
        {device::resize<uchar> , 0 /*device::resize<uchar2>*/ , device::resize<uchar3> , device::resize<uchar4> },
        {0, 0, 0, 0},
        {0, 0, 0, 0},
        {0, 0, 0, 0},
        {0, 0, 0, 0},
        {device::resize<float> , 0 /*device::resize<float2>*/ , device::resize<float3> , device::resize<float4> }
    };
#else
    static const func_t funcs[6][4] =
    {
        {device::resize<uchar>      , 0 /*device::resize<uchar2>*/ , device::resize<uchar3>     , device::resize<uchar4>     },
        {0 /*device::resize<schar>*/, 0 /*device::resize<char2>*/  , 0 /*device::resize<char3>*/, 0 /*device::resize<char4>*/},
        {device::resize<ushort>     , 0 /*device::resize<ushort2>*/, device::resize<ushort3>    , device::resize<ushort4>    },
        {device::resize<short>      , 0 /*device::resize<short2>*/ , device::resize<short3>     , device::resize<short4>     },
        {0 /*device::resize<int>*/  , 0 /*device::resize<int2>*/   , 0 /*device::resize<int3>*/ , 0 /*device::resize<int4>*/ },
        {device::resize<float>      , 0 /*device::resize<float2>*/ , device::resize<float3>     , device::resize<float4>     }
    };
#endif

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );

    if (dsize == Size())
    {
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }

    dst.create(dsize, src.type());

    if (dsize == src.size())
    {
        if (stream)
            stream.enqueueCopy(src, dst);
        else
            src.copyTo(dst);
        return;
    }

    const func_t func = funcs[src.depth()][src.channels() - 1];
    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);
    PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);

    func(src, wholeSrc, ofs.y, ofs.x, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), interpolation, StreamAccessor::getStream(stream));
}
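// Usage sketch (editor's addition): halving an image with bilinear sampling
// (OpenCV 2.4 gpu API assumed; an empty dsize derives the size from fx/fy).
static void example_resize(const cv::gpu::GpuMat& src)
{
    cv::gpu::GpuMat half;
    cv::gpu::resize(src, half, cv::Size(), 0.5, 0.5, cv::INTER_LINEAR);
}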
void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
{
    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);

    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());

    cudaStream_t stream = StreamAccessor::getStream(s);

    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
    {
        NppiSize srcsz;
        srcsz.width  = src.cols;
        srcsz.height = src.rows;

        NppiSize dstsz;
        dstsz.width  = dst.cols;
        dstsz.height = dst.rows;

        NppStreamHandler h(stream);

        switch (src.type())
        {
        case CV_8UC1:
            {
                Npp8u nVal = saturate_cast<Npp8u>(value[0]);
                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
                break;
            }
        case CV_8UC4:
            {
                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]),
                                saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
                break;
            }
        case CV_32SC1:
            {
                Npp32s nVal = saturate_cast<Npp32s>(value[0]);
                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
                break;
            }
        case CV_32FC1:
            {
                Npp32f val = saturate_cast<Npp32f>(value[0]);
                Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
                break;
            }
        }

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else
    {
        typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
        static const caller_t callers[6][4] =
        {
            {   copyMakeBorder_caller<uchar, 1>  ,    copyMakeBorder_caller<uchar, 2>   ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>  },
            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4> },
            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>  },
            {0/*copyMakeBorder_caller<int, 1>*/  , 0/*copyMakeBorder_caller<int, 2>*/   , 0/*copyMakeBorder_caller<int, 3>*/  , 0/*copyMakeBorder_caller<int, 4>*/  },
            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float, 4>  }
        };

        caller_t func = callers[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, dst, top, left, borderType, value, stream);
    }
}
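// Usage sketch (editor's addition): padding an image by 16 pixels on each side
// with a constant gray border (OpenCV 2.4 gpu API assumed).
static void example_copyMakeBorder(const cv::gpu::GpuMat& src)
{
    cv::gpu::GpuMat padded;
    cv::gpu::copyMakeBorder(src, padded, 16, 16, 16, 16, cv::BORDER_CONSTANT, cv::Scalar::all(128));
}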
void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
{
    using namespace ::cv::gpu::device::matrix_reductions;

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4 && dtype <= CV_32F);
    CV_Assert(dim == 0 || dim == 1);
    CV_Assert(reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG || reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN);

    if (dtype < 0)
        dtype = src.depth();

    dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKETYPE(dtype, src.channels()));

    if (dim == 0)
    {
        typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);

        static const caller_t callers[6][6] =
        {
            {
                reduceRows_gpu<unsigned char, int, unsigned char>,
                0/*reduceRows_gpu<unsigned char, int, signed char>*/,
                0/*reduceRows_gpu<unsigned char, int, unsigned short>*/,
                0/*reduceRows_gpu<unsigned char, int, short>*/,
                reduceRows_gpu<unsigned char, int, int>,
                reduceRows_gpu<unsigned char, int, float>
            },
            {
                0/*reduceRows_gpu<signed char, int, unsigned char>*/,
                0/*reduceRows_gpu<signed char, int, signed char>*/,
                0/*reduceRows_gpu<signed char, int, unsigned short>*/,
                0/*reduceRows_gpu<signed char, int, short>*/,
                0/*reduceRows_gpu<signed char, int, int>*/,
                0/*reduceRows_gpu<signed char, int, float>*/
            },
            {
                0/*reduceRows_gpu<unsigned short, int, unsigned char>*/,
                0/*reduceRows_gpu<unsigned short, int, signed char>*/,
                reduceRows_gpu<unsigned short, int, unsigned short>,
                0/*reduceRows_gpu<unsigned short, int, short>*/,
                reduceRows_gpu<unsigned short, int, int>,
                reduceRows_gpu<unsigned short, int, float>
            },
            {
                0/*reduceRows_gpu<short, int, unsigned char>*/,
                0/*reduceRows_gpu<short, int, signed char>*/,
                0/*reduceRows_gpu<short, int, unsigned short>*/,
                reduceRows_gpu<short, int, short>,
                reduceRows_gpu<short, int, int>,
                reduceRows_gpu<short, int, float>
            },
            {
                0/*reduceRows_gpu<int, int, unsigned char>*/,
                0/*reduceRows_gpu<int, int, signed char>*/,
                0/*reduceRows_gpu<int, int, unsigned short>*/,
                0/*reduceRows_gpu<int, int, short>*/,
                reduceRows_gpu<int, int, int>,
                reduceRows_gpu<int, int, float>
            },
            {
                0/*reduceRows_gpu<float, float, unsigned char>*/,
                0/*reduceRows_gpu<float, float, signed char>*/,
                0/*reduceRows_gpu<float, float, unsigned short>*/,
                0/*reduceRows_gpu<float, float, short>*/,
                0/*reduceRows_gpu<float, float, int>*/,
                reduceRows_gpu<float, float, float>
            }
        };

        const caller_t func = callers[src.depth()][dst.depth()];
        if (!func)
            CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");

        func(src.reshape(1), dst.reshape(1), reduceOp, StreamAccessor::getStream(stream));
    }
    else
    {
        typedef void (*caller_t)(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);

        static const caller_t callers[6][6] =
        {
            {
                reduceCols_gpu<unsigned char, int, unsigned char>,
                0/*reduceCols_gpu<unsigned char, int, signed char>*/,
                0/*reduceCols_gpu<unsigned char, int, unsigned short>*/,
                0/*reduceCols_gpu<unsigned char, int, short>*/,
                reduceCols_gpu<unsigned char, int, int>,
                reduceCols_gpu<unsigned char, int, float>
            },
            {
                0/*reduceCols_gpu<signed char, int, unsigned char>*/,
                0/*reduceCols_gpu<signed char, int, signed char>*/,
                0/*reduceCols_gpu<signed char, int, unsigned short>*/,
                0/*reduceCols_gpu<signed char, int, short>*/,
                0/*reduceCols_gpu<signed char, int, int>*/,
                0/*reduceCols_gpu<signed char, int, float>*/
            },
            {
                0/*reduceCols_gpu<unsigned short, int, unsigned char>*/,
                0/*reduceCols_gpu<unsigned short, int, signed char>*/,
                reduceCols_gpu<unsigned short, int, unsigned short>,
                0/*reduceCols_gpu<unsigned short, int, short>*/,
                reduceCols_gpu<unsigned short, int, int>,
                reduceCols_gpu<unsigned short, int, float>
            },
            {
                0/*reduceCols_gpu<short, int, unsigned char>*/,
                0/*reduceCols_gpu<short, int, signed char>*/,
                0/*reduceCols_gpu<short, int, unsigned short>*/,
                reduceCols_gpu<short, int, short>,
                reduceCols_gpu<short, int, int>,
                reduceCols_gpu<short, int, float>
            },
            {
                0/*reduceCols_gpu<int, int, unsigned char>*/,
                0/*reduceCols_gpu<int, int, signed char>*/,
                0/*reduceCols_gpu<int, int, unsigned short>*/,
                0/*reduceCols_gpu<int, int, short>*/,
                reduceCols_gpu<int, int, int>,
                reduceCols_gpu<int, int, float>
            },
            {
                0/*reduceCols_gpu<float, float, unsigned char>*/,
                0/*reduceCols_gpu<float, float, signed char>*/,
                0/*reduceCols_gpu<float, float, unsigned short>*/,
                0/*reduceCols_gpu<float, float, short>*/,
                0/*reduceCols_gpu<float, float, int>*/,
                reduceCols_gpu<float, float, float>
            }
        };

        const caller_t func = callers[src.depth()][dst.depth()];
        if (!func)
            CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");

        func(src, src.channels(), dst, reduceOp, StreamAccessor::getStream(stream));
    }
}
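// Usage sketch (editor's addition): per-column sums of a CV_8UC1 matrix,
// accumulated into CV_32S (OpenCV 2.4 gpu API; CV_REDUCE_* come from core).
static void example_reduce(const cv::gpu::GpuMat& src8u)
{
    cv::gpu::GpuMat colSums;
    cv::gpu::reduce(src8u, colSums, 0 /*dim: collapse to a single row*/, CV_REDUCE_SUM, CV_32S);
}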
void cv::gpu::FarnebackOpticalFlow::operator ()(
        const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
{
    CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
    CV_Assert(frame0.size() == frame1.size());
    CV_Assert(polyN == 5 || polyN == 7);
    CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);

    Stream streams[5];
    if (S(s))
        streams[0] = s;

    Size size = frame0.size();
    GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;

    flowx.create(size, CV_32F);
    flowy.create(size, CV_32F);
    GpuMat flowx0 = flowx;
    GpuMat flowy0 = flowy;

    // Crop unnecessary levels
    double scale = 1;
    int numLevelsCropped = 0;
    for (; numLevelsCropped < numLevels; numLevelsCropped++)
    {
        scale *= pyrScale;
        if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
            break;
    }

    streams[0].enqueueConvert(frame0, frames_[0], CV_32F);
    streams[1].enqueueConvert(frame1, frames_[1], CV_32F);

    if (fastPyramids)
    {
        // Build Gaussian pyramids using pyrDown()
        pyramid0_.resize(numLevelsCropped + 1);
        pyramid1_.resize(numLevelsCropped + 1);
        pyramid0_[0] = frames_[0];
        pyramid1_[0] = frames_[1];
        for (int i = 1; i <= numLevelsCropped; ++i)
        {
            pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
            pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
        }
    }

    setPolynomialExpansionConsts(polyN, polySigma);
    device::optflow_farneback::setUpdateMatricesConsts();

    for (int k = numLevelsCropped; k >= 0; k--)
    {
        streams[0].waitForCompletion();

        scale = 1;
        for (int i = 0; i < k; i++)
            scale *= pyrScale;

        double sigma = (1./scale - 1) * 0.5;
        int smoothSize = cvRound(sigma*5) | 1;
        smoothSize = std::max(smoothSize, 3);

        int width = cvRound(size.width*scale);
        int height = cvRound(size.height*scale);

        if (fastPyramids)
        {
            width = pyramid0_[k].cols;
            height = pyramid0_[k].rows;
        }

        if (k > 0)
        {
            curFlowX.create(height, width, CV_32F);
            curFlowY.create(height, width, CV_32F);
        }
        else
        {
            curFlowX = flowx0;
            curFlowY = flowy0;
        }

        if (!prevFlowX.data)
        {
            if (flags & OPTFLOW_USE_INITIAL_FLOW)
            {
#if ENABLE_GPU_RESIZE
                resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
                resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
                streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
                streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
#else
                Mat tmp1, tmp2;
                flowx0.download(tmp1);
                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
                tmp2 *= scale;
                curFlowX.upload(tmp2);
                flowy0.download(tmp1);
                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
                tmp2 *= scale;
                curFlowY.upload(tmp2);
#endif
            }
            else
            {
                streams[0].enqueueMemSet(curFlowX, 0);
                streams[1].enqueueMemSet(curFlowY, 0);
            }
        }
        else
        {
#if ENABLE_GPU_RESIZE
            resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
            resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
            streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), 1./pyrScale);
            streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), 1./pyrScale);
#else
            Mat tmp1, tmp2;
            prevFlowX.download(tmp1);
            resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
            tmp2 *= 1./pyrScale;
            curFlowX.upload(tmp2);
            prevFlowY.download(tmp1);
            resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
            tmp2 *= 1./pyrScale;
            curFlowY.upload(tmp2);
#endif
        }

        GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
        GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
        GpuMat R[2] =
        {
            allocMatFromBuf(5*height, width, CV_32F, R_[0]),
            allocMatFromBuf(5*height, width, CV_32F, R_[1])
        };

        if (fastPyramids)
        {
            device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN, R[0], S(streams[0]));
            device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN, R[1], S(streams[1]));
        }
        else
        {
            GpuMat blurredFrame[2] =
            {
                allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
                allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
            };
            GpuMat pyrLevel[2] =
            {
                allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
                allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
            };

            Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
            device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);

            for (int i = 0; i < 2; i++)
            {
                device::optflow_farneback::gaussianBlurGpu(
                        frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101_GPU, S(streams[i]));
#if ENABLE_GPU_RESIZE
                resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0, 0, INTER_LINEAR, streams[i]);
#else
                // CPU fallback: resize the blurred frame via a host round-trip.
                Mat tmp1, tmp2;
                blurredFrame[i].download(tmp1);
                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
                pyrLevel[i].upload(tmp2);
#endif
                device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
            }
        }

        streams[1].waitForCompletion();
        device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, S(streams[0]));

        if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
        {
            Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
            device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
        }
        for (int i = 0; i < numIters; i++)
        {
            if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
                updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
            else
                updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
        }

        prevFlowX = curFlowX;
        prevFlowY = curFlowY;
    }

    flowx = curFlowX;
    flowy = curFlowY;

    if (!S(s))
        streams[0].waitForCompletion();
}
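// Usage sketch (editor's addition): dense Farneback flow between two grayscale
// frames (OpenCV 2.4 gpu API assumed).
static void example_farneback(const cv::gpu::GpuMat& prev8u, const cv::gpu::GpuMat& next8u)
{
    cv::gpu::FarnebackOpticalFlow flow;
    flow.numLevels = 5;
    flow.pyrScale  = 0.5;
    flow.winSize   = 13;
    flow.numIters  = 10;

    cv::gpu::GpuMat u, v;
    flow(prev8u, next8u, u, v); // u/v hold CV_32F per-pixel displacements
}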
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
                       GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
{
#if (CUDA_VERSION < 5000)
    CV_Assert(terminals.type() == CV_32S);
#else
    CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
#endif

    Size src_size = terminals.size();

    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == terminals.type());

    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == terminals.type());

    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == terminals.type());

    CV_Assert(topLeft.size() == src_size);
    CV_Assert(topLeft.type() == terminals.type());

    CV_Assert(topRight.size() == src_size);
    CV_Assert(topRight.type() == terminals.type());

    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == terminals.type());

    CV_Assert(bottomLeft.size() == src_size);
    CV_Assert(bottomLeft.type() == terminals.type());

    CV_Assert(bottomRight.size() == src_size);
    CV_Assert(bottomRight.type() == terminals.type());

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    int bufsz;
    nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) );

    ensureSizeIsEnough(1, bufsz, CV_8U, buf);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAlloc);

#if (CUDA_VERSION < 5000)
    nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
        top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
        bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp,
        labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
#else
    if (terminals.type() == CV_32S)
    {
        nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
            top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
            bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp,
            labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
    else
    {
        nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(),
            top.ptr<Npp32f>(), topLeft.ptr<Npp32f>(), topRight.ptr<Npp32f>(),
            bottom.ptr<Npp32f>(), bottomLeft.ptr<Npp32f>(), bottomRight.ptr<Npp32f>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp,
            labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
#endif

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
void cv::cuda::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blockSize, Size shiftSize, Size maxRange,
                                 bool usePrevious, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& st)
{
    CV_Assert( prev.type() == CV_8UC1 );
    CV_Assert( curr.size() == prev.size() && curr.type() == prev.type() );

    const Size velSize((prev.cols - blockSize.width + shiftSize.width) / shiftSize.width,
                       (prev.rows - blockSize.height + shiftSize.height) / shiftSize.height);

    velx.create(velSize, CV_32FC1);
    vely.create(velSize, CV_32FC1);

    // scanning scheme coordinates
    std::vector<short2> ss((2 * maxRange.width + 1) * (2 * maxRange.height + 1));
    int ssCount = 0;

    // Calculate scanning scheme
    const int minCount = std::min(maxRange.width, maxRange.height);

    // use spiral search pattern
    //
    //     9 10 11 12
    //     8  1  2 13
    //     7  *  3 14
    //     6  5  4 15
    //... 20 19 18 17
    //
    for (int i = 0; i < minCount; ++i)
    {
        // four cycles along sides
        int x = -i - 1, y = x;

        // upper side
        for (int j = -i; j <= i + 1; ++j, ++ssCount)
        {
            ss[ssCount].x = (short) ++x;
            ss[ssCount].y = (short) y;
        }

        // right side
        for (int j = -i; j <= i + 1; ++j, ++ssCount)
        {
            ss[ssCount].x = (short) x;
            ss[ssCount].y = (short) ++y;
        }

        // bottom side
        for (int j = -i; j <= i + 1; ++j, ++ssCount)
        {
            ss[ssCount].x = (short) --x;
            ss[ssCount].y = (short) y;
        }

        // left side
        for (int j = -i; j <= i + 1; ++j, ++ssCount)
        {
            ss[ssCount].x = (short) x;
            ss[ssCount].y = (short) --y;
        }
    }

    // the rest part
    if (maxRange.width < maxRange.height)
    {
        const int xleft = -minCount;

        // cycle by neighbor rings
        for (int i = minCount; i < maxRange.height; ++i)
        {
            // two cycles by x
            int y = -(i + 1);
            int x = xleft;

            // upper side
            for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
            {
                ss[ssCount].x = (short) x;
                ss[ssCount].y = (short) y;
            }

            x = xleft;
            y = -y;

            // bottom side
            for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
            {
                ss[ssCount].x = (short) x;
                ss[ssCount].y = (short) y;
            }
        }
    }
    else if (maxRange.width > maxRange.height)
    {
        const int yupper = -minCount;

        // cycle by neighbor rings
        for (int i = minCount; i < maxRange.width; ++i)
        {
            // two cycles by y
            int x = -(i + 1);
            int y = yupper;

            // left side
            for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
            {
                ss[ssCount].x = (short) x;
                ss[ssCount].y = (short) y;
            }

            y = yupper;
            x = -x;

            // right side
            for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
            {
                ss[ssCount].x = (short) x;
                ss[ssCount].y = (short) y;
            }
        }
    }

    const cudaStream_t stream = StreamAccessor::getStream(st);

    ensureSizeIsEnough(1, ssCount, CV_16SC2, buf);
    if (stream == 0)
        cudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
    else
        cudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );

    const int maxX = prev.cols - blockSize.width;
    const int maxY = prev.rows - blockSize.height;

    const int SMALL_DIFF = 2;
    const int BIG_DIFF = 128;

    const int blSize = blockSize.area();
    const int acceptLevel = blSize * SMALL_DIFF;
    const int escapeLevel = blSize * BIG_DIFF;

    optflowbm::calc(prev, curr, velx, vely,
                    make_int2(blockSize.width, blockSize.height), make_int2(shiftSize.width, shiftSize.height), usePrevious,
                    maxX, maxY, acceptLevel, escapeLevel, buf.ptr<short2>(), ssCount, stream);
}
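// Usage sketch (editor's addition): block-matching flow on 8-bit frames. The
// function above is declared under cv::cuda here; in the 2.4-era gpu module the
// same routine lives in cv::gpu, which is what this sketch assumes.
static void example_flowBM(const cv::gpu::GpuMat& prev8u, const cv::gpu::GpuMat& curr8u)
{
    cv::gpu::GpuMat velx, vely, buf;
    cv::gpu::calcOpticalFlowBM(prev8u, curr8u,
                               cv::Size(16, 16) /*block*/, cv::Size(1, 1) /*shift*/,
                               cv::Size(3, 3) /*maxRange*/, false /*usePrevious*/,
                               velx, vely, buf);
}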
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& s)
{
    CV_Assert(M.rows == 3 && M.cols == 3);

    int interpolation = flags & INTER_MAX;

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    static const bool useNppTab[6][4][3] =
    {
        {
            {false, false, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        }
    };

    bool useNpp = borderMode == BORDER_CONSTANT;
    useNpp = useNpp && useNppTab[src.depth()][src.channels() - 1][interpolation];
#ifdef linux
    // NPP bug on float data
    useNpp = useNpp && src.depth() != CV_32F;
#endif

    if (useNpp)
    {
        typedef void (*func_t)(const cv::gpu::GpuMat& src, cv::Size wholeSize, cv::Point ofs, cv::gpu::GpuMat& dst,
                               double coeffs[][3], cv::Size dsize, int flags, cudaStream_t stream);

        static const func_t funcs[2][6][4] =
        {
            {
                {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
            },
            {
                {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
            }
        };

        double coeffs[3][3];
        Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
        M.convertTo(coeffsMat, coeffsMat.type());

        const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, wholeSize, ofs, dst, coeffs, dsize, interpolation, StreamAccessor::getStream(s));
    }
    else
    {
        using namespace cv::gpu::device::imgproc;

        typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst,
                               int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        static const func_t funcs[6][4] =
        {
            {warpPerspective_gpu<uchar>      , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3>     , warpPerspective_gpu<uchar4>     },
            {0 /*warpPerspective_gpu<schar>*/, 0 /*warpPerspective_gpu<char2>*/  , 0 /*warpPerspective_gpu<char3>*/, 0 /*warpPerspective_gpu<char4>*/},
            {warpPerspective_gpu<ushort>     , 0 /*warpPerspective_gpu<ushort2>*/, warpPerspective_gpu<ushort3>    , warpPerspective_gpu<ushort4>    },
            {warpPerspective_gpu<short>      , 0 /*warpPerspective_gpu<short2>*/ , warpPerspective_gpu<short3>     , warpPerspective_gpu<short4>     },
            {0 /*warpPerspective_gpu<int>*/  , 0 /*warpPerspective_gpu<int2>*/   , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
            {warpPerspective_gpu<float>      , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3>     , warpPerspective_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        int gpuBorderType;
        CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

        dst.create(dsize, src.type());

        float coeffs[3 * 3];
        Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);

        if (flags & WARP_INVERSE_MAP)
            M.convertTo(coeffsMat, coeffsMat.type());
        else
        {
            cv::Mat iM;
            invert(M, iM);
            iM.convertTo(coeffsMat, coeffsMat.type());
        }

        Scalar_<float> borderValueFloat;
        borderValueFloat = borderValue;

        DeviceInfo info;
        int cc = info.majorVersion() * 10 + info.minorVersion();

        func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
             dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
    }
}
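// Usage sketch (editor's addition): warping with a 3x3 homography into a
// same-sized destination (OpenCV 2.4 gpu API assumed).
static void example_warpPerspective(const cv::gpu::GpuMat& src)
{
    cv::Mat H = cv::Mat::eye(3, 3, CV_64F);
    H.at<double>(0, 2) = 20.0; // shift 20 px to the right

    cv::gpu::GpuMat dst;
    cv::gpu::warpPerspective(src, dst, H, src.size(), cv::INTER_LINEAR);
}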
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA);
    CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));

    if (dsize == Size())
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }

    if (dsize != dst.size())
        dst.create(dsize, src.type());

    if (dsize == src.size())
    {
        if (s)
            s.enqueueCopy(src, dst);
        else
            src.copyTo(dst);
        return;
    }

    cudaStream_t stream = StreamAccessor::getStream(s);

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
    useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || (src.type() == CV_8UC4 && interpolation != INTER_AREA));

    if (useNpp)
    {
        typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst,
                                    int nDstStep, NppiSize dstROISize, double xFactor, double yFactor, int eInterpolation);

        const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R };

        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};

        NppiSize srcsz;
        srcsz.width = wholeSize.width;
        srcsz.height = wholeSize.height;

        NppiRect srcrect;
        srcrect.x = ofs.x;
        srcrect.y = ofs.y;
        srcrect.width = src.cols;
        srcrect.height = src.rows;

        NppiSize dstsz;
        dstsz.width = dst.cols;
        dstsz.height = dst.rows;

        NppStreamHandler h(stream);

        nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast<int>(src.step), srcrect,
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else
    {
        using namespace ::cv::gpu::device::imgproc;

        typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

        static const func_t funcs[6][4] =
        {
            {resize_gpu<uchar>      , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3>     , resize_gpu<uchar4>     },
            {0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/  , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
            {resize_gpu<ushort>     , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3>    , resize_gpu<ushort4>    },
            {resize_gpu<short>      , 0 /*resize_gpu<short2>*/ , resize_gpu<short3>     , resize_gpu<short4>     },
            {0 /*resize_gpu<int>*/  , 0 /*resize_gpu<int2>*/   , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
            {resize_gpu<float>      , 0 /*resize_gpu<float2>*/ , resize_gpu<float3>     , resize_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
             static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
    }
}
void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
{
    CV_Assert( src.channels() <= 4 );
    CV_Assert( dim == 0 || dim == 1 );
    CV_Assert( reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_MAX || reduceOp == REDUCE_MIN );

    if (dtype < 0)
        dtype = src.depth();

    dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));

    if (dim == 0)
    {
        typedef void (*func_t)(PtrStepSzb src, void* dst, int op, cudaStream_t stream);

        static const func_t funcs[7][7] =
        {
            {
                ::reduce::rows<unsigned char, int, unsigned char>,
                0/*::reduce::rows<unsigned char, int, signed char>*/,
                0/*::reduce::rows<unsigned char, int, unsigned short>*/,
                0/*::reduce::rows<unsigned char, int, short>*/,
                ::reduce::rows<unsigned char, int, int>,
                ::reduce::rows<unsigned char, float, float>,
                ::reduce::rows<unsigned char, double, double>
            },
            {
                0/*::reduce::rows<signed char, int, unsigned char>*/,
                0/*::reduce::rows<signed char, int, signed char>*/,
                0/*::reduce::rows<signed char, int, unsigned short>*/,
                0/*::reduce::rows<signed char, int, short>*/,
                0/*::reduce::rows<signed char, int, int>*/,
                0/*::reduce::rows<signed char, float, float>*/,
                0/*::reduce::rows<signed char, double, double>*/
            },
            {
                0/*::reduce::rows<unsigned short, int, unsigned char>*/,
                0/*::reduce::rows<unsigned short, int, signed char>*/,
                ::reduce::rows<unsigned short, int, unsigned short>,
                0/*::reduce::rows<unsigned short, int, short>*/,
                ::reduce::rows<unsigned short, int, int>,
                ::reduce::rows<unsigned short, float, float>,
                ::reduce::rows<unsigned short, double, double>
            },
            {
                0/*::reduce::rows<short, int, unsigned char>*/,
                0/*::reduce::rows<short, int, signed char>*/,
                0/*::reduce::rows<short, int, unsigned short>*/,
                ::reduce::rows<short, int, short>,
                ::reduce::rows<short, int, int>,
                ::reduce::rows<short, float, float>,
                ::reduce::rows<short, double, double>
            },
            {
                0/*::reduce::rows<int, int, unsigned char>*/,
                0/*::reduce::rows<int, int, signed char>*/,
                0/*::reduce::rows<int, int, unsigned short>*/,
                0/*::reduce::rows<int, int, short>*/,
                ::reduce::rows<int, int, int>,
                ::reduce::rows<int, float, float>,
                ::reduce::rows<int, double, double>
            },
            {
                0/*::reduce::rows<float, float, unsigned char>*/,
                0/*::reduce::rows<float, float, signed char>*/,
                0/*::reduce::rows<float, float, unsigned short>*/,
                0/*::reduce::rows<float, float, short>*/,
                0/*::reduce::rows<float, float, int>*/,
                ::reduce::rows<float, float, float>,
                ::reduce::rows<float, double, double>
            },
            {
                0/*::reduce::rows<double, double, unsigned char>*/,
                0/*::reduce::rows<double, double, signed char>*/,
                0/*::reduce::rows<double, double, unsigned short>*/,
                0/*::reduce::rows<double, double, short>*/,
                0/*::reduce::rows<double, double, int>*/,
                0/*::reduce::rows<double, double, float>*/,
                ::reduce::rows<double, double, double>
            }
        };

        const func_t func = funcs[src.depth()][dst.depth()];

        if (!func)
            CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of input and output array formats");

        func(src.reshape(1), dst.data, reduceOp, StreamAccessor::getStream(stream));
    }
    else
    {
        typedef void (*func_t)(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);

        static const func_t funcs[7][7] =
        {
            {
                ::reduce::cols<unsigned char, int, unsigned char>,
                0/*::reduce::cols<unsigned char, int, signed char>*/,
                0/*::reduce::cols<unsigned char, int, unsigned short>*/,
                0/*::reduce::cols<unsigned char, int, short>*/,
                ::reduce::cols<unsigned char, int, int>,
                ::reduce::cols<unsigned char, float, float>,
                ::reduce::cols<unsigned char, double, double>
            },
            {
                0/*::reduce::cols<signed char, int, unsigned char>*/,
                0/*::reduce::cols<signed char, int, signed char>*/,
                0/*::reduce::cols<signed char, int, unsigned short>*/,
                0/*::reduce::cols<signed char, int, short>*/,
                0/*::reduce::cols<signed char, int, int>*/,
                0/*::reduce::cols<signed char, float, float>*/,
                0/*::reduce::cols<signed char, double, double>*/
            },
            {
                0/*::reduce::cols<unsigned short, int, unsigned char>*/,
                0/*::reduce::cols<unsigned short, int, signed char>*/,
                ::reduce::cols<unsigned short, int, unsigned short>,
                0/*::reduce::cols<unsigned short, int, short>*/,
                ::reduce::cols<unsigned short, int, int>,
                ::reduce::cols<unsigned short, float, float>,
                ::reduce::cols<unsigned short, double, double>
            },
            {
                0/*::reduce::cols<short, int, unsigned char>*/,
                0/*::reduce::cols<short, int, signed char>*/,
                0/*::reduce::cols<short, int, unsigned short>*/,
                ::reduce::cols<short, int, short>,
                ::reduce::cols<short, int, int>,
                ::reduce::cols<short, float, float>,
                ::reduce::cols<short, double, double>
            },
            {
                0/*::reduce::cols<int, int, unsigned char>*/,
                0/*::reduce::cols<int, int, signed char>*/,
                0/*::reduce::cols<int, int, unsigned short>*/,
                0/*::reduce::cols<int, int, short>*/,
                ::reduce::cols<int, int, int>,
                ::reduce::cols<int, float, float>,
                ::reduce::cols<int, double, double>
            },
            {
                0/*::reduce::cols<float, float, unsigned char>*/,
                0/*::reduce::cols<float, float, signed char>*/,
                0/*::reduce::cols<float, float, unsigned short>*/,
                0/*::reduce::cols<float, float, short>*/,
                0/*::reduce::cols<float, float, int>*/,
                ::reduce::cols<float, float, float>,
                ::reduce::cols<float, double, double>
            },
            {
                0/*::reduce::cols<double, double, unsigned char>*/,
                0/*::reduce::cols<double, double, signed char>*/,
                0/*::reduce::cols<double, double, unsigned short>*/,
                0/*::reduce::cols<double, double, short>*/,
                0/*::reduce::cols<double, double, int>*/,
                0/*::reduce::cols<double, double, float>*/,
                ::reduce::cols<double, double, double>
            }
        };

        const func_t func = funcs[src.depth()][dst.depth()];

        if (!func)
            CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of input and output array formats");

        func(src, dst.data, src.channels(), reduceOp, StreamAccessor::getStream(stream));
    }
}
template<class T>
static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& temp, GpuMat& out,
                          const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{
    CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
                 && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());

    CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4));

    const Scalar zero = Scalar::all(0);

    cudaStream_t cudaStream = StreamAccessor::getStream(stream);

    ////////////////////////////////////////////////////////////////////////////////////////////
    // Init

    int rows = left.rows;
    int cols = left.cols;

    rthis.levels = min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
    int levels = rthis.levels;

    // compute sizes
    AutoBuffer<int> buf(levels * 3);
    int* cols_pyr = buf;
    int* rows_pyr = cols_pyr + levels;
    int* nr_plane_pyr = rows_pyr + levels;

    cols_pyr[0]     = cols;
    rows_pyr[0]     = rows;
    nr_plane_pyr[0] = rthis.nr_plane;

    for (int i = 1; i < levels; i++)
    {
        cols_pyr[i]     = cols_pyr[i-1] / 2;
        rows_pyr[i]     = rows_pyr[i-1] / 2;
        nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;
    }

    GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected;

    // allocate buffers
    int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2
    buffers_count += 2;     // data_cost has twice as many rows as the other buffers, that's why +2, not +1
    buffers_count += 1;     // data_cost_selected
    mbuf.create(rows * rthis.nr_plane * buffers_count, cols, DataType<T>::type);

    data_cost          = mbuf.rowRange(0, rows * rthis.nr_plane * 2);
    data_cost_selected = mbuf.rowRange(data_cost.rows, data_cost.rows + rows * rthis.nr_plane);

    for (int k = 0; k < 2; ++k) // in/out
    {
        GpuMat sub1 = mbuf.rowRange(data_cost.rows + data_cost_selected.rows, mbuf.rows);
        GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2);

        GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] };
        for (int _r = 0; _r < 5; ++_r)
        {
            *buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5);
            assert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
        }
    }

    size_t elem_step = mbuf.step / sizeof(T);

    Size temp_size = data_cost.size();
    if ((size_t)temp_size.area() < elem_step * rows_pyr[levels - 1] * rthis.ndisp)
        temp_size = Size(static_cast<int>(elem_step), rows_pyr[levels - 1] * rthis.ndisp);

    temp.create(temp_size, DataType<T>::type);

    ////////////////////////////////////////////////////////////////////////////
    // Compute

    load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
                   rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);

    if (stream)
    {
        stream.enqueueMemSet(l[0], zero);
        stream.enqueueMemSet(d[0], zero);
        stream.enqueueMemSet(r[0], zero);
        stream.enqueueMemSet(u[0], zero);

        stream.enqueueMemSet(l[1], zero);
        stream.enqueueMemSet(d[1], zero);
        stream.enqueueMemSet(r[1], zero);
        stream.enqueueMemSet(u[1], zero);

        stream.enqueueMemSet(data_cost, zero);
        stream.enqueueMemSet(data_cost_selected, zero);
    }
    else
    {
        l[0].setTo(zero);
        d[0].setTo(zero);
        r[0].setTo(zero);
        u[0].setTo(zero);

        l[1].setTo(zero);
        d[1].setTo(zero);
        r[1].setTo(zero);
        u[1].setTo(zero);

        data_cost.setTo(zero);
        data_cost_selected.setTo(zero);
    }

    int cur_idx = 0;

    for (int i = levels - 1; i >= 0; i--)
    {
        if (i == levels - 1)
        {
            init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
                           elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(),
                           rthis.use_local_init_data_cost, cudaStream);
        }
        else
        {
            compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), elem_step,
                              left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1],
                              left.channels(), cudaStream);

            int new_idx = (cur_idx + 1) & 1;

            init_message(u[new_idx].ptr<T>(), d[new_idx].ptr<T>(), l[new_idx].ptr<T>(), r[new_idx].ptr<T>(),
                         u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                         disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
                         data_cost_selected.ptr<T>(), data_cost.ptr<T>(), elem_step, rows_pyr[i],
                         cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], cudaStream);

            cur_idx = new_idx;
        }

        calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                            data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step,
                            rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream);
    }

    if (disp.empty())
        disp.create(rows, cols, CV_16S);

    out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));

    if (stream)
        stream.enqueueMemSet(out, zero);
    else
        out.setTo(zero);

    compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
                 data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step, out, nr_plane_pyr[0], cudaStream);

    if (disp.type() != CV_16S)
    {
        if (stream)
            stream.enqueueConvert(out, disp, disp.type());
        else
            out.convertTo(disp, disp.type());
    }
}
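// Usage sketch (editor's addition): computing a disparity map through the public
// StereoConstantSpaceBP wrapper around the operator above (OpenCV 2.4 gpu API).
static void example_csbp(const cv::gpu::GpuMat& left8u, const cv::gpu::GpuMat& right8u)
{
    cv::gpu::StereoConstantSpaceBP csbp(128 /*ndisp*/, 8 /*iters*/, 4 /*levels*/, 4 /*nr_plane*/);
    cv::gpu::GpuMat disp;
    csbp(left8u, right8u, disp); // CV_16S disparities by default
}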
void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy)
{
    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
    CV_Assert( I0.size() == I1.size() );
    CV_Assert( I0.type() == I1.type() );
    CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
    CV_Assert( nscales > 0 );

    // allocate memory for the pyramid structure
    I0s.resize(nscales);
    I1s.resize(nscales);
    u1s.resize(nscales);
    u2s.resize(nscales);

    I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
    I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);

    if (!useInitialFlow)
    {
        flowx.create(I0.size(), CV_32FC1);
        flowy.create(I0.size(), CV_32FC1);
    }

    u1s[0] = flowx;
    u2s[0] = flowy;

    I1x_buf.create(I0.size(), CV_32FC1);
    I1y_buf.create(I0.size(), CV_32FC1);

    I1w_buf.create(I0.size(), CV_32FC1);
    I1wx_buf.create(I0.size(), CV_32FC1);
    I1wy_buf.create(I0.size(), CV_32FC1);

    grad_buf.create(I0.size(), CV_32FC1);
    rho_c_buf.create(I0.size(), CV_32FC1);

    p11_buf.create(I0.size(), CV_32FC1);
    p12_buf.create(I0.size(), CV_32FC1);
    p21_buf.create(I0.size(), CV_32FC1);
    p22_buf.create(I0.size(), CV_32FC1);

    diff_buf.create(I0.size(), CV_32FC1);

    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        gpu::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep);
        gpu::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep);

        if (I0s[s].cols < 16 || I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }

        if (useInitialFlow)
        {
            gpu::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep);
            gpu::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep);

            gpu::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]);
            gpu::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]);
        }
        else
        {
            u1s[s].create(I0s[s].size(), CV_32FC1);
            u2s[s].create(I0s[s].size(), CV_32FC1);
        }
    }

    if (!useInitialFlow)
    {
        u1s[nscales-1].setTo(Scalar::all(0));
        u2s[nscales-1].setTo(Scalar::all(0));
    }

    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);

        // if this was the last scale, finish now
        if (s == 0)
            break;

        // otherwise, upsample the optical flow

        // zoom the optical flow for the next finer scale
        gpu::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
        gpu::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());

        // scale the optical flow with the appropriate zoom factor
        gpu::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
        gpu::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]);
    }
}
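// Usage sketch (editor's addition): dual TV-L1 optical flow with default
// parameters (OpenCV 2.4 gpu API assumed).
static void example_tvl1(const cv::gpu::GpuMat& frame0, const cv::gpu::GpuMat& frame1)
{
    cv::gpu::OpticalFlowDual_TVL1_GPU tvl1;
    cv::gpu::GpuMat u, v;
    tvl1(frame0, frame1, u, v);
}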