static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, float h, float hForColorComponents, int templateWindowSize, int searchWindowSize) { UMat src = _src.getUMat(); _dst.create(src.size(), src.type()); UMat dst = _dst.getUMat(); UMat src_lab; cvtColor(src, src_lab, COLOR_LBGR2Lab); UMat l(src.size(), CV_8U); UMat ab(src.size(), CV_8UC2); std::vector<UMat> l_ab(2), l_ab_denoised(2); l_ab[0] = l; l_ab[1] = ab; l_ab_denoised[0].create(src.size(), CV_8U); l_ab_denoised[1].create(src.size(), CV_8UC2); int from_to[] = { 0,0, 1,1, 2,2 }; mixChannels(std::vector<UMat>(1, src_lab), l_ab, from_to, 3); fastNlMeansDenoising(l_ab[0], l_ab_denoised[0], h, templateWindowSize, searchWindowSize); fastNlMeansDenoising(l_ab[1], l_ab_denoised[1], hForColorComponents, templateWindowSize, searchWindowSize); UMat dst_lab(src.size(), CV_8UC3); mixChannels(l_ab_denoised, std::vector<UMat>(1, dst_lab), from_to, 3); cvtColor(dst_lab, dst, COLOR_Lab2LBGR, src.channels()); return true; }
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result) { matchTemplate(_image, _templ, _result, CV_TM_CCORR); UMat image_sums, temp; integral(_image, image_sums, CV_32F); int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc, format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); if (k.empty()) return false; UMat templ = _templ.getUMat(); UMat result = _result.getUMat(); Size tsize = templ.size(); if (cn==1) { Scalar templMean = mean(templ); float templ_sum = (float)templMean[0]; k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); } else { Vec4f templ_sum = Vec4f::all(0); templ_sum = (Vec4f)mean(templ); k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); } size_t globalsize[2] = { result.cols, result.rows }; return k.run(2, globalsize, NULL, false); }
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC || thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) || (!doubleSupport && depth == CV_64F)) return false; const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV" }; ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc, format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type], ocl::typeToStr(ktype), ocl::typeToStr(depth), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), type); UMat dst = _dst.getUMat(); if (depth <= CV_32S) thresh = cvFloor(thresh); k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval)))); size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); }
OCL_PERF_TEST_P(stitch, b12, TEST_DETECTORS) { UMat pano; vector<Mat> imgs; imgs.push_back( imread( getDataPath("stitching/b1.png") ) ); imgs.push_back( imread( getDataPath("stitching/b2.png") ) ); Ptr<detail::FeaturesFinder> featuresFinder = getFeatureFinder(GetParam()); Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb" ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE) : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE); declare.iterations(20); while(next()) { Stitcher stitcher = Stitcher::createDefault(); stitcher.setFeaturesFinder(featuresFinder); stitcher.setFeaturesMatcher(featuresMatcher); stitcher.setWarper(makePtr<SphericalWarper>()); stitcher.setRegistrationResol(WORK_MEGAPIX); startTimer(); stitcher.stitch(imgs, pano); stopTimer(); } EXPECT_NEAR(pano.size().width, 1124, 50); EXPECT_NEAR(pano.size().height, 644, 30); SANITY_CHECK_NOTHING(); }
UMat& UMat::setTo(InputArray _value, InputArray _mask) { bool haveMask = !_mask.empty(); #ifdef HAVE_OPENCL int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp); if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() ) { Mat value = _value.getMat(); CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) ); int kercn = haveMask || cn == 3 ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)), kertp = CV_MAKE_TYPE(d, kercn); double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn); int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d", ocl::memopTypeToStr(kertp), rowsPerWI, ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)), ocl::memopTypeToStr(d), kercn); ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts); if( !setK.empty() ) { ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn); UMat mask; if( haveMask ) { mask = _mask.getUMat(); CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 ); ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), dstarg = ocl::KernelArg::ReadWrite(*this); setK.args(maskarg, dstarg, scalararg); } else { ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn); setK.args(dstarg, scalararg); } size_t globalsize[] = { cols * cn / kercn, (rows + rowsPerWI - 1) / rowsPerWI }; if( setK.run(2, globalsize, NULL, false) ) { CV_IMPL_ADD(CV_IMPL_OCL); return *this; } } } #endif Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE); m.setTo(_value, _mask); return *this; }
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result) { matchTemplate(_image, _templ, _result, CV_TM_CCORR); UMat image_sums, temp; integral(_image, temp); if (temp.depth() == CV_64F) temp.convertTo(image_sums, CV_32F); else image_sums = temp; int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc, format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); if (k.empty()) return false; UMat templ = _templ.getUMat(); Size size = _image.size(), tsize = templ.size(); _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F); UMat result = _result.getUMat(); if (cn == 1) { float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area(); k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); } else { Vec4f templ_sum = Vec4f::all(0); templ_sum = sum(templ) / tsize.area(); if (cn == 2) k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum[0], templ_sum[1]); else if (cn==3) k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum[0], templ_sum[1], templ_sum[2]); else k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]); } size_t globalsize[2] = { result.cols, result.rows }; return k.run(2, globalsize, NULL, false); }
OCL_PERF_TEST_P(stitch, boat, TEST_DETECTORS) { Size expected_dst_size(10789, 2663); checkDeviceMaxMemoryAllocSize(expected_dst_size, CV_16SC3, 4); #if defined(_WIN32) && !defined(_WIN64) if (cv::ocl::useOpenCL()) throw ::perf::TestBase::PerfSkipTestException(); #endif UMat pano; vector<Mat> _imgs; _imgs.push_back( imread( getDataPath("stitching/boat1.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat2.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat3.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat4.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat5.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat6.jpg") ) ); vector<UMat> imgs = ToUMat(_imgs); Ptr<detail::FeaturesFinder> featuresFinder = getFeatureFinder(GetParam()); Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb" ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE) : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE); declare.iterations(20); while(next()) { Stitcher stitcher = Stitcher::createDefault(); stitcher.setFeaturesFinder(featuresFinder); stitcher.setFeaturesMatcher(featuresMatcher); stitcher.setWarper(makePtr<SphericalWarper>()); stitcher.setRegistrationResol(WORK_MEGAPIX); startTimer(); stitcher.stitch(imgs, pano); stopTimer(); } EXPECT_NEAR(pano.size().width, expected_dst_size.width, 200); EXPECT_NEAR(pano.size().height, expected_dst_size.height, 100); SANITY_CHECK_NOTHING(); }
UMat& UMat::setTo(InputArray _value, InputArray _mask) { bool haveMask = !_mask.empty(); int tp = type(), cn = CV_MAT_CN(tp); if( dims <= 2 && cn <= 4 && cn != 3 && ocl::useOpenCL() ) { Mat value = _value.getMat(); CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) ); double buf[4]; convertAndUnrollScalar(value, tp, (uchar*)buf, 1); char opts[1024]; sprintf(opts, "-D dstT=%s", ocl::memopTypeToStr(tp)); ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts); if( !setK.empty() ) { ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE(tp)); UMat mask; if( haveMask ) { mask = _mask.getUMat(); CV_Assert( mask.size() == size() && mask.type() == CV_8U ); ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); ocl::KernelArg dstarg = ocl::KernelArg::ReadWrite(*this); setK.args(maskarg, dstarg, scalararg); } else { ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this); setK.args(dstarg, scalararg); } size_t globalsize[] = { cols, rows }; if( setK.run(2, globalsize, 0, false) ) return *this; } } Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE); m.setTo(_value, _mask); return *this; }
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) { bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) return false; static const int tileSize = 16; String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), tileSize, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); if (kcols.empty()) return false; UMat src = _src.getUMat(); Size src_size = src.size(); Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); UMat buf(bufsize, sdepth); UMat buf_sq(bufsize, sqdepth); kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); size_t gt = src.cols, lt = tileSize; if (!kcols.run(1, >, <, false)) return false; ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); if (krows.empty()) return false; Size sumsize(src_size.width + 1, src_size.height + 1); _sum.create(sumsize, sdepth); UMat sum = _sum.getUMat(); _sqsum.create(sumsize, sqdepth); UMat sum_sq = _sqsum.getUMat(); krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); gt = src.rows; return krows.run(1, >, <, false); }
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC || thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) || (!doubleSupport && depth == CV_64F)) return false; const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV" }; ocl::Device dev = ocl::Device::getDefault(); int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc, format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type], ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), type); UMat dst = _dst.getUMat(); if (depth <= CV_32S) thresh = cvFloor(thresh); const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 }; double min_val = min_vals[depth]; k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))), ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val)))); size_t globalsize[2] = { static_cast<size_t>(dst.cols * cn / kercn), static_cast<size_t>(dst.rows) }; globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size; return k.run(2, globalsize, NULL, false); }
OCL_PERF_TEST_P(stitch, boat, TEST_DETECTORS) { UMat pano; vector<Mat> _imgs; _imgs.push_back( imread( getDataPath("stitching/boat1.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat2.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat3.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat4.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat5.jpg") ) ); _imgs.push_back( imread( getDataPath("stitching/boat6.jpg") ) ); vector<UMat> imgs = ToUMat(_imgs); Ptr<detail::FeaturesFinder> featuresFinder = GetParam() == "orb" ? Ptr<detail::FeaturesFinder>(new detail::OrbFeaturesFinder()) : Ptr<detail::FeaturesFinder>(new detail::SurfFeaturesFinder()); Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb" ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE) : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE); declare.iterations(20); while(next()) { Stitcher stitcher = Stitcher::createDefault(); stitcher.setFeaturesFinder(featuresFinder); stitcher.setFeaturesMatcher(featuresMatcher); stitcher.setWarper(makePtr<SphericalWarper>()); stitcher.setRegistrationResol(WORK_MEGAPIX); startTimer(); stitcher.stitch(imgs, pano); stopTimer(); } EXPECT_NEAR(pano.size().width, 10789, 200); EXPECT_NEAR(pano.size().height, 2663, 100); SANITY_CHECK_NOTHING(); }
static bool ocl_calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, const UMat & ubtvWeights) { int cn = _src.channels(); ocl::Kernel k("calcBtvRegularization", ocl::superres::superres_btvl1_oclsrc, format("-D cn=%d", cn)); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), src.type()); _dst.setTo(Scalar::all(0)); UMat dst = _dst.getUMat(); const int ksize = (btvKernelSize - 1) / 2; k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ksize, ocl::KernelArg::PtrReadOnly(ubtvWeights)); size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows }; return k.run(2, globalsize, NULL, false); }
int main(int argc, char** argv) { const char* keys = "{ i input | | specify input image }" "{ h help | | print help message }"; cv::CommandLineParser args(argc, argv, keys); if (args.has("help")) { cout << "Usage : " << argv[0] << " [options]" << endl; cout << "Available options:" << endl; args.printMessage(); return EXIT_SUCCESS; } cv::ocl::Context ctx = cv::ocl::Context::getDefault(); if (!ctx.ptr()) { cerr << "OpenCL is not available" << endl; return 1; } cv::ocl::Device device = cv::ocl::Device::getDefault(); if (!device.compilerAvailable()) { cerr << "OpenCL compiler is not available" << endl; return 1; } UMat src; { string image_file = args.get<string>("i"); if (!image_file.empty()) { Mat image = imread(image_file); if (image.empty()) { cout << "error read image: " << image_file << endl; return 1; } cvtColor(image, src, COLOR_BGR2GRAY); } else { Mat frame(cv::Size(640, 480), CV_8U, Scalar::all(128)); Point p(frame.cols / 2, frame.rows / 2); line(frame, Point(0, frame.rows / 2), Point(frame.cols, frame.rows / 2), 1); circle(frame, p, 200, Scalar(32, 32, 32), 8, LINE_AA); string str = "OpenCL"; int baseLine = 0; Size box = getTextSize(str, FONT_HERSHEY_COMPLEX, 2, 5, &baseLine); putText(frame, str, Point((frame.cols - box.width) / 2, (frame.rows - box.height) / 2 + baseLine), FONT_HERSHEY_COMPLEX, 2, Scalar(255, 255, 255), 5, LINE_AA); frame.copyTo(src); } } cv::String module_name; // empty to disable OpenCL cache { cout << "OpenCL program source: " << endl; cout << "======================================================================================================" << endl; cout << opencl_kernel_src << endl; cout << "======================================================================================================" << endl; //! [Define OpenCL program source] cv::ocl::ProgramSource source(module_name, "simple", opencl_kernel_src, ""); //! [Define OpenCL program source] //! [Compile/build OpenCL for current OpenCL device] cv::String errmsg; cv::ocl::Program program(source, "", errmsg); if (program.ptr() == NULL) { cerr << "Can't compile OpenCL program:" << endl << errmsg << endl; return 1; } //! [Compile/build OpenCL for current OpenCL device] if (!errmsg.empty()) { cout << "OpenCL program build log:" << endl << errmsg << endl; } //! [Get OpenCL kernel by name] cv::ocl::Kernel k("magnutude_filter_8u", program); if (k.empty()) { cerr << "Can't get OpenCL kernel" << endl; return 1; } //! [Get OpenCL kernel by name] UMat result(src.size(), CV_8UC1); //! [Define kernel parameters and run] size_t globalSize[2] = {(size_t)src.cols, (size_t)src.rows}; size_t localSize[2] = {8, 8}; bool executionResult = k .args( cv::ocl::KernelArg::ReadOnlyNoSize(src), // size is not used (similar to 'dst' size) cv::ocl::KernelArg::WriteOnly(result), (float)2.0 ) .run(2, globalSize, localSize, true); if (!executionResult) { cerr << "OpenCL kernel launch failed" << endl; return 1; } //! [Define kernel parameters and run] imshow("Source", src); imshow("Result", result); for (;;) { int key = waitKey(); if (key == 27/*ESC*/ || key == 'q' || key == 'Q') break; } } return 0; }
static bool ocl_moments( InputArray _src, Moments& m) { const int TILE_SIZE = 32; const int K = 10; ocl::Kernel k("moments", ocl::imgproc::moments_oclsrc, format("-D TILE_SIZE=%d", TILE_SIZE)); if( k.empty() ) return false; UMat src = _src.getUMat(); Size sz = src.size(); int xtiles = (sz.width + TILE_SIZE-1)/TILE_SIZE; int ytiles = (sz.height + TILE_SIZE-1)/TILE_SIZE; int ntiles = xtiles*ytiles; UMat umbuf(1, ntiles*K, CV_32S); size_t globalsize[] = {xtiles, sz.height}, localsize[] = {1, TILE_SIZE}; bool ok = k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::PtrWriteOnly(umbuf), xtiles).run(2, globalsize, localsize, true); if(!ok) return false; Mat mbuf = umbuf.getMat(ACCESS_READ); for( int i = 0; i < ntiles; i++ ) { double x = (i % xtiles)*TILE_SIZE, y = (i / xtiles)*TILE_SIZE; const int* mom = mbuf.ptr<int>() + i*K; double xm = x * mom[0], ym = y * mom[0]; // accumulate moments computed in each tile // + m00 ( = m00' ) m.m00 += mom[0]; // + m10 ( = m10' + x*m00' ) m.m10 += mom[1] + xm; // + m01 ( = m01' + y*m00' ) m.m01 += mom[2] + ym; // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) m.m20 += mom[3] + x * (mom[1] * 2 + xm); // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1]; // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) m.m02 += mom[5] + y * (mom[2] * 2 + ym); // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); } return true; }
void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& newPts, const UMat& newNrm, Affine3f pose, int level, Matx66f &A, Vec6f &b) const { CV_TRACE_FUNCTION(); Size oldSize = oldPts.size(), newSize = newPts.size(); CV_Assert(oldSize == oldNrm.size()); CV_Assert(newSize == newNrm.size()); // calculate 1x7 vector ab to produce b and upper triangle of A: // [A|b] = ab*(ab^t) // and then reduce it across work groups cv::String errorStr; ocl::ProgramSource source = ocl::rgbd::icp_oclsrc; cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; ocl::Kernel k; k.create("getAb", source, options, &errorStr); if(k.empty()) throw std::runtime_error("Failed to create kernel: " + errorStr); size_t globalSize[2]; globalSize[0] = (size_t)newPts.cols; globalSize[1] = (size_t)newPts.rows; const ocl::Device& device = ocl::Device::getDefault(); // workaround for Intel's integrated GPU size_t wgsLimit = device.isIntel() ? 64 : device.maxWorkGroupSize(); size_t memSize = device.localMemSize(); // local memory should keep upperTriangles for all threads in group for reduce const size_t ltsz = UTSIZE*sizeof(float); const size_t lcols = 32; size_t lrows = min(memSize/ltsz, wgsLimit)/lcols; // round lrows down to 2^n lrows = roundDownPow2(lrows); size_t localSize[2] = {lcols, lrows}; Size ngroups((int)divUp(globalSize[0], (unsigned int)localSize[0]), (int)divUp(globalSize[1], (unsigned int)localSize[1])); // size of local buffer for group-wide reduce size_t lsz = localSize[0]*localSize[1]*ltsz; Intr::Projector proj = intrinsics.scale(level).makeProjector(); Vec2f fxy(proj.fx, proj.fy), cxy(proj.cx, proj.cy); UMat& groupedSumGpu = groupedSumBuffers[level]; groupedSumGpu.create(Size(ngroups.width*UTSIZE, ngroups.height), CV_32F); groupedSumGpu.setTo(0); // TODO: optimization possible: // samplers instead of oldPts/oldNrm (mask needed) k.args(ocl::KernelArg::ReadOnlyNoSize(oldPts), ocl::KernelArg::ReadOnlyNoSize(oldNrm), oldSize, ocl::KernelArg::ReadOnlyNoSize(newPts), ocl::KernelArg::ReadOnlyNoSize(newNrm), newSize, ocl::KernelArg::Constant(pose.matrix.val, sizeof(pose.matrix.val)), fxy.val, cxy.val, distanceThreshold*distanceThreshold, cos(angleThreshold), //TODO: replace by KernelArg::Local(lsz) ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz), ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu) ); if(!k.run(2, globalSize, localSize, true)) throw std::runtime_error("Failed to run kernel"); float upperTriangle[UTSIZE]; for(int i = 0; i < UTSIZE; i++) upperTriangle[i] = 0; Mat groupedSumCpu = groupedSumGpu.getMat(ACCESS_READ); for(int y = 0; y < ngroups.height; y++) { const float* rowr = groupedSumCpu.ptr<float>(y); for(int x = 0; x < ngroups.width; x++) { const float* p = rowr + x*UTSIZE; for(int j = 0; j < UTSIZE; j++) { upperTriangle[j] += p[j]; } } } groupedSumCpu.release(); ABtype sumAB = ABtype::zeros(); int pos = 0; for(int i = 0; i < 6; i++) { for(int j = i; j < 7; j++) { sumAB(i, j) = upperTriangle[pos++]; } } // splitting AB matrix to A and b for(int i = 0; i < 6; i++) { // augment lower triangle of A by symmetry for(int j = i; j < 6; j++) { A(i, j) = A(j, i) = sumAB(i, j); } b(i) = sumAB(i, 6); } }
static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result) { matchTemplate(_image, _templ, _result, CV_TM_CCORR); UMat temp, image_sums, image_sqsums; integral(_image, image_sums, image_sqsums, CV_32F, CV_32F); int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc, format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); if (k.empty()) return false; UMat templ = _templ.getUMat(); Size size = _image.size(), tsize = templ.size(); _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F); UMat result = _result.getUMat(); float scale = 1.f / tsize.area(); if (cn == 1) { float templ_sum = (float)sum(templ)[0]; multiply(templ, templ, temp, 1, CV_32F); float templ_sqsum = (float)sum(temp)[0]; templ_sqsum -= scale * templ_sum * templ_sum; templ_sum *= scale; if (templ_sqsum < DBL_EPSILON) { result = Scalar::all(1); return true; } k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum); } else { Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0); templ_sum = sum(templ); multiply(templ, templ, temp, 1, CV_32F); templ_sqsum = sum(temp); float templ_sqsum_sum = 0; for (int i = 0; i < cn; i ++) templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; templ_sum *= scale; if (templ_sqsum_sum < DBL_EPSILON) { result = Scalar::all(1); return true; } k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum_sum); } size_t globalsize[2] = { result.cols, result.rows }; return k.run(2, globalsize, NULL, false); }
cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window, TermCriteria criteria ) { CV_INSTRUMENT_REGION() const int TOLERANCE = 10; Size size; Mat mat; UMat umat; bool isUMat = _probImage.isUMat(); if (isUMat) umat = _probImage.getUMat(), size = umat.size(); else mat = _probImage.getMat(), size = mat.size(); meanShift( _probImage, window, criteria ); window.x -= TOLERANCE; if( window.x < 0 ) window.x = 0; window.y -= TOLERANCE; if( window.y < 0 ) window.y = 0; window.width += 2 * TOLERANCE; if( window.x + window.width > size.width ) window.width = size.width - window.x; window.height += 2 * TOLERANCE; if( window.y + window.height > size.height ) window.height = size.height - window.y; // Calculating moments in new center mass Moments m = isUMat ? moments(umat(window)) : moments(mat(window)); double m00 = m.m00, m10 = m.m10, m01 = m.m01; double mu11 = m.mu11, mu20 = m.mu20, mu02 = m.mu02; if( fabs(m00) < DBL_EPSILON ) return RotatedRect(); double inv_m00 = 1. / m00; int xc = cvRound( m10 * inv_m00 + window.x ); int yc = cvRound( m01 * inv_m00 + window.y ); double a = mu20 * inv_m00, b = mu11 * inv_m00, c = mu02 * inv_m00; // Calculating width & height double square = std::sqrt( 4 * b * b + (a - c) * (a - c) ); // Calculating orientation double theta = atan2( 2 * b, a - c + square ); // Calculating width & length of figure double cs = cos( theta ); double sn = sin( theta ); double rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02; double rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02; double length = std::sqrt( rotate_a * inv_m00 ) * 4; double width = std::sqrt( rotate_c * inv_m00 ) * 4; // In case, when tetta is 0 or 1.57... the Length & Width may be exchanged if( length < width ) { std::swap( length, width ); std::swap( cs, sn ); theta = CV_PI*0.5 - theta; } // Saving results int _xc = cvRound( xc ); int _yc = cvRound( yc ); int t0 = cvRound( fabs( length * cs )); int t1 = cvRound( fabs( width * sn )); t0 = MAX( t0, t1 ) + 2; window.width = MIN( t0, (size.width - _xc) * 2 ); t0 = cvRound( fabs( length * sn )); t1 = cvRound( fabs( width * cs )); t0 = MAX( t0, t1 ) + 2; window.height = MIN( t0, (size.height - _yc) * 2 ); window.x = MAX( 0, _xc - window.width / 2 ); window.y = MAX( 0, _yc - window.height / 2 ); window.width = MIN( size.width - window.x, window.width ); window.height = MIN( size.height - window.y, window.height ); RotatedRect box; box.size.height = (float)length; box.size.width = (float)width; box.angle = (float)((CV_PI*0.5+theta)*180./CV_PI); while(box.angle < 0) box.angle += 360; while(box.angle >= 360) box.angle -= 360; if(box.angle >= 180) box.angle -= 180; box.center = Point2f( window.x + window.width*0.5f, window.y + window.height*0.5f); return box; }
int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria ) { CV_INSTRUMENT_REGION() Size size; int cn; Mat mat; UMat umat; bool isUMat = _probImage.isUMat(); if (isUMat) umat = _probImage.getUMat(), cn = umat.channels(), size = umat.size(); else mat = _probImage.getMat(), cn = mat.channels(), size = mat.size(); Rect cur_rect = window; CV_Assert( cn == 1 ); if( window.height <= 0 || window.width <= 0 ) CV_Error( Error::StsBadArg, "Input window has non-positive sizes" ); window = window & Rect(0, 0, size.width, size.height); double eps = (criteria.type & TermCriteria::EPS) ? std::max(criteria.epsilon, 0.) : 1.; eps = cvRound(eps*eps); int i, niters = (criteria.type & TermCriteria::MAX_ITER) ? std::max(criteria.maxCount, 1) : 100; for( i = 0; i < niters; i++ ) { cur_rect = cur_rect & Rect(0, 0, size.width, size.height); if( cur_rect == Rect() ) { cur_rect.x = size.width/2; cur_rect.y = size.height/2; } cur_rect.width = std::max(cur_rect.width, 1); cur_rect.height = std::max(cur_rect.height, 1); Moments m = isUMat ? moments(umat(cur_rect)) : moments(mat(cur_rect)); // Calculating center of mass if( fabs(m.m00) < DBL_EPSILON ) break; int dx = cvRound( m.m10/m.m00 - window.width*0.5 ); int dy = cvRound( m.m01/m.m00 - window.height*0.5 ); int nx = std::min(std::max(cur_rect.x + dx, 0), size.width - cur_rect.width); int ny = std::min(std::max(cur_rect.y + dy, 0), size.height - cur_rect.height); dx = nx - cur_rect.x; dy = ny - cur_rect.y; cur_rect.x = nx; cur_rect.y = ny; // Check for coverage centers mass & window if( dx*dx + dy*dy < eps ) break; } window = cur_rect; return i; }
void UMatToVector(const UMat & um, std::vector<Point2f> & v) const { v.resize(um.size().area()); um.copyTo(Mat(um.size(), CV_32FC2, &v[0])); }
// returns sequence of squares detected on the image. // the sequence is stored in the specified memory storage static void findSquares( const UMat& image, vector<vector<Point> >& squares ) { squares.clear(); UMat pyr, timg, gray0(image.size(), CV_8U), gray; // down-scale and upscale the image to filter out the noise pyrDown(image, pyr, Size(image.cols/2, image.rows/2)); pyrUp(pyr, timg, image.size()); vector<vector<Point> > contours; // find squares in every color plane of the image for( int c = 0; c < 3; c++ ) { int ch[] = {c, 0}; mixChannels(timg, gray0, ch, 1); // try several threshold levels for( int l = 0; l < N; l++ ) { // hack: use Canny instead of zero threshold level. // Canny helps to catch squares with gradient shading if( l == 0 ) { // apply Canny. Take the upper threshold from slider // and set the lower to 0 (which forces edges merging) Canny(gray0, gray, 0, thresh, 5); // dilate canny output to remove potential // holes between edge segments dilate(gray, gray, UMat(), Point(-1,-1)); } else { // apply threshold if l!=0: // tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0 cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY); } // find contours and store them all as a list findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE); vector<Point> approx; // test each contour for( size_t i = 0; i < contours.size(); i++ ) { // approximate contour with accuracy proportional // to the contour perimeter approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true); // square contours should have 4 vertices after approximation // relatively large area (to filter out noisy contours) // and be convex. // Note: absolute value of an area is used because // area may be positive or negative - in accordance with the // contour orientation if( approx.size() == 4 && fabs(contourArea(Mat(approx))) > 1000 && isContourConvex(Mat(approx)) ) { double maxCosine = 0; for( int j = 2; j < 5; j++ ) { // find the maximum cosine of the angle between joint edges double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1])); maxCosine = MAX(maxCosine, cosine); } // if cosines of all angles are small // (all angles are ~90 degree) then write quandrange // vertices to resultant sequence if( maxCosine < 0.3 ) squares.push_back(approx); } } } } }