// Launches a to-HSV-family color conversion kernel from the cvt_color program.
// @param src   source image on the device
// @param dst   destination image (must already be allocated)
// @param bidx  index of the blue channel (0 or 2), baked into build options
// @param kernelName         OpenCL kernel to run
// @param additionalOptions  appended verbatim to the build options string
// @param data1/data2        optional extra device buffers appended to the
//                           argument list only when non-empty (presumably
//                           lookup tables for some conversions — confirm
//                           against the kernel source)
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                         const std::string & additionalOptions = std::string(),
                         const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
    // Offsets and steps are passed in element units (elemSize1), not bytes,
    // so the kernel can address ROI submatrices.
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    // NOTE: the push order below must match the kernel's parameter list exactly.
    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    // Optional buffers are appended last, matching kernel variants that
    // declare the extra parameters.
    if (!data1.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
    if (!data2.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));

    // One work item per destination pixel.
    size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };   // smaller work-group size for Android GPUs
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
// radiusMatchSingle void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask) { if (query.empty() || train.empty()) return; // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int int callType = query.depth(); char cvFuncName[] = "radiusMatchSingle"; if (callType != 5) CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n"); if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0 || callType != 2 || callType != 4))) { CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n"); } CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size())); nMatches.create(1, query.rows, CV_32SC1); if (trainIdx.empty()) { trainIdx.create(query.rows, std::max((train.rows/ 100), 10), CV_32SC1); distance.create(query.rows, std::max((train.rows/ 100), 10), CV_32FC1); } nMatches.setTo(Scalar::all(0)); matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); exit: return; }
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &masks) { if (query.empty() || trainCollection.empty()) return; // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int int callType = query.depth(); char cvFuncName[] = "matchCollection"; if (callType != 5) CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n"); if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0 || callType != 2 || callType != 4))) { CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n"); } CV_Assert(query.channels() == 1 && query.depth() < CV_64F); trainIdx.create(1, query.rows, CV_32S); imgIdx.create(1, query.rows, CV_32S); distance.create(1, query.rows, CV_32F); matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType); exit: return; }
// radiusMatchSingle void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask) { if (query.empty() || train.empty()) return; const int nQuery = query.rows; const int nTrain = train.rows; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size())); ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches); if (trainIdx.empty()) { ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx); ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance); } nMatches.setTo(Scalar::all(0)); matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); return; }
// knn match void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, const oclMat &mask) { if (query.empty() || train.empty()) return; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); const int nQuery = query.rows; const int nTrain = train.rows; if (k == 2) { ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); } else { ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); ensureSizeIsEnough(nQuery, k, CV_32F, distance); ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist); } trainIdx.setTo(Scalar::all(-1)); kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); return; }
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches) { if (trainIdx.empty() || distance.empty()) return; Mat trainIdxCPU(trainIdx); Mat distanceCPU(distance); matchConvert(trainIdxCPU, distanceCPU, matches); }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult) { if (trainIdx.empty() || distance.empty()) return; Mat trainIdxCPU(trainIdx); Mat distanceCPU(distance); knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult); }
// Radius match against the stored train-descriptor collection.
// NOTE(review): the OpenCL dispatch is disabled (#if 0 / commented out), so
// this function currently only allocates and zeroes the result buffers;
// maxDistance is consequently unused.
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx,
        oclMat &distance, oclMat &nMatches, float /*maxDistance*/, const std::vector<oclMat> &masks)
{
    if (query.empty() || empty())
        return;

    // Signature of the (disabled) per-type kernel callers.
    typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
                             const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
#if 0
    static const caller_t callers[3][6] =
    {
        {
            ocl_matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            ocl_matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            ocl_matchL1_gpu<int>, matchL1_gpu<float>
        },
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, ocl_matchL2_gpu<float>
        },
        {
            ocl_matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            ocl_matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            ocl_matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        }
    };
#endif
    const int nQuery = query.rows;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));

    nMatches.create(1, nQuery, CV_32SC1);
    if (trainIdx.empty())
    {
        // NOTE(review): the capacity heuristic uses nQuery here while the
        // single-train variant uses the train row count — confirm this is
        // intentional for the collection case.
        trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
        imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
        distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1);
    }

    nMatches.setTo(Scalar::all(0));

    //caller_t func = callers[distType][query.depth()];
    //CV_Assert(func != 0);

    std::vector<oclMat> trains_(trainDescCollection.begin(), trainDescCollection.end());
    std::vector<oclMat> masks_(masks.begin(), masks.end());
    /*  func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
        trainIdx, imgIdx, distance, nMatches));*/
}
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches, std::vector< std::vector<DMatch> > &matches, bool compactResult) { if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) return; Mat trainIdxCPU(trainIdx); Mat imgIdxCPU(imgIdx); Mat distanceCPU(distance); Mat nMatchesCPU(nMatches); radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); }
// Sets up the device-side resources for one SURF detection run on img:
// validates the image/mask, checks that every octave layer keeps a positive
// interior after the filter margin, allocates the per-octave counters and
// builds the integral image (bound to texture memory when supported).
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
    surf_(surf),
    img_cols(img.cols), img_rows(img.rows),
    use_mask(!mask.empty()), counters(oclMat()),
    imgTex(NULL), sumTex(NULL), maskSumTex(NULL), _img(img)
{
    CV_Assert(!img.empty() && img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
    CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

    // The filter at the coarsest octave must still fit inside the image.
    const int min_size = calcSize(surf_.nOctaves - 1, 0);
    CV_Assert(img_rows - min_size >= 0);
    CV_Assert(img_cols - min_size >= 0);

    // Layer size at the coarsest octave; subtracting the margin on both
    // sides must leave a positive interior.
    const int layer_rows = img_rows >> (surf_.nOctaves - 1);
    const int layer_cols = img_cols >> (surf_.nOctaves - 1);
    const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    // Result capacity bounded both by keypointsRatio and by 65535.
    maxFeatures   = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

    CV_Assert(maxFeatures > 0);

    // One counter per octave plus one global counter.
    counters.create(1, surf_.nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    integral(img, surf_.sum);

    // Bind source and integral images to texture memory only when the
    // device supports image2d.
    if(support_image2d())
    {
        bindImgTex(img, imgTex);
        bindImgTex(surf_.sum, sumTex);
    }

    maskSumTex = 0;

    if (use_mask)
    {
        // Masked detection is unimplemented; fail loudly instead of
        // silently ignoring the mask.
        CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
        //!FIXME
        // temp fix for missing min overload
        //oclMat temp(mask.size(), mask.type());
        //temp.setTo(Scalar::all(1.0));
        ////cv::ocl::min(mask, temp, surf_.mask1);           ///////// disable this
        //integral(surf_.mask1, surf_.maskSum);
        //bindImgTex(surf_.maskSum, maskSumTex);
    }
}
// Sets up the device-side resources for one SURF detection run on img:
// validates the image/mask, checks octave geometry, allocates per-octave
// counters and builds the integral image bound to texture memory.
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
    surf_(surf),
    img_cols(img.cols), img_rows(img.rows),
    use_mask(!mask.empty()),
    imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
{
    CV_Assert(!img.empty() && img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
    CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

    // The filter at the coarsest octave must still fit inside the image.
    const int min_size = calcSize(surf_.nOctaves - 1, 0);
    CV_Assert(img_rows - min_size >= 0);
    CV_Assert(img_cols - min_size >= 0);

    // Layer size at the coarsest octave must keep a positive interior after
    // removing the filter margin on both sides.
    const int layer_rows = img_rows >> (surf_.nOctaves - 1);
    const int layer_cols = img_cols >> (surf_.nOctaves - 1);
    const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    // Result capacity bounded both by keypointsRatio and by 65535.
    maxFeatures   = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

    CV_Assert(maxFeatures > 0);

    // One counter per octave plus one global counter.
    counters.create(1, surf_.nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));

    bindImgTex(img, imgTex);

    integral(img, surf_.sum); // the two argumented integral version is incorrect

    bindImgTex(surf_.sum, sumTex);

    maskSumTex = 0;

    if (use_mask)
    {
        // BUGFIX: was `throw std::exception()`, which carries no message and
        // bypasses OpenCV's error machinery. CV_Error throws cv::Exception
        // (derived from std::exception), so existing handlers still work.
        CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
        //!FIXME
        // temp fix for missing min overload
        //oclMat temp(mask.size(), mask.type());
        //temp.setTo(Scalar::all(1.0));
        ////cv::ocl::min(mask, temp, surf_.mask1);           ///////// disable this
        //integral(surf_.mask1, surf_.maskSum);
        //bindImgTex(surf_.maskSum, maskSumTex);
    }
}
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask) { assert(mask.empty()); // mask is not supported at the moment oclMat trainIdx, distance; matchSingle(query, train, trainIdx, distance, mask); matchDownload(trainIdx, distance, matches); }
int findCorners_caller( const TextureCL& eig, const float threshold, const oclMat& mask, oclMat& corners, const int max_count) { std::vector<int> k; Context * cxt = Context::getContext(); std::vector< std::pair<size_t, const void*> > args; std::string kernelname = "findCorners"; const int mask_strip = mask.step / mask.elemSize1(); oclMat g_counter(1, 1, CV_32SC1); g_counter.setTo(0); args.push_back(make_pair( sizeof(cl_mem), (void*)&eig )); args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data )); args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data )); args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip)); args.push_back(make_pair( sizeof(cl_float), (void*)&threshold )); args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows )); args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols )); args.push_back(make_pair( sizeof(cl_int), (void*)&max_count )); args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data )); size_t globalThreads[3] = {eig.cols, eig.rows, 1}; size_t localThreads[3] = {16, 16, 1}; const char * opt = mask.empty() ? "" : "-D WITH_MASK"; openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt); return std::min(Mat(g_counter).at<int>(0), max_count); }
// Device-to-device deep copy: allocates m to match src, then copies the 2D
// buffer region (respecting each matrix's step and offset).
static void copyTo(const oclMat &src, oclMat &m )
{
    CV_DbgAssert(!src.empty());
    m.create(src.size(), src.type());
    // NOTE(review): parameter order appears to be (ctx, dstData, dstStep,
    // dstOffset, srcData, srcStep, rowWidthInBytes, rows, srcOffset) —
    // confirm against openCLCopyBuffer2D's declaration.
    openCLCopyBuffer2D(src.clCxt, m.data, m.step, m.offset, src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset);
}
void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, const oclMat &mask) { if (query.empty() || train.empty()) return; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.cols == query.cols && train.type() == query.type()); ensureSizeIsEnough(1, query.rows, CV_32S, trainIdx); ensureSizeIsEnough(1, query.rows, CV_32F, distance); matchDispatcher(query, train, mask, trainIdx, distance, distType); return; }
// Detects FAST keypoints on the device and downloads them to the host vector.
void cv::ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
{
    if (image.empty())
        return;

    // Run the device-side detection, then transfer the results to the host.
    (*this)(image, mask, d_keypoints_);
    downloadKeypoints(d_keypoints_, keypoints);
}
void cv::ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints) { if (d_keypoints.empty()) return; Mat h_keypoints(d_keypoints); convertKeypoints(h_keypoints, keypoints); }
// Launches a from-RGB color conversion kernel from the cvt_color program.
// On Intel devices several pixels may be processed per work item (4 for
// 4-aligned 8-bit rows, 2 for even widths); the factor is baked into the
// kernel via the pixels_per_work_item build option and divides the global
// work size accordingly.
// @param data1/data2  optional extra device buffers appended only when
//                     non-empty (presumably conversion tables — confirm
//                     against the kernel source)
static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                           const std::string & additionalOptions = std::string(),
                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
    // Offsets and steps in element units (elemSize1), not bytes.
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    // Pick the vectorization factor for Intel devices.
    int pixels_per_work_item = 1;

    if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
    {
        if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
            pixels_per_work_item = 4;
        else if (src.cols % 2 == 0)
            pixels_per_work_item = 2;
        else
            pixels_per_work_item = 1;
    }

    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d",
                                       src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    // NOTE: the push order below must match the kernel's parameter list exactly.
    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    // Optional buffers are appended last, matching kernel variants that
    // declare the extra parameters.
    if (!data1.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
    if (!data2.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));

    // Each work item handles pixels_per_work_item horizontally adjacent pixels.
    size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };   // smaller work-group size for Android GPUs
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
// Runs the FAST keypoint kernel over the image (minus a 3-pixel border) and
// returns the number of keypoints written to kpLoc_/score_.
// @param maxKeypoints  capacity of the kpLoc_/score_ buffers
int cv::ocl::FAST_OCL::calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints)
{
    // 3-pixel border on each side -> "cols - 6" / "rows - 6".
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3] = {divUp(img.cols - 6, localThreads[0]) * localThreads[0],
                               divUp(img.rows - 6, localThreads[1]) * localThreads[1],
                               1
                              };

    Context *clCxt = Context::getContext();
    String kernelName = (mask.empty()) ? "calcKeypoints" : "calcKeypointsWithMask";
    std::vector< std::pair<size_t, const void *> > args;

    // Device-side counter initialised from the host-side 'counter' value.
    // NOTE(review): 'err' from clCreateBuffer is never checked — consider
    // wrapping the call in the project's openCLSafeCall/verify helper.
    int counter = 0;
    int err = CL_SUCCESS;
    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
                                      CL_MEM_COPY_HOST_PTR, sizeof(int),
                                      &counter, &err);

    // Steps in elements, not bytes.
    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
    int scoreStep = score_.step / score_.elemSize();
    int nms = (nonmaxSupression) ? 1 : 0;

    // NOTE: the argument order must match the kernel signature; the mask
    // buffer and mask step are inserted only for the WithMask variant.
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
    if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nms));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxKeypoints));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&threshold));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
    if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&scoreStep));

    openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);

    // Blocking read of the final keypoint count, then release the buffer.
    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
                                       counterCL, CL_TRUE, 0,
                                       sizeof(int), &counter, 0, NULL, NULL));
    openCLSafeCall(clReleaseMemObject(counterCL));

    return counter;
}
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &masks) { if (query.empty() || trainCollection.empty()) return; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); const int nQuery = query.rows; ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx); ensureSizeIsEnough(1, nQuery, CV_32F, distance); matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType); return; }
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &/*maskCollection*/) { if (query.empty() || trainCollection.empty()) return; typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks, const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance); #if 0 static const caller_t callers[3][6] = { { ocl_match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/, ocl_match2L1_gpu<unsigned short>, ocl_match2L1_gpu<short>, ocl_match2L1_gpu<int>, ocl_match2L1_gpu<float> }, { 0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/, 0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/, 0/*match2L2_gpu<int>*/, ocl_match2L2_gpu<float> }, { ocl_match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/, ocl_match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/, ocl_match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/ } }; #endif CV_Assert(query.channels() == 1 && query.depth() < CV_64F); const int nQuery = query.rows; trainIdx.create(1, nQuery, CV_32SC2); imgIdx.create(1, nQuery, CV_32SC2); distance.create(1, nQuery, CV_32SC2); trainIdx.setTo(Scalar::all(-1)); //caller_t func = callers[distType][query.depth()]; //CV_Assert(func != 0); //func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream)); }
// knnMatch against the whole stored train collection.
// k == 2 uses the specialized 2-NN collection kernel; any other k falls back
// to matching each train image separately and merging per-image results.
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
        const std::vector<oclMat> &masks, bool compactResult)
{
    if (k == 2)
    {
        oclMat trainCollection;
        oclMat maskCollection;

        makeGpuCollection(trainCollection, maskCollection, masks);

        oclMat trainIdx, imgIdx, distance;

        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
        knnMatch2Download(trainIdx, imgIdx, distance, matches);
    }
    else
    {
        if (query.empty() || empty())
            return;

        std::vector< std::vector<DMatch> > curMatches;
        std::vector<DMatch> temp;
        temp.reserve(2 * k);   // merge buffer for two k-sized sorted lists

        // Pre-reserve k slots per query row.
        matches.resize(query.rows);
        std::for_each(matches.begin(), matches.end(), std::bind2nd(std::mem_fun_ref(&std::vector<DMatch>::reserve), k));

        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
        {
            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? oclMat() : masks[imgIdx]);

            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
            {
                std::vector<DMatch> &localMatch = curMatches[queryIdx];
                std::vector<DMatch> &globalMatch = matches[queryIdx];

                // Stamp each local match with the index of its train image.
                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));

                // Merge the two distance-sorted lists, keep only the best k.
                temp.clear();
                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));

                globalMatch.clear();
                const size_t count = std::min((size_t)k, temp.size());
                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
            }
        }

        if (compactResult)
        {
            // Drop query rows that produced no matches at all.
            std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(),
                    std::mem_fun_ref(&std::vector<DMatch>::empty));
            matches.erase(new_end, matches.end());
        }
    }
}
// Copies this matrix to mat, honoring an optional mask.
void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
{
    if (!mask.empty())
    {
        // Masked copy: allocate the destination, then copy only where the
        // mask is non-zero.
        mat.create(size(), type());
        copy_to_with_mask(*this, mat, mask, "copy_to_with_mask");
    }
    else
    {
        // No mask: delegate to the plain overload.
        copyTo(mat);
    }
}
// knn match void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, const oclMat &mask) { if (query.empty() || train.empty()) return; // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int int callType = query.depth(); char cvFuncName[] = "knnMatchSingle"; if (callType != 5) CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n"); if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0 || callType != 2 || callType != 4))) { CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n"); } CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); if (k == 2) { trainIdx.create(1, query.rows, CV_32SC2); distance.create(1, query.rows, CV_32FC2); } else { trainIdx.create(query.rows, k, CV_32S); distance.create(query.rows, k, CV_32F); allDist.create(query.rows, train.rows, CV_32FC1); } trainIdx.setTo(Scalar::all(-1)); kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); exit: return; }
// Copies this matrix to mat, honoring an optional mask.
void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
{
    if (!mask.empty())
    {
        // Masked copy: allocate the destination, then copy only where the
        // mask is non-zero.
        mat.create(size(), type());
        copy_to_with_mask(*this, mat, mask, "copy_to_with_mask");
    }
    else
    {
        // Unmasked copy: plain device-to-device 2D buffer copy.
        CV_DbgAssert(!this->empty());
        mat.create(size(), type());
        openCLCopyBuffer2D(clCxt, mat.data, mat.step, mat.offset, data, step, cols * elemSize(), rows, offset);
    }
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &/*maskCollection*/) { if (query.empty() || trainCollection.empty()) return; // typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks, // const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance); CV_Assert(query.channels() == 1 && query.depth() < CV_64F); const int nQuery = query.rows; ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx); ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); trainIdx.setTo(Scalar::all(-1)); //caller_t func = callers[distType][query.depth()]; //CV_Assert(func != 0); //func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream)); }
int cv::ocl::FAST_OCL::calcKeyPointsLocation(const oclMat& img, const oclMat& mask) { CV_Assert(img.type() == CV_8UC1); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size())); int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area()); ensureSizeIsEnough(ROWS_COUNT, maxKeypoints, CV_32SC1, kpLoc_); kpLoc_.setTo(Scalar::all(0)); if (nonmaxSupression) { ensureSizeIsEnough(img.size(), CV_32SC1, score_); score_.setTo(Scalar::all(0)); } count_ = calcKeypointsOCL(img, mask, maxKeypoints); count_ = std::min(count_, maxKeypoints); return count_; }
// One pyramid level of the TV-L1 dual optical-flow solver: refines the flow
// fields (u1, u2) between I0 and I1 at this scale.
void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2)
{
    using namespace ocl_tvl1flow;

    // Convergence threshold scaled by the number of pixels.
    const double scaledEpsilon = epsilon * epsilon * I0.size().area();

    CV_DbgAssert( I1.size() == I0.size() );
    CV_DbgAssert( I1.type() == I0.type() );
    CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == u1.size() );

    // Zero-initialise the flow when the caller gave no warm start.
    if (u1.empty())
    {
        u1.create(I0.size(), CV_32FC1);
        u1.setTo(Scalar::all(0));

        u2.create(I0.size(), CV_32FC1);
        u2.setTo(Scalar::all(0));
    }

    // All work matrices are ROI views into preallocated full-size buffers.
    oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
    centeredGradient(I1, I1x, I1y);

    oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));

    oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));

    // Dual variables, initialised to zero.
    oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
    oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));

    oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));

    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);

    for (int warpings = 0; warpings < warps; ++warpings)
    {
        // Warp I1 and its gradients towards I0 using the current flow.
        warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);

        double error = numeric_limits<double>::max();
        double prev_error = 0;

        for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
        {
            // some tweaks to make sum operation less frequently
            // (only sum the error image on odd iterations once the previous
            // error has dropped below the threshold)
            char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calc_error);
            if(calc_error)
            {
                error = ocl::sum(diff)[0];
                prev_error = error;
            }
            else
            {
                // Skip the expensive reduction this iteration.
                error = numeric_limits<double>::max();
                prev_error -= scaledEpsilon;
            }

            estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
        }
    }
}
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) { if (prevPts.empty()) { nextPts.release(); status.release(); //if (err) err->release(); return; } derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); iters = std::min(std::max(iters, 0), 100); const int cn = prevImg.oclchannels(); dim3 block, patch; calcPatchSize(winSize, cn, block, patch, isDeviceArch11_); CV_Assert(derivLambda >= 0); CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6); CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2); if (useInitialFlow) CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2); else ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); oclMat temp2 = nextPts.reshape(1); //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f)); multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f); //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); //status.setTo(Scalar::all(1)); setTo(status, Scalar::all(1)); bool errMat = false; if (!err) { err = new oclMat(1, prevPts.cols, CV_32FC1); errMat = true; } else ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); //ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err); // build the image pyramids. 
prevPyr_.resize(maxLevel + 1); nextPyr_.resize(maxLevel + 1); if (cn == 1 || cn == 4) { //prevImg.convertTo(prevPyr_[0], CV_32F); //nextImg.convertTo(nextPyr_[0], CV_32F); convertTo(prevImg, prevPyr_[0], CV_32F); convertTo(nextImg, nextPyr_[0], CV_32F); } else { //oclMat buf_; // cvtColor(prevImg, buf_, COLOR_BGR2BGRA); // buf_.convertTo(prevPyr_[0], CV_32F); // cvtColor(nextImg, buf_, COLOR_BGR2BGRA); // buf_.convertTo(nextPyr_[0], CV_32F); } for (int level = 1; level <= maxLevel; ++level) { pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]); pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]); } // dI/dx ~ Ix, dI/dy ~ Iy for (int level = maxLevel; level >= 0; level--) { lkSparse_run(prevPyr_[level], nextPyr_[level], prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols, level, /*block, */patch, winSize, iters); } clFinish(prevImg.clCxt->impl->clCmdQueue); if(errMat) delete err; }
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, const oclMat &src3, double beta, oclMat &dst, int flags) { CV_Assert(src1.cols == src2.rows && (src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols))); CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported if(!src3.empty()) { src3.copyTo(dst); } else { dst.create(src1.rows, src2.cols, src1.type()); dst.setTo(Scalar::all(0)); } clBlasSetup(); const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasOrder order = clAmdBlasRowMajor; const int M = src1.rows; const int N = src2.cols; const int K = src1.cols; int lda = src1.step; int ldb = src2.step; int ldc = dst.step; int offa = src1.offset; int offb = src2.offset; int offc = dst.offset; cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr(); switch(src1.type()) { case CV_32FC1: lda /= sizeof(float); ldb /= sizeof(float); ldc /= sizeof(float); offa /= sizeof(float); offb /= sizeof(float); offc /= sizeof(float); openCLSafeCall ( clAmdBlasSgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_64FC1: lda /= sizeof(double); ldb /= sizeof(double); ldc /= sizeof(double); offa /= sizeof(double); offb /= sizeof(double); offc /= sizeof(double); openCLSafeCall ( clAmdBlasDgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_32FC2: { lda /= (2*sizeof(float)); ldb /= (2*sizeof(float)); ldc /= (2*sizeof(float)); offa /= (2*sizeof(float)); offb /= (2*sizeof(float)); offc /= (2*sizeof(float)); cl_float2 alpha_2 = {{alpha, 0}}; cl_float2 beta_2 = {{beta, 0}}; openCLSafeCall ( 
clAmdBlasCgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; case CV_64FC2: { lda /= (2*sizeof(double)); ldb /= (2*sizeof(double)); ldc /= (2*sizeof(double)); offa /= (2*sizeof(double)); offb /= (2*sizeof(double)); offc /= (2*sizeof(double)); cl_double2 alpha_2 = {{alpha, 0}}; cl_double2 beta_2 = {{beta, 0}}; openCLSafeCall ( clAmdBlasZgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; } }