// radiusMatchSingle void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask) { if (query.empty() || train.empty()) return; // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int int callType = query.depth(); char cvFuncName[] = "radiusMatchSingle"; if (callType != 5) CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n"); if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0 || callType != 2 || callType != 4))) { CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n"); } CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size())); nMatches.create(1, query.rows, CV_32SC1); if (trainIdx.empty()) { trainIdx.create(query.rows, std::max((train.rows/ 100), 10), CV_32SC1); distance.create(query.rows, std::max((train.rows/ 100), 10), CV_32FC1); } nMatches.setTo(Scalar::all(0)); matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); exit: return; }
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &masks) { if (query.empty() || trainCollection.empty()) return; // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int int callType = query.depth(); char cvFuncName[] = "matchCollection"; if (callType != 5) CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n"); if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0 || callType != 2 || callType != 4))) { CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n"); } CV_Assert(query.channels() == 1 && query.depth() < CV_64F); trainIdx.create(1, query.rows, CV_32S); imgIdx.create(1, query.rows, CV_32S); distance.create(1, query.rows, CV_32F); matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType); exit: return; }
/////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// /////////////////////////////////////////////////////////////////////////// static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta) { String kernelName = "convert_to_S"; std::stringstream idxStr; idxStr << src.depth(); kernelName = kernelName + idxStr.str().c_str(); float alpha_f = alpha, beta_f = beta; CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols); std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = 1; int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); if(dst.type() == CV_8UC1) { globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; } args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f )); openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, 
globalThreads, localThreads, args, dst.oclchannels(), dst.depth()); }
void cv::ocl::distanceToCenters(const oclMat &src, const oclMat ¢ers, Mat &dists, Mat &labels, int distType) { CV_Assert(src.cols * src.channels() == centers.cols * centers.channels()); CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F); CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR); dists.create(src.rows, 1, CV_32FC1); labels.create(src.rows, 1, CV_32SC1); std::stringstream build_opt_ss; build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST"); int src_step = src.step / src.elemSize1(); int centers_step = centers.step / centers.elemSize1(); int feature_width = centers.cols * centers.oclchannels(); int src_offset = src.offset / src.elemSize1(); int centers_offset = centers.offset / centers.elemSize1(); int all_dist_count = src.rows * centers.rows; oclMat all_dist(1, all_dist_count, CV_32FC1); vector<pair<size_t, const void *> > args; args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)¢ers.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&all_dist.data)); args.push_back(make_pair(sizeof(cl_int), (void *)&feature_width)); args.push_back(make_pair(sizeof(cl_int), (void *)&src_step)); args.push_back(make_pair(sizeof(cl_int), (void *)¢ers_step)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.rows)); args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset)); args.push_back(make_pair(sizeof(cl_int), (void *)¢ers_offset)); size_t globalThreads[3] = { all_dist_count, 1, 1 }; openCLExecuteKernel(Context::getContext(), &kmeans_kernel, "distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str()); Mat all_dist_cpu; all_dist.download(all_dist_cpu); for (int i = 0; i < src.rows; ++i) { Point p; double minVal; Rect roi(i * centers.rows, 0, centers.rows, 1); Mat hdr(all_dist_cpu, roi); cv::minMaxLoc(hdr, &minVal, NULL, &p); dists.at<float>(i, 0) = static_cast<float>(minVal); 
labels.at<int>(i, 0) = p.x; } }
static void lkSparse_run(oclMat &I, oclMat &J, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount, int level, /*dim3 block, */dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; String kernelName = "lkSparse"; size_t localThreads[3] = { 8, 8, 1 }; size_t globalThreads[3] = { 8 * ptcount, 8, 1}; int cn = I.oclchannels(); char calcErr = level==0?1:0; std::vector<std::pair<size_t , const void *> > args; cl_mem ITex = bindTexture(I); cl_mem JTex = bindTexture(J); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&level )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); bool is_cpu = isCpuDevice(); if (is_cpu) { openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU"); } else { std::stringstream 
idxStr; idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth(); cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str()); int wave_size = (int)queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[32] = {0}; sprintf(opt, "-D WAVE_SIZE=%d", wave_size); openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt); } releaseTexture(ITex); releaseTexture(JTex); }
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName) { std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); if (dst.type() == CV_8UC1) globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char channelMap[] = { ' ', ' ', '2', '4', '4' }; std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]); Mat mat(1, 1, dst.type(), scalar); #ifdef CL_VERSION_1_2 // this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && dst.offset == 0 && dst.cols == dst.wholecols) { const int sizeofMap[][7] = { { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, }; int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, 0, dst.step * dst.rows, 0, NULL, NULL); } else #endif { oclMat m(mat); args.push_back( std::make_pair( sizeof(cl_mem) , (void*)&m.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( std::make_pair( sizeof(cl_int) 
, (void *)&dst.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } }
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2, oclMat &dst) { CV_Assert(src1.depth() <= CV_32F); CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() && weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1); dst.create(src1.size(), src1.type()); size_t globalSize[] = { (size_t)dst.cols, (size_t)dst.rows, 1}; size_t localSize[] = { 16, 16, 1 }; int depth = dst.depth(), ocn = dst.oclchannels(); int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize(); int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize(); int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize(); int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize(); int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); const char * const channelMap[] = { "", "", "2", "4", "4" }; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s", typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn], depth >= CV_32S ? 
"" : "_sat_rte", channelMap[ocn], channelMap[ocn]); vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args, -1, -1, buildOptions.c_str()); }
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &trainIdx, const oclMat &distance, int distType) { cv::ocl::Context *ctx = query.clCxt; size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); int block_size = BLOCK_SIZE; int m_size = MAX_DESC_LEN; std::vector< std::pair<size_t, const void *> > args; char opt [OPT_SIZE] = ""; sprintf(opt, "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d", T_ARR[query.depth()], distType, block_size, m_size); if(globalSize[0] != 0) { args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data )); //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data )); args.push_back( std::make_pair( smemSize, (void *)NULL)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step )); String kernelName = "BruteForceMatch_UnrollMatch"; openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt); } }
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta ) { //cout << "cv::ocl::oclMat::convertTo()" << endl; bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon(); if( rtype < 0 ) rtype = src.type(); else rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels()); int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype); if( sdepth == ddepth && noScale ) { src.copyTo(dst); return; } oclMat temp; const oclMat *psrc = &src; if( sdepth != ddepth && psrc == &dst ) psrc = &(temp = src); dst.create( src.size(), rtype ); convert_run_cus(*psrc, dst, alpha, beta); }
// knn match void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, const oclMat &mask) { if (query.empty() || train.empty()) return; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); const int nQuery = query.rows; const int nTrain = train.rows; if (k == 2) { ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); } else { ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); ensureSizeIsEnough(nQuery, k, CV_32F, distance); ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist); } trainIdx.setTo(Scalar::all(-1)); kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); return; }
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName, const std::string & additionalOptions = std::string(), const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat()) { int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx); if (!additionalOptions.empty()) build_options += additionalOptions; vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); if (!data1.empty()) args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data )); if (!data2.empty()) args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data )); size_t gt[3] = { dst.cols, dst.rows, 1 }; #ifdef ANDROID size_t lt[3] = { 16, 10, 1 }; #else size_t lt[3] = { 16, 16, 1 }; #endif openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); }
static void convert_C3C4(const cl_mem &src, oclMat &dst) { Context *clCxt = dst.clCxt; int pixel_end = dst.wholecols * dst.wholerows - 1; int dstStep_in_pixel = dst.step1() / dst.oclchannels(); const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[dst.depth()]); std::vector< std::pair<size_t, const void *> > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholecols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholerows)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end)); size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 }; #ifdef ANDROID openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, NULL, args, -1, -1, buildOptions.c_str()); #else size_t localThreads[3] = { 256, 1, 1 }; openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); #endif }
// Converts this matrix into `dst` with depth taken from `rtype`, applying
// alpha/beta through convert_run().
//
//   dst    output; (re)created with this matrix's size and the resolved type.
//   rtype  negative keeps this matrix's type; otherwise only its depth is used
//          and the channel count comes from channels().
//   alpha, beta  scale and shift; when alpha == 1 and beta == 0 (within
//          machine epsilon) and the depth is unchanged, the data is copied.
//
// Raises Error::OpenCLDoubleNotSupported when the device lacks double support
// and either the source depth or the requested destination depth is CV_64F.
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
{
    // Resolve the destination type first, so the double-support guard below
    // looks at the depth `dst` is about to receive. The depth `dst` happened
    // to hold on entry says nothing about the conversion being requested.
    if( rtype < 0 )
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);

    if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
            (sdepth == CV_64F || ddepth == CV_64F))
    {
        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
        return;
    }

    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                   && fabs(beta) < std::numeric_limits<double>::epsilon();

    if( sdepth == ddepth && noScale )
    {
        copyTo(dst);
        return;
    }

    // In-place conversion to a different depth: read from a separate header,
    // because dst.create() below replaces dst.
    oclMat temp;
    const oclMat *psrc = this;
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = *this);

    dst.create( size(), rtype );
    convert_run(*psrc, dst, alpha, beta);
}
static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char channelMap[] = { ' ', ' ', '2', '4', '4' }; std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]); oclMat m(Mat(1, 1, dst.type(), scalar)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&m.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); }
// Allocates `dst` at half the size of `src` (rounded up, same type) and runs
// pyrdown_run_cus() on the pair.
// Requires a depth up to CV_32F and at most 4 channels.
static void pyrDown_cus(const oclMat &src, oclMat &dst)
{
    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);

    const int halfRows = (src.rows + 1) / 2;
    const int halfCols = (src.cols + 1) / 2;
    dst.create(halfRows, halfCols, src.type());

    pyrdown_run_cus(src, dst);
}
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result) { cv::ocl::Context *ctx = img1.clCxt; assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); int channels = img1.oclchannels(); int depth = img1.depth(); int rows = img1.rows; int cols = img1.cols; int istep = img1.step1(); int wstep = weights1.step1(); size_t globalSize[] = {cols * channels / 4, rows, 1}; size_t localSize[] = {256, 1, 1}; vector< pair<size_t, const void *> > args; if(globalSize[0] != 0) { args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); std::string kernelName = "BlendLinear"; openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); } }
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName) { std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d", src.depth(), greenbits, src.channels(), bidx); int src_offset = (int)src.offset, src_step = (int)src.step; int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1; vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); size_t gt[3] = { dst.cols, dst.rows, 1 }; #ifdef ANDROID size_t lt[3] = { 16, 10, 1 }; #else size_t lt[3] = { 16, 16, 1 }; #endif openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); }
// radiusMatchSingle void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask) { if (query.empty() || train.empty()) return; const int nQuery = query.rows; const int nTrain = train.rows; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size())); ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches); if (trainIdx.empty()) { ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx); ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance); } nMatches.setTo(Scalar::all(0)); matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); return; }
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse) { int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER"); vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); size_t gt[3] = { dst.cols, dst.rows, 1 }; #ifdef ANDROID size_t lt[3] = { 16, 10, 1 }; #else size_t lt[3] = { 16, 16, 1 }; #endif openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str()); }
void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); vector<pair<size_t , const void *> > args; cl_float4 val; val.s[0] = scalar.val[0]; val.s[1] = scalar.val[1]; val.s[2] = scalar.val[2]; val.s[3] = scalar.val[3]; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = 1; if(dst.type() == CV_8UC1) { globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; } int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); args.push_back( make_pair( sizeof(cl_float4) , (void *)&val )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, localThreads, args, dst.channels(), dst.depth()); }
//////////////////////////////////////////////////////////////////////////// ////////////////////merge////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst) { Context *clCxt = mat_dst.clCxt; int channels = mat_dst.channels(); int depth = mat_dst.depth(); string kernelName = "merge_vector"; int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0}, {4, 4, 2, 2, 1, 1, 1}, {4, 4, 2, 2 , 1, 1, 1}, {4, 4, 2, 2, 1, 1, 1} }; size_t index = indexes[channels-1][mat_dst.depth()]; int cols = divUp(mat_dst.cols, index); size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], divUp(mat_dst.rows, localThreads[1]) * localThreads[1], 1 }; vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.step)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[0].data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[0].step)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[1].data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[1].step)); if(n >= 3) { args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step)); } if(n >= 4) { args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step)); } openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth); }
void cv::ocl::pyrDown(const oclMat &src, oclMat &dst) { int depth = src.depth(), channels = src.channels(); CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F); CV_Assert(channels == 1 || channels == 3 || channels == 4); dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); pyrdown_run(src, dst); }
/////////////////////////////////////////////////////////////////////////// ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols && src.rows == dst.rows && src.cols == dst.cols); vector<pair<size_t , const void *> > args; int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1}, {2, 2, 1, 1, 1, 1, 1}, {8, 8, 8, 8 , 4, 4, 4}, //vector length is undefined when channels = 3 {1, 1, 1, 1, 1, 1, 1} }; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; int vector_length = vector_lengths[dst.channels() -1][dst.depth()]; int offset_cols = divUp(dst.offset, dst.elemSize()) & (vector_length - 1); int cols = vector_length == 1 ? divUp(dst.cols, vector_length) : divUp(dst.cols + offset_cols, vector_length); globalThreads[0] = divUp(cols, localThreads[0]) * localThreads[0]; globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1]; globalThreads[2] = 1; int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , 
(void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads, localThreads, args, dst.channels(), dst.depth()); }
// knn match void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, const oclMat &mask) { if (query.empty() || train.empty()) return; // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int int callType = query.depth(); char cvFuncName[] = "knnMatchSingle"; if (callType != 5) CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n"); if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0 || callType != 2 || callType != 4))) { CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n"); } CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); if (k == 2) { trainIdx.create(1, query.rows, CV_32SC2); distance.create(1, query.rows, CV_32FC2); } else { trainIdx.create(query.rows, k, CV_32S); distance.create(query.rows, k, CV_32F); allDist.create(query.rows, train.rows, CV_32FC1); } trainIdx.setTo(Scalar::all(-1)); kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); exit: return; }
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float /*maxDistance*/, const std::vector<oclMat> &masks) { if (query.empty() || empty()) return; typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks, const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches); #if 0 static const caller_t callers[3][6] = { { ocl_matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/, ocl_matchL1_gpu<unsigned short>, matchL1_gpu<short>, ocl_matchL1_gpu<int>, matchL1_gpu<float> }, { 0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/, 0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/, 0/*matchL2_gpu<int>*/, ocl_matchL2_gpu<float> }, { ocl_matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/, ocl_matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/, ocl_matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/ } }; #endif const int nQuery = query.rows; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size())); nMatches.create(1, nQuery, CV_32SC1); if (trainIdx.empty()) { trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1); imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1); distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1); } nMatches.setTo(Scalar::all(0)); //caller_t func = callers[distType][query.depth()]; //CV_Assert(func != 0); std::vector<oclMat> trains_(trainDescCollection.begin(), trainDescCollection.end()); std::vector<oclMat> masks_(masks.begin(), masks.end()); /* func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], trainIdx, imgIdx, distance, nMatches));*/ }
static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta) { String kernelName = "convert_to"; float alpha_f = alpha, beta_f = beta; int sdepth = src.depth(), ddepth = dst.depth(); int sstep1 = (int)src.step1(), dstep1 = (int)dst.step1(); int cols1 = src.cols * src.oclchannels(); char buildOptions[150], convertString[50]; const char * typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; sprintf(convertString, "convert_%s_sat_rte", typeMap[ddepth]); sprintf(buildOptions, "-D srcT=%s -D dstT=%s -D convertToDstType=%s", typeMap[sdepth], typeMap[ddepth], CV_32F == ddepth || ddepth == CV_64F ? "" : convertString); CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols); std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { divUp(cols1, localThreads[0]) * localThreads[0], divUp(dst.rows, localThreads[1]) * localThreads[1], 1 }; int doffset1 = dst.offset / dst.elemSize1(); int soffset1 = src.offset / src.elemSize1(); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols1 )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sstep1 )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&soffset1 )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstep1 )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&doffset1 )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f )); openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions); }
void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, const oclMat &mask) { if (query.empty() || train.empty()) return; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.cols == query.cols && train.type() == query.type()); ensureSizeIsEnough(1, query.rows, CV_32S, trainIdx); ensureSizeIsEnough(1, query.rows, CV_32F, distance); matchDispatcher(query, train, mask, trainIdx, distance, distType); return; }
//////////////////////////////////////////////////////////////////////// // convert_C4C3 static void convert_C4C3(const oclMat &src, cl_mem &dst) { int srcStep_in_pixel = src.step1() / src.oclchannels(); int pixel_end = src.wholecols * src.wholerows - 1; Context *clCxt = src.clCxt; string kernelName = "convertC4C3"; char compile_option[32]; switch(src.depth()) { case 0: sprintf(compile_option, "-D GENTYPE4=uchar4"); break; case 1: sprintf(compile_option, "-D GENTYPE4=char4"); break; case 2: sprintf(compile_option, "-D GENTYPE4=ushort4"); break; case 3: sprintf(compile_option, "-D GENTYPE4=short4"); break; case 4: sprintf(compile_option, "-D GENTYPE4=int4"); break; case 5: sprintf(compile_option, "-D GENTYPE4=float4"); break; case 6: sprintf(compile_option, "-D GENTYPE4=double4"); break; default: CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel)); args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1}; size_t localThreads[3] = {256, 1, 1}; openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); }
static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName, const std::string & additionalOptions = std::string(), const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat()) { int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); int pixels_per_work_item = 1; if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE)) { if ((src.cols % 4 == 0) && (src.depth() == CV_8U)) pixels_per_work_item = 4; else if (src.cols % 2 == 0) pixels_per_work_item = 2; else pixels_per_work_item = 1; } std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item); if (!additionalOptions.empty()) build_options += additionalOptions; vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); if (!data1.empty()) args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data )); if (!data2.empty()) args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data )); size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 }; #ifdef ANDROID size_t lt[3] = { 16, 10, 1 }; #else size_t lt[3] = { 16, 16, 1 }; #endif openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); }
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &masks) { if (query.empty() || trainCollection.empty()) return; CV_Assert(query.channels() == 1 && query.depth() < CV_64F); const int nQuery = query.rows; ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx); ensureSizeIsEnough(1, nQuery, CV_32F, distance); matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType); return; }