void cv::ocl::oclMat::copyTo( oclMat &m ) const
{
    CV_DbgAssert(!this->empty());
    m.create(size(), type());
    openCLCopyBuffer2D(clCxt, m.data, m.step, m.offset,
                       data, step, cols * elemSize(), rows, offset,
                       clMemcpyDeviceToDevice);
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    int srcStep_in_pixel = src.step1() / src.oclchannels();
    int pixel_end = src.wholecols * src.wholerows - 1;
    Context *clCxt = src.clCxt;
    string kernelName = "convertC4C3";

    char compile_option[32];
    switch(src.depth())
    {
    case 0:
        sprintf(compile_option, "-D GENTYPE4=uchar4");
        break;
    case 1:
        sprintf(compile_option, "-D GENTYPE4=char4");
        break;
    case 2:
        sprintf(compile_option, "-D GENTYPE4=ushort4");
        break;
    case 3:
        sprintf(compile_option, "-D GENTYPE4=short4");
        break;
    case 4:
        sprintf(compile_option, "-D GENTYPE4=int4");
        break;
    case 5:
        sprintf(compile_option, "-D GENTYPE4=float4");
        break;
    case 6:
        sprintf(compile_option, "-D GENTYPE4=double4");
        break;
    default:
        CV_Error(CV_StsUnsupportedFormat, "unknown depth");
    }

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
    args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));

    size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1};
    size_t localThreads[3] = {256, 1, 1};

    openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads,
                        args, -1, -1, compile_option);
}
void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight,
                                                   const oclMat& mean, oclMat& dst, int nmixtures)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};

    int weight_step = (int)(weight.step/weight.elemSize());
    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int dst_step = (int)(dst.step/dst.elemSize());

    int dst_y = (int)(dst.offset/dst.step);
    int dst_x = (int)(dst.offset%dst.step);
    dst_x = dst_x/(int)dst.elemSize();

    String kernel_name = "getBackgroundImage2_kernel";

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }
    else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    std::vector<std::pair<size_t, const void*> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_y));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread,
                        args, -1, -1, build_option);
}
void cv::ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
{
    if (image.empty())
        return;

    (*this)(image, mask, d_keypoints_);
    downloadKeypoints(d_keypoints_, keypoints);
}
void cv::ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
        return;

    Mat h_keypoints(d_keypoints);
    convertKeypoints(h_keypoints, keypoints);
}
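// Illustrative usage sketch (not part of the original source): how the two FAST_OCL entry points
// above are typically combined from user code. The constructor arguments (threshold, nonmax
// suppression flag) and the detectFASTKeypointsExample name are assumptions for illustration;
// assumes <opencv2/ocl/ocl.hpp> and <vector> are included.
static void detectFASTKeypointsExample(const cv::Mat &frame, std::vector<cv::KeyPoint> &keypoints)
{
    cv::ocl::oclMat d_frame(frame);                          // upload a CV_8UC1 image to the device
    cv::ocl::FAST_OCL fast(20 /*threshold*/, true /*nonmaxSupression*/);
    fast(d_frame, cv::ocl::oclMat(), keypoints);             // empty mask; results are downloaded internally
}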
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
                                       src.depth(), dst.channels(), src.channels(),
                                       reverse ? "REVERSE" : "ORDER");

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
}
int cv::ocl::FAST_OCL::calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints)
{
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3] = {divUp(img.cols - 6, localThreads[0]) * localThreads[0],
                               divUp(img.rows - 6, localThreads[1]) * localThreads[1],
                               1};

    Context *clCxt = Context::getContext();
    String kernelName = (mask.empty()) ? "calcKeypoints" : "calcKeypointsWithMask";
    std::vector< std::pair<size_t, const void *> > args;

    int counter = 0;
    int err = CL_SUCCESS;
    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
                                      CL_MEM_COPY_HOST_PTR, sizeof(int),
                                      &counter, &err);

    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
    int scoreStep = score_.step / score_.elemSize();
    int nms = (nonmaxSupression) ? 1 : 0;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
    if (!mask.empty())
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nms));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxKeypoints));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&threshold));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
    if (!mask.empty())
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&scoreStep));

    openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);

    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
                                       counterCL, CL_TRUE, 0, sizeof(int),
                                       &counter, 0, NULL, NULL));
    openCLSafeCall(clReleaseMemObject(counterCL));

    return counter;
}
// knn match
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
        oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
{
    if (query.empty() || train.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);

    const int nQuery = query.rows;
    const int nTrain = train.rows;

    if (k == 2)
    {
        ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
        ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
    }
    else
    {
        ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
        ensureSizeIsEnough(nQuery, k, CV_32F, distance);
        ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
    }

    trainIdx.setTo(Scalar::all(-1));

    kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);

    return;
}
static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
                                int nmixtures, float varThreshold, float backgroundRatio)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {frame.cols, frame.rows, 1};

    int frame_step = (int)(frame.step/frame.elemSize());
    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
    int weight_step = (int)(weight.step/weight.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int var_step = (int)(var.step/var.elemSize());

    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();

    int frame_offset_y = (int)(frame.offset/frame.step);
    int frame_offset_x = (int)(frame.offset%frame.step);
    frame_offset_x = frame_offset_x/(int)frame.elemSize();

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }
    else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    String kernel_name = "mog_withoutLearning_kernel";
    std::vector<std::pair<size_t, const void*> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                         const std::string & additionalOptions = std::string(),
                         const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    if (!data1.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
    if (!data2.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));

    size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &result)
{
    cv::ocl::Context *ctx = img1.clCxt;
    assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);

    int channels = img1.oclchannels();
    int depth = img1.depth();
    int rows = img1.rows;
    int cols = img1.cols;
    int istep = img1.step1();
    int wstep = weights1.step1();
    size_t globalSize[] = {cols * channels / 4, rows, 1};
    size_t localSize[] = {256, 1, 1};

    vector< pair<size_t, const void *> > args;

    result.create(img1.size(), CV_MAKE_TYPE(depth, img1.channels()));
    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
        std::string kernelName = "BlendLinear";

        openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
    }
}
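// Illustrative usage sketch (not part of the original source): blending two images with per-pixel
// weight maps through the public cv::ocl::blendLinear entry point above. Variable names and the
// blendExample helper are made up for the example; weights are assumed to be CV_32FC1 maps of the
// same size as the images. Assumes <opencv2/ocl/ocl.hpp> is included.
static void blendExample(const cv::Mat &a, const cv::Mat &b, const cv::Mat &wa, const cv::Mat &wb, cv::Mat &out)
{
    cv::ocl::oclMat d_a(a), d_b(b);        // upload the two source images
    cv::ocl::oclMat d_wa(wa), d_wb(wb);    // upload the weight maps
    cv::ocl::oclMat d_result;
    cv::ocl::blendLinear(d_a, d_b, d_wa, d_wb, d_result);
    d_result.download(out);                // copy the blended image back to host memory
}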
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection,
        oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &masks)
{
    if (query.empty() || trainCollection.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
    ensureSizeIsEnough(1, nQuery, CV_32F, distance);

    matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);

    return;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////// split /////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
{
    Context *clCxt = mat_src.clCxt;
    int channels = mat_src.channels();
    int depth = mat_src.depth();

    string kernelName = "split_vector";

    int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0},
        {8, 8, 8, 8, 4, 4, 2},
        {8, 8, 8, 8, 4, 4, 4},
        {4, 4, 2, 2, 1, 1, 1}
    };

    size_t index = indexes[channels - 1][mat_dst[0].depth()];
    int cols = divUp(mat_src.cols, index);
    size_t localThreads[3] = { 64, 4, 1 };
    size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
                                divUp(mat_src.rows, localThreads[1]) * localThreads[1],
                                1
                              };

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[0].step));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[1].step));
    if(channels >= 3)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data));
        args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[2].step));
    }
    if(channels >= 4)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data));
        args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[3].step));
    }

    openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth);
}
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err)
{
    CV_Assert(prevImg.type() == CV_8UC1);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
    CV_Assert(maxLevel >= 0);
    CV_Assert(winSize.width > 2 && winSize.height > 2);

    if (err)
        err->create(prevImg.size(), CV_32FC1);

    prevPyr_.resize(maxLevel + 1);
    nextPyr_.resize(maxLevel + 1);

    prevPyr_[0] = prevImg;
    //nextImg.convertTo(nextPyr_[0], CV_32F);
    convertTo(nextImg, nextPyr_[0], CV_32F);

    for (int level = 1; level <= maxLevel; ++level)
    {
        pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
        pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
    }

    ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
    //uPyr_[1].setTo(Scalar::all(0));
    //vPyr_[1].setTo(Scalar::all(0));
    setTo(uPyr_[1], Scalar::all(0));
    setTo(vPyr_[1], Scalar::all(0));

    Size winSize2i(winSize.width, winSize.height);

    int idx = 0;

    for (int level = maxLevel; level >= 0; level--)
    {
        int idx2 = (idx + 1) & 1;

        lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
                    level == 0 ? err : 0, winSize2i, iters);

        if (level > 0)
            idx = idx2;
    }

    //uPyr_[idx].copyTo(u);
    //vPyr_[idx].copyTo(v);
    copyTo(uPyr_[idx], u);
    copyTo(vPyr_[idx], v);

    clFinish(prevImg.clCxt->impl->clCmdQueue);
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep)
{
    int srcStep = src.step1() / src.channels();
    Context *clCxt = src.clCxt;
    string kernelName = "convertC4C3";

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));

    size_t globalThreads[3] = {(src.wholecols * src.wholerows + 255) / 256 * 256, 1, 1};
    size_t localThreads[3] = {256, 1, 1};

    openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads,
                        args, -1, src.elemSize1() >> 1);
}
void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection,
        const std::vector<oclMat> &masks)
{
    if (empty())
        return;

    if (masks.empty())
    {
        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));

        oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();

        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
            *trainCollectionCPU_ptr = trainDescCollection[i];

        trainCollection.upload(trainCollectionCPU);
        maskCollection.release();
    }
    else
    {
        CV_Assert(masks.size() == trainDescCollection.size());

        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
        Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));

        oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
        oclMat *maskCollectionCPU_ptr = maskCollectionCPU.ptr<oclMat>();

        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
        {
            const oclMat &train = trainDescCollection[i];
            const oclMat &mask = masks[i];

            CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));

            *trainCollectionCPU_ptr = train;
            *maskCollectionCPU_ptr = mask;
        }

        trainCollection.upload(trainCollectionCPU);
        maskCollection.upload(maskCollectionCPU);
    }
}
void detectKeypoints(oclMat &keypoints)
{
    // create image pyramid buffers
    // different layers share the same-sized buffers, but they are sampled with a Gaussian kernel.
    ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
    ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);

    ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
    ensureSizeIsEnough(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
    keypoints.setTo(Scalar::all(0));

    for (int octave = 0; octave < surf_.nOctaves; ++octave)
    {
        const int layer_rows = img_rows >> octave;
        const int layer_cols = img_cols >> octave;

        //loadOctaveConstants(octave, layer_rows, layer_cols);

        icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);

        icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
                                 octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);

        unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
        maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));

        if (maxCounter > 0)
        {
            icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
                                       keypoints, counters, octave, layer_rows, maxFeatures);
        }
    }

    unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
    featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));

    keypoints.cols = featureCounter;

    if (surf_.upright)
        keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
    else
        findOrientation(keypoints);
}
static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count,
                       double gamma1, bool flag)
{
    Context *clCxt = Context::getContext();
    String kernelName = "svm_rbf";

    int width = var_count;
    int src_step = (int)src.step / src.elemSize();
    int src_e_step = (int)src_e.step / src_e.elemSize();
    int dst_step = (int)dst.step / dst.elemSize();

    int x = MIN(16, src_rows);
    int y = MIN(16, src2_cols);
    size_t localThreads[] = {x, y, 1};
    size_t globalThreads[] = {src2_cols, src_rows, 1};

    char build_options[50] = ""; // no extra compile options unless ADDEXP is requested
    if(flag)
        sprintf(build_options, "-D ADDEXP");

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src_e.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_e_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));

    float gamma = 0.0f;
    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
    {
        gamma = (float)gamma1;
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&gamma));
    }
    else
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&gamma1));

    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches,
        int k, const std::vector<oclMat> &masks, bool compactResult)
{
    if (k == 2)
    {
        oclMat trainCollection;
        oclMat maskCollection;

        makeGpuCollection(trainCollection, maskCollection, masks);

        oclMat trainIdx, imgIdx, distance;

        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
        knnMatch2Download(trainIdx, imgIdx, distance, matches);
    }
    else
    {
        if (query.empty() || empty())
            return;

        std::vector< std::vector<DMatch> > curMatches;
        std::vector<DMatch> temp;
        temp.reserve(2 * k);

        matches.resize(query.rows);
        std::for_each(matches.begin(), matches.end(), std::bind2nd(std::mem_fun_ref(&std::vector<DMatch>::reserve), k));

        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
        {
            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? oclMat() : masks[imgIdx]);

            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
            {
                std::vector<DMatch> &localMatch = curMatches[queryIdx];
                std::vector<DMatch> &globalMatch = matches[queryIdx];

                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));

                temp.clear();
                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));

                globalMatch.clear();
                const size_t count = std::min((size_t)k, temp.size());
                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
            }
        }

        if (compactResult)
        {
            std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(),
                    std::mem_fun_ref(&std::vector<DMatch>::empty));
            matches.erase(new_end, matches.end());
        }
    }
}
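// Illustrative usage sketch (not part of the original source): 2-NN matching of two descriptor sets
// followed by a ratio test, using the single-train knnMatch overload that the collection version
// above calls internally. The L2Dist enum value and the knnMatchExample helper are assumptions for
// illustration; assumes <opencv2/ocl/ocl.hpp> and <vector> are included.
static void knnMatchExample(const cv::Mat &queryDesc, const cv::Mat &trainDesc, std::vector<cv::DMatch> &good)
{
    cv::ocl::oclMat d_query(queryDesc), d_train(trainDesc);   // upload float descriptors to the device
    cv::ocl::BruteForceMatcher_OCL_base matcher(cv::ocl::BruteForceMatcher_OCL_base::L2Dist);

    std::vector< std::vector<cv::DMatch> > knn;
    matcher.knnMatch(d_query, d_train, knn, 2);               // k == 2 takes the specialized 2-NN path

    good.clear();
    for (size_t i = 0; i < knn.size(); ++i)                   // Lowe-style ratio test on the two candidates
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            good.push_back(knn[i][0]);
}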
static void split(const oclMat &mat_src, oclMat *mat_dst)
{
    CV_Assert(mat_dst);

    int depth = mat_src.depth();
    int num_channels = mat_src.oclchannels();
    Size size = mat_src.size();

    if(num_channels == 1)
    {
        mat_src.copyTo(mat_dst[0]);
        return;
    }

    int i;
    for(i = 0; i < num_channels; i++)
        mat_dst[i].create(size, CV_MAKETYPE(depth, 1));

    split_vector_run(mat_src, mat_dst);
}
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
                                 float scale, oclMat &map_x, oclMat &map_y)
{
    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
    CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());

    Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());

    Mat KRT_mat(1, 12, CV_32FC1); // 9 elements of K*R^T followed by the 3 elements of T
    K_Rinv.reshape(1, 1).copyTo(KRT_mat(Range::all(), Range(0, 9)));
    T.reshape(1, 1).copyTo(KRT_mat(Range::all(), Range(9, 12)));

    oclMat KRT_oclMat(KRT_mat); // transfer K_Rinv and T into a single cl_mem

    map_x.create(dst_roi.size(), CV_32F);
    map_y.create(dst_roi.size(), CV_32F);

    int tl_u = dst_roi.tl().x;
    int tl_v = dst_roi.tl().y;

    Context *clCxt = Context::getContext();
    string kernelName = "buildWarpPlaneMaps";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_oclMat.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
    args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
    args.push_back( make_pair( sizeof(cl_float), (void *)&scale));

    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
    size_t localThreads[3] = {32, 8, 1};
    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
}
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
{
    //cout << "cv::ocl::oclMat::convertTo()" << endl;

    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                   && fabs(beta) < std::numeric_limits<double>::epsilon();

    if( rtype < 0 )
        rtype = src.type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());

    int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
    if( sdepth == ddepth && noScale )
    {
        src.copyTo(dst);
        return;
    }

    oclMat temp;
    const oclMat *psrc = &src;
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = src);

    dst.create( src.size(), rtype );
    convert_run_cus(*psrc, dst, alpha, beta);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
                  src.rows == dst.rows && src.cols == dst.cols &&
                  mask.type() == CV_8UC1);

    vector<pair<size_t , const void *> > args;

    std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
        {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
        {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
        {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
    };
    char compile_option[32];
    sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];

    globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));

    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, compile_option);
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    std::string kernelName = "convert_to_S";
    std::stringstream idxStr;
    idxStr << src.depth();
    kernelName += idxStr.str();

    float alpha_f = (float)alpha, beta_f = (float)beta;
    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);

    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
    }

    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));

    openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                         localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
}
static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count,
                          double alpha1, double beta1)
{
    Context *clCxt = Context::getContext();
    String kernelName = "svm_linear";

    int src_step = (int)src.step / src.elemSize();
    int src2_step = (int)src2.step / src2.elemSize();
    int dst_step = (int)dst.step / dst.elemSize();

    int x = MIN(16, src_rows);
    int y = MIN(16, src2_cols);
    size_t localThreads[] = {x, y, 1};
    size_t globalThreads[] = {src2_cols, src_rows, 1};
    int width = var_count;

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));

    float alpha = 0.0f, beta = 0.0f;
    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
    {
        alpha = (float)alpha1;
        beta = (float)beta1;
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha));
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta));
    }
    else
    {
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1));
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1));
    }

    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1);
}
// FIXME:
// This function cannot sort arrays with duplicated keys
static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
{
    CV_Error(-1, "This function is incorrect at the moment.");
    Context * cxt = Context::getContext();

    size_t globalThreads[3] = {vecSize, 1, 1};

    std::vector< std::pair<size_t, const void *> > args;
    char build_opt_buf [100];
    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);

    //local
    String kernelname = "selectionSortLocal";
#ifdef ANDROID
    int lds_size = cxt->getDeviceInfo().maxWorkGroupSize * keys.elemSize();
#else
    int lds_size = GROUP_SIZE * keys.elemSize();
#endif
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
    args.push_back(std::make_pair(lds_size,       (void*)NULL));

#ifdef ANDROID
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
#else
    size_t localThreads[3] = {GROUP_SIZE, 1, 1};
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
#endif

    //final
    kernelname = "selectionSortFinal";
    args.pop_back();
#ifdef ANDROID
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
#else
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
#endif
}
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
    surf_(surf),
    img_cols(img.cols), img_rows(img.rows),
    use_mask(!mask.empty()), counters(oclMat()),
    imgTex(NULL), sumTex(NULL), maskSumTex(NULL), _img(img)
{
    CV_Assert(!img.empty() && img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
    CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

    const int min_size = calcSize(surf_.nOctaves - 1, 0);
    CV_Assert(img_rows - min_size >= 0);
    CV_Assert(img_cols - min_size >= 0);

    const int layer_rows = img_rows >> (surf_.nOctaves - 1);
    const int layer_cols = img_cols >> (surf_.nOctaves - 1);
    const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    maxFeatures = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

    CV_Assert(maxFeatures > 0);

    counters.create(1, surf_.nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    integral(img, surf_.sum);

    if(support_image2d())
    {
        bindImgTex(img, imgTex);
        bindImgTex(surf_.sum, sumTex);
    }

    maskSumTex = 0;

    if (use_mask)
    {
        CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
        //!FIXME
        // temp fix for missing min overload
        //oclMat temp(mask.size(), mask.type());
        //temp.setTo(Scalar::all(1.0));
        ////cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
        //integral(surf_.mask1, surf_.maskSum);
        //bindImgTex(surf_.maskSum, maskSumTex);
    }
}
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    int srcStep_in_pixel = src.step1() / src.oclchannels();
    int pixel_end = src.wholecols * src.wholerows - 1;
    Context *clCxt = src.clCxt;

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[src.depth()]);

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1 };
    size_t localThreads[3] = { 256, 1, 1 };

    openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
}
void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst,
                                                  int nmixtures, float backgroundRatio)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {(size_t)dst.cols, (size_t)dst.rows, 1};

    int weight_step = (int)(weight.step/weight.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int dst_step = (int)(dst.step/dst.elemSize());

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }
    else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    String kernel_name = "getBackgroundImage_kernel";
    vector< pair<size_t, const void*> > args;
    args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst.rows));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst.cols));
    args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
    args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
    surf_(surf),
    img_cols(img.cols), img_rows(img.rows),
    use_mask(!mask.empty()),
    imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
{
    CV_Assert(!img.empty() && img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
    CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

    const int min_size = calcSize(surf_.nOctaves - 1, 0);
    CV_Assert(img_rows - min_size >= 0);
    CV_Assert(img_cols - min_size >= 0);

    const int layer_rows = img_rows >> (surf_.nOctaves - 1);
    const int layer_cols = img_cols >> (surf_.nOctaves - 1);
    const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    maxFeatures = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

    CV_Assert(maxFeatures > 0);

    counters.create(1, surf_.nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));

    bindImgTex(img, imgTex);
    integral(img, surf_.sum); // the two-argument integral overload is incorrect
    bindImgTex(surf_.sum, sumTex);

    maskSumTex = 0;

    if (use_mask)
    {
        throw std::exception(); //!FIXME
        // temp fix for missing min overload
        //oclMat temp(mask.size(), mask.type());
        //temp.setTo(Scalar::all(1.0));
        ////cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
        //integral(surf_.mask1, surf_.maskSum);
        //bindImgTex(surf_.maskSum, maskSumTex);
    }
}