void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err) { CV_Assert(prevImg.type() == CV_8UC1); CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); CV_Assert(maxLevel >= 0); CV_Assert(winSize.width > 2 && winSize.height > 2); if (err) err->create(prevImg.size(), CV_32FC1); prevPyr_.resize(maxLevel + 1); nextPyr_.resize(maxLevel + 1); prevPyr_[0] = prevImg; nextImg.convertTo(nextPyr_[0], CV_32F); for (int level = 1; level <= maxLevel; ++level) { pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]); pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]); } ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]); ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]); ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]); ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]); uPyr_[1].setTo(Scalar::all(0)); vPyr_[1].setTo(Scalar::all(0)); Size winSize2i(winSize.width, winSize.height); int idx = 0; for (int level = maxLevel; level >= 0; level--) { int idx2 = (idx + 1) & 1; lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2], level == 0 ? err : 0, winSize2i, iters); if (level > 0) idx = idx2; } uPyr_[idx].copyTo(u); vPyr_[idx].copyTo(v); clFinish(*(cl_command_queue*)prevImg.clCxt->getOpenCLCommandQueuePtr()); }
/**
 * Computes the spatial moments (up to third order) of a single-channel image
 * with the "CvMoments" OpenCL kernel and finishes the reduction on the host.
 *
 * @param src     single-channel image. CV_64FC1 is rejected when the device
 *                lacks double support. NOTE: when @p binary is true, src is
 *                replaced by an 8-bit 0/255 image (pre-existing behaviour that
 *                callers may rely on, therefore kept).
 * @param binary  when true, the image is first reduced to a 0/255 8-bit image
 *                and the flag is forwarded to the kernel.
 * @return        the moments, completed by icvCompleteMomentState.
 */
Moments ocl_moments(oclMat& src, bool binary) //for image
{
    CV_Assert(src.oclchannels() == 1);
    if(src.type() == CV_64FC1 && !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
    {
        CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
    }

    if(binary)
    {
        // Build the mask from the source. An 8-bit source can serve as the
        // mask directly; previously the mask stayed empty in that case, so
        // every pixel was painted 255.
        oclMat mask;
        if(src.type() != CV_8UC1)
        {
            src.convertTo(mask, CV_8UC1);
        }
        else
        {
            mask = src;
        }

        // Clear the buffer first so pixels outside the mask are a defined 0
        // instead of whatever the fresh allocation happened to contain.
        oclMat src8u(src.size(), CV_8UC1);
        src8u.setTo(Scalar(0));
        src8u.setTo(Scalar(255), mask);
        src = src8u;
    }

    const int TILE_SIZE = 256;

    CvMoments mom;
    memset(&mom, 0, sizeof(mom));

    cv::Size size = src.size();
    const int blockx = (size.width  + TILE_SIZE - 1) / TILE_SIZE;
    const int blocky = (size.height + TILE_SIZE - 1) / TILE_SIZE;

    // Explicit casts: brace-initialising size_t from a non-constant int is a
    // narrowing conversion and is rejected by C++11 and later.
    size_t localThreads[3]  = { 1, static_cast<size_t>(TILE_SIZE), 1 };
    size_t globalThreads[3] = { static_cast<size_t>(blockx), static_cast<size_t>(size.height), 1 };

    // One column per horizontal tile, ten rows (one per moment) per vertical tile.
    oclMat dst_m;
    if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
    {
        dst_m.create(blocky * 10, blockx, CV_64FC1);
    }
    else
    {
        dst_m.create(blocky * 10, blockx, CV_32FC1);
    }

    // Steps are passed to the kernel in elements, not bytes.
    int src_step  = (int)(src.step / src.elemSize());
    int dstm_step = (int)(dst_m.step / dst_m.elemSize());
    int binary_   = binary ? 1 : 0;

    // The vector stores pointers only: every referenced object must stay
    // alive until openCLExecuteKernel returns.
    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));

    // Select the kernel specialisation matching the element type.
    char buildOptions[128];
    if(binary || src.type() == CV_8UC1)
    {
        snprintf(buildOptions, sizeof(buildOptions), "-D CV_8UC1");
    }
    else if(src.type() == CV_16UC1)
    {
        snprintf(buildOptions, sizeof(buildOptions), "-D CV_16UC1");
    }
    else if(src.type() == CV_16SC1)
    {
        snprintf(buildOptions, sizeof(buildOptions), "-D CV_16SC1");
    }
    else if(src.type() == CV_32FC1)
    {
        snprintf(buildOptions, sizeof(buildOptions), "-D CV_32FC1");
    }
    else if(src.type() == CV_64FC1)
    {
        snprintf(buildOptions, sizeof(buildOptions), "-D CV_64FC1");
    }
    else
    {
        CV_Error( CV_StsUnsupportedFormat, "" );
    }

    openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, buildOptions);

    // Bring the per-tile partial sums to the host and reduce them in double.
    Mat tmp(dst_m);
    tmp.convertTo(tmp, CV_64FC1);

    const int MOMENT_COUNT = 10;
    double tmp_m[MOMENT_COUNT] = {0};

    for(int j = 0; j < tmp.rows; j += MOMENT_COUNT)
    {
        for(int i = 0; i < tmp.cols; i++)
        {
            for(int k = 0; k < MOMENT_COUNT; k++)
            {
                tmp_m[k] += tmp.at<double>(j + k, i);
            }
        }
    }

    mom.m00 = tmp_m[0];
    mom.m10 = tmp_m[1];
    mom.m01 = tmp_m[2];
    mom.m20 = tmp_m[3];
    mom.m11 = tmp_m[4];
    mom.m02 = tmp_m[5];
    mom.m30 = tmp_m[6];
    mom.m21 = tmp_m[7];
    mom.m12 = tmp_m[8];
    mom.m03 = tmp_m[9];

    icvCompleteMomentState( &mom );
    return mom;
}
void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy) { CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); CV_Assert( I0.size() == I1.size() ); CV_Assert( I0.type() == I1.type() ); CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); CV_Assert( nscales > 0 ); // allocate memory for the pyramid structure I0s.resize(nscales); I1s.resize(nscales); u1s.resize(nscales); u2s.resize(nscales); //I0s_step == I1s_step I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); if (!useInitialFlow) { flowx.create(I0.size(), CV_32FC1); flowy.create(I0.size(), CV_32FC1); } //u1s_step != u2s_step u1s[0] = flowx; u2s[0] = flowy; I1x_buf.create(I0.size(), CV_32FC1); I1y_buf.create(I0.size(), CV_32FC1); I1w_buf.create(I0.size(), CV_32FC1); I1wx_buf.create(I0.size(), CV_32FC1); I1wy_buf.create(I0.size(), CV_32FC1); grad_buf.create(I0.size(), CV_32FC1); rho_c_buf.create(I0.size(), CV_32FC1); p11_buf.create(I0.size(), CV_32FC1); p12_buf.create(I0.size(), CV_32FC1); p21_buf.create(I0.size(), CV_32FC1); p22_buf.create(I0.size(), CV_32FC1); diff_buf.create(I0.size(), CV_32FC1); // create the scales for (int s = 1; s < nscales; ++s) { ocl::pyrDown(I0s[s - 1], I0s[s]); ocl::pyrDown(I1s[s - 1], I1s[s]); if (I0s[s].cols < 16 || I0s[s].rows < 16) { nscales = s; break; } if (useInitialFlow) { ocl::pyrDown(u1s[s - 1], u1s[s]); ocl::pyrDown(u2s[s - 1], u2s[s]); //ocl::multiply(u1s[s], Scalar::all(0.5), u1s[s]); multiply(0.5, u1s[s], u1s[s]); //ocl::multiply(u2s[s], Scalar::all(0.5), u2s[s]); multiply(0.5, u1s[s], u2s[s]); } } // pyramidal structure for computing the optical flow for (int s = nscales - 1; s >= 0; --s) { // compute the optical flow at the current scale procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); // if this was the last scale, finish now if 
(s == 0) break; // otherwise, upsample the optical flow // zoom the optical flow for the next finer scale ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); // scale the optical flow with the appropriate zoom factor multiply(2, u1s[s - 1], u1s[s - 1]); multiply(2, u2s[s - 1], u2s[s - 1]); } }
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) { if (prevPts.empty()) { nextPts.release(); status.release(); return; } derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); iters = std::min(std::max(iters, 0), 100); const int cn = prevImg.oclchannels(); dim3 block, patch; calcPatchSize(winSize, cn, block, patch, isDeviceArch11_); CV_Assert(derivLambda >= 0); CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6); CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2); if (useInitialFlow) CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2); else ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); oclMat temp2 = nextPts.reshape(1); multiply(1.0f/(1<<maxLevel)/2.0f, temp1, temp2); ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); status.setTo(Scalar::all(1)); bool errMat = false; if (!err) { err = new oclMat(1, prevPts.cols, CV_32FC1); errMat = true; } else ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); // build the image pyramids. prevPyr_.resize(maxLevel + 1); nextPyr_.resize(maxLevel + 1); if (cn == 1 || cn == 4) { prevImg.convertTo(prevPyr_[0], CV_32F); nextImg.convertTo(nextPyr_[0], CV_32F); } for (int level = 1; level <= maxLevel; ++level) { pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]); pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]); } // dI/dx ~ Ix, dI/dy ~ Iy for (int level = maxLevel; level >= 0; level--) { lkSparse_run(prevPyr_[level], nextPyr_[level], prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols, level, /*block, */patch, winSize, iters); } clFinish(*(cl_command_queue*)prevImg.clCxt->getOpenCLCommandQueuePtr()); if(errMat) delete err; }