// Packs the train descriptor collection (and optional per-image masks) into
// single-row GPU matrices so batched match kernels can index every train
// image through one device buffer.
// NOTE(review): each element of the CPU staging Mat is raw
// CV_8UC(sizeof(oclMat)) storage that is written through an oclMat* without
// placement construction — oclMat::operator= runs on uninitialized memory
// here. Verify oclMat copy semantics tolerate this (pattern inherited from
// the CUDA matcher, where the element type was a POD descriptor).
void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks)
{
    // Nothing to upload when no train descriptors have been added.
    if (empty())
        return;

    if (masks.empty())
    {
        // Stage the oclMat headers in a 1 x N CPU matrix, one header per element.
        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));

        oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();

        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
            *trainCollectionCPU_ptr = trainDescCollection[i];

        trainCollection.upload(trainCollectionCPU);
        maskCollection.release();
    }
    else
    {
        // When masks are supplied there must be exactly one per train image.
        CV_Assert(masks.size() == trainDescCollection.size());

        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
        Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));

        oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
        oclMat *maskCollectionCPU_ptr = maskCollectionCPU.ptr<oclMat>();

        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
        {
            const oclMat &train = trainDescCollection[i];
            const oclMat &mask = masks[i];

            // A non-empty mask must be 8-bit single channel with one column
            // per train descriptor row.
            CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));

            *trainCollectionCPU_ptr = train;
            *maskCollectionCPU_ptr = mask;
        }

        trainCollection.upload(trainCollectionCPU);
        maskCollection.upload(maskCollectionCPU);
    }
}
// CV_HOUGH_GRADIENT circle detection on the OpenCL device.
// Pipeline: Canny edges -> edge point list -> center vote accumulation ->
// center thresholding -> host-side minDist non-maximum suppression.
// NOTE(review): this chunk ends after the minDist filtering step — the radius
// voting / result extraction tail of the function (and its closing brace)
// is not visible here.
void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
{
    CV_Assert(src.type() == CV_8UC1);
    // Point coordinates are packed as 16-bit x/y pairs below, so the image
    // must fit in unsigned short on both axes.
    CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
    CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
    CV_Assert(method == CV_HOUGH_GRADIENT);
    CV_Assert(dp > 0);
    CV_Assert(minRadius > 0 && maxRadius > minRadius);
    CV_Assert(cannyThreshold > 0);
    CV_Assert(votesThreshold > 0);
    CV_Assert(maxCircles > 0);

    // Inverse accumulator resolution.
    const float idp = 1.0f / dp;

    cv::ocl::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold);

    // Compact the edge pixels into a flat device-side point list.
    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.srcPoints);
    const int pointsCount = hough::buildPointList_gpu(buf.edges, buf.srcPoints);

    if (pointsCount == 0)
    {
        circles.release();
        return;
    }

    // Vote for circle centers along the gradient direction of each edge point.
    ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
    buf.accum.setTo(Scalar::all(0));

    hough::circlesAccumCenters_gpu(buf.srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);

    // Keep only accumulator cells whose vote count clears votesThreshold.
    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.centers);
    int centersCount = hough::buildCentersList_gpu(buf.accum, buf.centers, votesThreshold);

    if (centersCount == 0)
    {
        circles.release();
        return;
    }

    if (minDist > 1)
    {
        // Host-side suppression of centers closer than minDist, using a
        // uniform grid with cell size ~minDist so only the 3x3 neighbouring
        // cells must be scanned for each candidate.
        cv::AutoBuffer<unsigned int> oldBuf_(centersCount);
        cv::AutoBuffer<unsigned int> newBuf_(centersCount);
        int newCount = 0;

        unsigned int* oldBuf = oldBuf_;
        unsigned int* newBuf = newBuf_;

        // Blocking read of the candidate centers back to the host.
        openCLSafeCall(clEnqueueReadBuffer(buf.centers.clCxt->impl->clCmdQueue, (cl_mem)buf.centers.data, CL_TRUE, 0, centersCount * sizeof(unsigned int), oldBuf, 0, NULL, NULL));

        const int cellSize = cvRound(minDist);
        const int gridWidth = (src.cols + cellSize - 1) / cellSize;
        const int gridHeight = (src.rows + cellSize - 1) / cellSize;

        std::vector< std::vector<unsigned int> > grid(gridWidth * gridHeight);

        const float minDist2 = minDist * minDist;

        for (int i = 0; i < centersCount; ++i)
        {
            // Centers are packed as (y << 16) | x.
            unsigned int p = oldBuf[i];
            const int px = p & 0xFFFF;
            const int py = (p >> 16) & 0xFFFF;

            bool good = true;

            int xCell = static_cast<int>(px / cellSize);
            int yCell = static_cast<int>(py / cellSize);

            int x1 = xCell - 1;
            int y1 = yCell - 1;
            int x2 = xCell + 1;
            int y2 = yCell + 1;

            // boundary check
            x1 = std::max(0, x1);
            y1 = std::max(0, y1);
            x2 = std::min(gridWidth - 1, x2);
            y2 = std::min(gridHeight - 1, y2);

            for (int yy = y1; yy <= y2; ++yy)
            {
                for (int xx = x1; xx <= x2; ++xx)
                {
                    vector<unsigned int>& m = grid[yy * gridWidth + xx];

                    for(size_t j = 0; j < m.size(); ++j)
                    {
                        const int val = m[j];
                        const int jx = val & 0xFFFF;
                        const int jy = (val >> 16) & 0xFFFF;

                        float dx = (float)(px - jx);
                        float dy = (float)(py - jy);

                        // Reject the candidate if an already accepted center
                        // is closer than minDist.
                        if (dx * dx + dy * dy < minDist2)
                        {
                            good = false;
                            goto break_out;
                        }
                    }
                }
            }

            break_out:

            if(good)
            {
                grid[yCell * gridWidth + xCell].push_back(p);
                newBuf[newCount++] = p;
            }
        }

        // Write the surviving centers back to the device buffer.
        openCLSafeCall(clEnqueueWriteBuffer(buf.centers.clCxt->impl->clCmdQueue, (cl_mem)buf.centers.data, CL_TRUE, 0, newCount * sizeof(unsigned int), newBuf, 0, 0, 0));

        centersCount = newCount;
    }
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) { if (prevPts.empty()) { nextPts.release(); status.release(); //if (err) err->release(); return; } derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); iters = std::min(std::max(iters, 0), 100); const int cn = prevImg.oclchannels(); dim3 block, patch; calcPatchSize(winSize, cn, block, patch, isDeviceArch11_); CV_Assert(derivLambda >= 0); CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6); CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2); if (useInitialFlow) CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2); else ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); oclMat temp2 = nextPts.reshape(1); //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f)); multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f); //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); //status.setTo(Scalar::all(1)); setTo(status, Scalar::all(1)); bool errMat = false; if (!err) { err = new oclMat(1, prevPts.cols, CV_32FC1); errMat = true; } else ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); //ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err); // build the image pyramids. 
prevPyr_.resize(maxLevel + 1); nextPyr_.resize(maxLevel + 1); if (cn == 1 || cn == 4) { //prevImg.convertTo(prevPyr_[0], CV_32F); //nextImg.convertTo(nextPyr_[0], CV_32F); convertTo(prevImg, prevPyr_[0], CV_32F); convertTo(nextImg, nextPyr_[0], CV_32F); } else { //oclMat buf_; // cvtColor(prevImg, buf_, COLOR_BGR2BGRA); // buf_.convertTo(prevPyr_[0], CV_32F); // cvtColor(nextImg, buf_, COLOR_BGR2BGRA); // buf_.convertTo(nextPyr_[0], CV_32F); } for (int level = 1; level <= maxLevel; ++level) { pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]); pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]); } // dI/dx ~ Ix, dI/dy ~ Iy for (int level = maxLevel; level >= 0; level--) { lkSparse_run(prevPyr_[level], nextPyr_[level], prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols, level, /*block, */patch, winSize, iters); } clFinish(prevImg.clCxt->impl->clCmdQueue); if(errMat) delete err; }