void update_cur_bbox_vec(std::vector<bbox_t> _cur_bbox_vec)
{
    cur_bbox_vec = _cur_bbox_vec;
    good_bbox_vec_flags = std::vector<bool>(cur_bbox_vec.size(), true);
    cv::Mat prev_pts;

    // Track one point per box: its center.
    for (auto &i : cur_bbox_vec) {
        float x_center = (i.x + i.w / 2.0F);
        float y_center = (i.y + i.h / 2.0F);
        prev_pts.push_back(cv::Point2f(x_center, y_center));
    }

    // The GPU sparse optical flow expects a 1xN CV_32FC2 row vector,
    // so transpose the Nx1 column assembled above.
    if (prev_pts.rows == 0)
        prev_pts_flow_cpu = cv::Mat();
    else
        cv::transpose(prev_pts, prev_pts_flow_cpu);

    // Reallocate the GPU buffers only when more points are needed than before.
    if (prev_pts_flow_gpu.cols < prev_pts_flow_cpu.cols) {
        prev_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type());
        cur_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type());

        status_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_8UC1);
        err_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_32FC1);
    }

    prev_pts_flow_gpu.upload(prev_pts_flow_cpu, stream);
}
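// The stream and optical-flow members used by this tracker need one-time
// initialization. A minimal sketch (hypothetical helper name), assuming the
// sync_PyrLKOpticalFlow_gpu and stream members referenced above and below;
// the window size, pyramid depth and iteration count are illustrative:
void init_tracker_sketch()
{
    sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create(
        cv::Size(21, 21), /*maxLevel=*/3, /*iters=*/3000);
    stream = cv::cuda::Stream();
}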
void CGLUtil::gpuMapRGBResources(const cv::cuda::GpuMat& cvgmRGBs_)
{
    int nPyrLevel_ = getLevel(cvgmRGBs_.cols);
    // Map the OpenGL buffer object for writing from CUDA.
    void *pDev;
    cudaGraphicsMapResources(1, &_apResourceRGBVBO[nPyrLevel_], 0);
    size_t nSize;
    cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourceRGBVBO[nPyrLevel_]);
    // Wrap the mapped pointer in a GpuMat header and copy the colors into the VBO.
    cv::cuda::GpuMat cvgmRGBs(cvgmRGBs_.size(), CV_8UC3, pDev);
    cvgmRGBs_.copyTo(cvgmRGBs);
    cudaGraphicsUnmapResources(1, &_apResourceRGBVBO[nPyrLevel_], 0);

    // Render from the VBO.
    glBindBuffer(GL_ARRAY_BUFFER, _auRGBVBO[nPyrLevel_]);
    glColorPointer(3, GL_UNSIGNED_BYTE, 0, 0);
    // glEnableClientState() must not be moved in front of cudaGraphicsMapResources(),
    // otherwise the mapping misbehaves.
    glEnableClientState(GL_COLOR_ARRAY);
    //glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_]);
    //glDisableClientState(GL_COLOR_ARRAY);
    //glBindBuffer(GL_ARRAY_BUFFER, 0);
}
void CGLUtil::gpuMapNlResources(const cv::cuda::GpuMat& cvgmNls_)
{
    int nPyrLevel_ = getLevel(cvgmNls_.cols);
    // Map the OpenGL buffer object for writing from CUDA.
    void *pDev;
    cudaGraphicsMapResources(1, &_apResourceNlVBO[nPyrLevel_], 0);
    size_t nSize;
    cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourceNlVBO[nPyrLevel_]);
    // Wrap the mapped pointer in a GpuMat header and copy the normals into the VBO.
    cv::cuda::GpuMat cvgmNls(cvgmNls_.size(), CV_32FC3, pDev);
    cvgmNls_.copyTo(cvgmNls);
    cudaGraphicsUnmapResources(1, &_apResourceNlVBO[nPyrLevel_], 0);

    // Render from the VBO.
    glBindBuffer(GL_ARRAY_BUFFER, _auNlVBO[nPyrLevel_]);
    glNormalPointer(GL_FLOAT, 12, 0); // stride of 12 bytes = one tightly packed 3-float normal
    // glEnableClientState() must not be moved in front of cudaGraphicsMapResources(),
    // otherwise the mapping misbehaves.
    glEnableClientState(GL_NORMAL_ARRAY);
    //glColor3f(1.0, 0.0, 0.0);
    //glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_]);
    //glDisableClientState(GL_NORMAL_ARRAY);
    //glBindBuffer(GL_ARRAY_BUFFER, 0);
}
void CGLUtil::gpuMapPtResources(const cv::cuda::GpuMat& cvgmPts_)
{
    int nPyrLevel_ = getLevel(cvgmPts_.cols);
    // Map the OpenGL buffer object for writing from CUDA.
    void *pDev;
    cudaGraphicsMapResources(1, &_apResourcePtVBO[nPyrLevel_], 0);
    size_t nSize;
    cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourcePtVBO[nPyrLevel_]);
    cv::cuda::GpuMat cvgmPts(cvgmPts_.size(), CV_32FC3, pDev);
    // All writes through the mapped pointer must happen before
    // cudaGraphicsUnmapResources(); otherwise the buffers interfere with each other.
    cvgmPts_.copyTo(cvgmPts);
    cudaGraphicsUnmapResources(1, &_apResourcePtVBO[nPyrLevel_], 0);

    // Render from the VBO.
    glBindBuffer(GL_ARRAY_BUFFER, _auPtVBO[nPyrLevel_]);
    glVertexPointer(3, GL_FLOAT, 0, 0);
    // glEnableClientState() must not be moved in front of cudaGraphicsMapResources(),
    // otherwise the mapping misbehaves.
    glEnableClientState(GL_VERTEX_ARRAY);
    //glColor3f(1.0, 0.0, 0.0);
    //glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_]);
    //glDisableClientState(GL_VERTEX_ARRAY);
    //glBindBuffer(GL_ARRAY_BUFFER, 0);
}
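// The three gpuMap*Resources() functions above assume each VBO has already
// been registered with CUDA. A minimal sketch of that one-time registration
// for the point VBO of one pyramid level (hypothetical helper name; the byte
// size assumes tightly packed 3-float points):
void CGLUtil::registerPtVBOSketch(int nPyrLevel_, int nCols_, int nRows_)
{
    glGenBuffers(1, &_auPtVBO[nPyrLevel_]);
    glBindBuffer(GL_ARRAY_BUFFER, _auPtVBO[nPyrLevel_]);
    // Allocate uninitialized storage; CUDA overwrites it every frame.
    glBufferData(GL_ARRAY_BUFFER, nCols_ * nRows_ * 3 * sizeof(float), NULL, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    // WriteDiscard tells the driver that CUDA only writes the buffer, never reads it.
    cudaSafeCall(cudaGraphicsGLRegisterBuffer(&_apResourcePtVBO[nPyrLevel_],
                                              _auPtVBO[nPyrLevel_],
                                              cudaGraphicsMapFlagsWriteDiscard));
}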
void enqueueSend(cv::cuda::GpuMat& m, cv::cuda::Stream& strm)
{
    // Start an asynchronous device-to-host copy, then queue a host callback
    // on the same stream so send() runs only after the copy has finished.
    m.download(image_data_, strm);
    strm.enqueueHostCallback(
        [](int status, void *userData) {
            (void)status;
            static_cast<GPUSender *>(userData)->send();
        },
        (void *)this);
}
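// A hypothetical usage sketch for enqueueSend() above. The download and the
// host callback are both ordered on the stream, so send() fires only after
// the copy has finished. Note that CUDA forbids making CUDA API calls from
// inside a stream callback, so send() itself must stay CUDA-free.
void sendFrameAsyncSketch(GPUSender *sender, cv::cuda::GpuMat &gpu_frame)
{
    cv::cuda::Stream stream;
    sender->enqueueSend(gpu_frame, stream);   // returns immediately
    stream.waitForCompletion();               // optional: block until sent
}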
void _SSD::Preprocess(const cv::cuda::GpuMat& img,
                      std::vector<cv::cuda::GpuMat>* input_channels)
{
    /* Convert the input image to the input image format of the network. */
    cv::cuda::GpuMat sample;
    if (img.channels() == 3 && num_channels_ == 1)
        cv::cuda::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
    else if (img.channels() == 4 && num_channels_ == 1)
        cv::cuda::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
    else if (img.channels() == 4 && num_channels_ == 3)
        cv::cuda::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
    else if (img.channels() == 1 && num_channels_ == 3)
        cv::cuda::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
    else
        sample = img;

    cv::cuda::GpuMat sample_resized;
    if (sample.size() != input_geometry_)
        cv::cuda::resize(sample, sample_resized, input_geometry_);
    else
        sample_resized = sample;

    cv::cuda::GpuMat sample_float;
    if (num_channels_ == 3)
        sample_resized.convertTo(sample_float, CV_32FC3);
    else
        sample_resized.convertTo(sample_float, CV_32FC1);

    // Mean subtraction is currently disabled.
    cv::cuda::GpuMat sample_normalized;
    //cv::cuda::subtract(sample_float, mean_, sample_normalized);
    sample_normalized = sample_float;

    /* This operation writes the separate BGR planes directly to the
     * input layer of the network because it is wrapped by the GpuMat
     * objects in input_channels. */
    cv::cuda::split(sample_normalized, *input_channels);

    //CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
    //      == net_->input_blobs()[0]->cpu_data())
    //    << "Input channels are not wrapping the input layer of the network.";
}
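// Preprocess() writes straight into the network because *input_channels
// already wraps the input layer's GPU buffer, as the commented-out CHECK
// hints. A minimal sketch of that wrapping under the Caffe-style net_
// member suggested above (hypothetical helper name):
void _SSD::WrapInputLayerSketch(std::vector<cv::cuda::GpuMat>* input_channels)
{
    caffe::Blob<float>* input_layer = net_->input_blobs()[0];
    int width = input_layer->width();
    int height = input_layer->height();
    float* input_data = input_layer->mutable_gpu_data();
    for (int i = 0; i < input_layer->channels(); ++i) {
        // One GpuMat header per plane, no copy: each points into the blob,
        // so cv::cuda::split() in Preprocess() fills the blob directly.
        input_channels->push_back(cv::cuda::GpuMat(height, width, CV_32FC1, input_data));
        input_data += width * height;
    }
}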
void update_tracking_flow(cv::Mat src_mat, std::vector<bbox_t> _cur_bbox_vec)
{
    int const old_gpu_id = cv::cuda::getDevice();
    if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(gpu_id);

    if (src_mat.channels() == 3) {
        // Lazily allocate the GPU source buffers on first use.
        if (src_mat_gpu.cols == 0) {
            src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type());
            src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1);
        }

        update_cur_bbox_vec(_cur_bbox_vec);

        //src_grey_gpu.upload(src_mat, stream);    // use BGR
        src_mat_gpu.upload(src_mat, stream);
        cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, cv::COLOR_BGR2GRAY, 1, stream);
    }

    if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(old_gpu_id);
}
bool Optimizer::optimizeA(const cv::cuda::GpuMat _d, cv::cuda::GpuMat _a)
{
    using namespace cv::cuda::device::dtam_optimizer;
    localStream = cv::cuda::StreamAccessor::getStream(cvStream);
    this->_a = _a;

    bool doneOptimizing = theta <= thetaMin;
    int layerStep = cv.rows * cv.cols;
    float* d = (float*) _d.data;
    float* a = (float*) _a.data;

    loadConstants(cv.rows, cv.cols, cv.layers, layerStep, a, d, cv.data,
                  (float*)cv.lo.data, (float*)cv.hi.data, (float*)cv.loInd.data);
    minimizeACaller(cv.data, a, d, cv.layers, theta, lambda);

    // Anneal theta toward thetaMin.
    theta *= thetaStep;

    if (doneOptimizing) {
        // Enqueue the stable depth map and record an event that signals
        // when it is ready.
        stableDepthReady = Ptr<char>((char*)(new cudaEvent_t));
        cudaEventCreate((cudaEvent_t*)(char*)stableDepthReady, cudaEventBlockingSync);
        _a.convertTo(stableDepth, CV_32FC1, cv.depthStep, cv.far, cvStream);
        cudaEventRecord(*(cudaEvent_t*)(char*)stableDepthReady, localStream);
        stableDepthEnqueued = 1;
    }
    return doneOptimizing;
}
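// optimizeA() above performs one step of the theta annealing schedule: each
// call runs the minimization kernel, shrinks theta by thetaStep, and reports
// whether theta had already reached thetaMin. A hypothetical driver loop,
// assuming an Optimizer instance and the depth (_d) / auxiliary (_a) buffers
// used above:
void optimizeUntilDoneSketch(Optimizer &opt, cv::cuda::GpuMat &d, cv::cuda::GpuMat &a)
{
    // Iterate until the schedule bottoms out; the final call also enqueues
    // the stable depth map and records its completion event.
    while (!opt.optimizeA(d, a)) {
        // The coupled dual/auxiliary updates (e.g. a Q/D step) would
        // normally interleave here.
    }
}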
GLuint CGLUtil::gpuMapRgb2PixelBufferObj(const cv::cuda::GpuMat& cvgmRGB_)
{
    //http://rickarkin.blogspot.co.uk/2012/03/use-pbo-to-share-buffer-between-cuda.html
    int nPyrLevel_ = getLevel(cvgmRGB_.cols);
    GLuint uTexture = 0;
    // Map the OpenGL pixel buffer object for writing from CUDA.
    if (cvgmRGB_.channels() == 3) {
        uTexture = _auTexture[nPyrLevel_];
        void *pDev;
        cudaSafeCall(cudaGraphicsMapResources(1, &_apResourceRGBPxielBO[nPyrLevel_], 0));
        size_t nSize;
        cudaSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourceRGBPxielBO[nPyrLevel_]));
        cv::cuda::GpuMat cvgmRGB(cvgmRGB_.size(), CV_8UC3, pDev);
        cvgmRGB_.copyTo(cvgmRGB);
        cudaSafeCall(cudaGraphicsUnmapResources(1, &_apResourceRGBPxielBO[nPyrLevel_], 0));
        // Texture mapping: source the texture upload from the bound PBO.
        glBindTexture(GL_TEXTURE_2D, uTexture);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, _auRGBPixelBO[nPyrLevel_]);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, cvgmRGB_.cols, cvgmRGB_.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
        errorDetectorGL();
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
        glBindTexture(GL_TEXTURE_2D, 0);
    }
    else if (cvgmRGB_.channels() == 1) {
        uTexture = _auGrayTexture[nPyrLevel_];
        void *pDev;
        cudaSafeCall(cudaGraphicsMapResources(1, &_apResourceGrayPxielBO[nPyrLevel_], 0));
        size_t nSize;
        cudaSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourceGrayPxielBO[nPyrLevel_]));
        cv::cuda::GpuMat cvgmGray(cvgmRGB_.size(), CV_8UC1, pDev);
        cvgmRGB_.copyTo(cvgmGray);
        cudaSafeCall(cudaGraphicsUnmapResources(1, &_apResourceGrayPxielBO[nPyrLevel_], 0));
        // Texture mapping: source the texture upload from the bound PBO.
        glBindTexture(GL_TEXTURE_2D, uTexture);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, _auGrayPixelBO[nPyrLevel_]);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, cvgmRGB_.cols, cvgmRGB_.rows, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
        errorDetectorGL();
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
        glBindTexture(GL_TEXTURE_2D, 0);
    }
    return uTexture;
}//gpuMapRgb2PixelBufferObj
std::vector<bbox_t> tracking_flow(cv::Mat dst_mat, bool check_error = true)
{
    if (sync_PyrLKOpticalFlow_gpu.empty()) {
        std::cout << "sync_PyrLKOpticalFlow_gpu isn't initialized \n";
        return cur_bbox_vec;
    }

    int const old_gpu_id = cv::cuda::getDevice();
    if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(gpu_id);

    if (dst_mat_gpu.cols == 0) {
        dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
        dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
    }

    //dst_grey_gpu.upload(dst_mat, stream);    // use BGR
    dst_mat_gpu.upload(dst_mat, stream);
    cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, cv::COLOR_BGR2GRAY, 1, stream);

    // If the frame size changed, reset the source frame and skip this update.
    if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
        stream.waitForCompletion();
        src_grey_gpu = dst_grey_gpu.clone();
        cv::cuda::setDevice(old_gpu_id);
        return cur_bbox_vec;
    }

    //sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu);    // OpenCV 2.4.x
    sync_PyrLKOpticalFlow_gpu->calc(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, err_gpu, stream);    // OpenCV 3.x

    cv::Mat cur_pts_flow_cpu;
    cur_pts_flow_gpu.download(cur_pts_flow_cpu, stream);

    dst_grey_gpu.copyTo(src_grey_gpu, stream);

    cv::Mat err_cpu, status_cpu;
    err_gpu.download(err_cpu, stream);
    status_gpu.download(status_cpu, stream);

    stream.waitForCompletion();

    std::vector<bbox_t> result_bbox_vec;

    if (err_cpu.cols == (int)cur_bbox_vec.size() && status_cpu.cols == (int)cur_bbox_vec.size())
    {
        for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
        {
            cv::Point2f cur_key_pt = cur_pts_flow_cpu.at<cv::Point2f>(0, i);
            cv::Point2f prev_key_pt = prev_pts_flow_cpu.at<cv::Point2f>(0, i);

            float moved_x = cur_key_pt.x - prev_key_pt.x;
            float moved_y = cur_key_pt.y - prev_key_pt.y;

            // Keep a box only if its point tracked successfully, with a low
            // error, a plausible displacement, and a result inside the frame.
            if (std::abs(moved_x) < 100 && std::abs(moved_y) < 100 && good_bbox_vec_flags[i]) {
                if (err_cpu.at<float>(0, i) < flow_error && status_cpu.at<unsigned char>(0, i) != 0 &&
                    ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0)
                {
                    cur_bbox_vec[i].x += moved_x + 0.5;
                    cur_bbox_vec[i].y += moved_y + 0.5;
                    result_bbox_vec.push_back(cur_bbox_vec[i]);
                }
                else good_bbox_vec_flags[i] = false;
            }
            else good_bbox_vec_flags[i] = false;

            //if (!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]);
        }
    }

    // The current points/frame become the previous ones for the next call.
    cur_pts_flow_gpu.swap(prev_pts_flow_gpu);
    cur_pts_flow_cpu.copyTo(prev_pts_flow_cpu);

    if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(old_gpu_id);

    return result_bbox_vec;
}
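// A hypothetical per-frame driver for the tracker above: seed the optical-flow
// state with fresh detections via update_tracking_flow(), then propagate the
// boxes through subsequent frames with tracking_flow(). detect() stands in for
// whatever produces std::vector<bbox_t> detections.
void track_video_sketch(cv::VideoCapture &cap)
{
    cv::Mat frame;
    if (!cap.read(frame)) return;
    update_tracking_flow(frame, detect(frame));              // seed with detections
    while (cap.read(frame)) {
        std::vector<bbox_t> tracked = tracking_flow(frame);  // propagate boxes
        update_tracking_flow(frame, tracked);                // re-seed from tracked boxes
    }
}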