void update_cur_bbox_vec(std::vector<bbox_t> _cur_bbox_vec)
    {
        cur_bbox_vec = _cur_bbox_vec;
        good_bbox_vec_flags = std::vector<bool>(cur_bbox_vec.size(), true);
        cv::Mat prev_pts, cur_pts_flow_cpu;

        for (auto &i : cur_bbox_vec) {
            float x_center = (i.x + i.w / 2.0F);
            float y_center = (i.y + i.h / 2.0F);
            prev_pts.push_back(cv::Point2f(x_center, y_center));
        }

        if (prev_pts.rows == 0)
            prev_pts_flow_cpu = cv::Mat();
        else
            cv::transpose(prev_pts, prev_pts_flow_cpu);

        if (prev_pts_flow_gpu.cols < prev_pts_flow_cpu.cols) {
            prev_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type());
            cur_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type());

            status_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_8UC1);
            err_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_32FC1);
        }

        prev_pts_flow_gpu.upload(cv::Mat(prev_pts_flow_cpu), stream);
    }
	// Copies the device image cvgmRGBs_ into the CUDA-registered colour buffer
	// object selected by getLevel( cvgmRGBs_.cols ), then binds that buffer as the
	// OpenGL colour array (3 x GL_UNSIGNED_BYTE per element).
	//
	// The buffer is left bound and GL_COLOR_ARRAY is left enabled on return; no
	// draw call is issued here.
	// CUDA interop failures are routed through cudaSafeCall, as in
	// gpuMapRgb2PixelBufferObj, instead of being silently ignored.
	// NOTE(review): the mapped memory is wrapped as CV_8UC3 with the size of
	// cvgmRGBs_; presumably the input must be CV_8UC3 and fit the buffer - confirm.
	void CGLUtil::gpuMapRGBResources(const cv::cuda::GpuMat& cvgmRGBs_){
		int nPyrLevel_ = getLevel( cvgmRGBs_.cols );
		// map OpenGL buffer object for writing from CUDA
		void *pDev = NULL;
		cudaSafeCall( cudaGraphicsMapResources(1, &_apResourceRGBVBO[nPyrLevel_], 0) );
		size_t nSize = 0;
		cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&pDev, &nSize, _apResourceRGBVBO[nPyrLevel_] ) );
		// wrap the mapped device pointer (no allocation) and copy while it is still mapped
		cv::cuda::GpuMat cvgmRGBs(cvgmRGBs_.size(),CV_8UC3,pDev);
		cvgmRGBs_.copyTo(cvgmRGBs);
		cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourceRGBVBO[nPyrLevel_], 0) );
		// render from the vbo
		glBindBuffer(GL_ARRAY_BUFFER, _auRGBVBO[nPyrLevel_]);
		glColorPointer(3, GL_UNSIGNED_BYTE, 0, 0);
		glEnableClientState(GL_COLOR_ARRAY);//you cant move glEnableClientState infront of cuda GraphicsMapResources, otherwise, you will have weird problem
		//glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_] );
		//glDisableClientState(GL_COLOR_ARRAY);
		//glBindBuffer( GL_ARRAY_BUFFER, 0 );
	}
	// Copies the device image cvgmNls_ into the CUDA-registered normal buffer
	// object selected by getLevel( cvgmNls_.cols ), then binds that buffer as the
	// OpenGL normal array (GL_FLOAT, 12-byte stride).
	//
	// The buffer is left bound and GL_NORMAL_ARRAY is left enabled on return; no
	// draw call is issued here.
	// CUDA interop failures are routed through cudaSafeCall, as in
	// gpuMapRgb2PixelBufferObj, instead of being silently ignored.
	// NOTE(review): the mapped memory is wrapped as CV_32FC3 with the size of
	// cvgmNls_; presumably the input must be CV_32FC3 and fit the buffer - confirm.
	void CGLUtil::gpuMapNlResources(const cv::cuda::GpuMat& cvgmNls_){
		int nPyrLevel_ = getLevel( cvgmNls_.cols );
		// map OpenGL buffer object for writing from CUDA
		void *pDev = NULL;
		cudaSafeCall( cudaGraphicsMapResources(1, &_apResourceNlVBO[nPyrLevel_], 0) );
		size_t nSize = 0;
		cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&pDev, &nSize, _apResourceNlVBO[nPyrLevel_] ) );
		// wrap the mapped device pointer (no allocation) and copy while it is still mapped
		cv::cuda::GpuMat cvgmNls(cvgmNls_.size(),CV_32FC3,pDev);
		cvgmNls_.copyTo(cvgmNls);
		cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourceNlVBO[nPyrLevel_], 0) );
		// render from the vbo
		glBindBuffer(GL_ARRAY_BUFFER, _auNlVBO[nPyrLevel_]);
		glNormalPointer(GL_FLOAT, 12, 0); //12 is the stride = the number of bytes occupied by each normal
		glEnableClientState(GL_NORMAL_ARRAY);//you cant move glEnableClientState infront of cuda GraphicsMapResources, otherwise, you will have weird problem
		//glColor3f(1.0, 0.0, 0.0);
		//glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_] );
		//glDisableClientState(GL_NORMAL_ARRAY);
		//glBindBuffer( GL_ARRAY_BUFFER, 0 );
	}
	// Copies the device image cvgmPts_ into the CUDA-registered vertex buffer
	// object selected by getLevel( cvgmPts_.cols ), then binds that buffer as the
	// OpenGL vertex array (3 x GL_FLOAT per vertex, tightly packed).
	//
	// The buffer is left bound and GL_VERTEX_ARRAY is left enabled on return; no
	// draw call is issued here.
	// CUDA interop failures are routed through cudaSafeCall, as in
	// gpuMapRgb2PixelBufferObj, instead of being silently ignored.
	// NOTE(review): the mapped memory is wrapped as CV_32FC3 with the size of
	// cvgmPts_; presumably the input must be CV_32FC3 and fit the buffer - confirm.
	void CGLUtil::gpuMapPtResources(const cv::cuda::GpuMat& cvgmPts_){
		int nPyrLevel_ = getLevel( cvgmPts_.cols );
		// map OpenGL buffer object for writing from CUDA
		void *pDev = NULL;
		cudaSafeCall( cudaGraphicsMapResources(1, &_apResourcePtVBO[nPyrLevel_], 0) );
		size_t nSize = 0;
		cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&pDev, &nSize, _apResourcePtVBO[nPyrLevel_] ) );
		// wrap the mapped device pointer (no allocation)
		cv::cuda::GpuMat cvgmPts( cvgmPts_.size(),CV_32FC3,pDev );
		cvgmPts_.copyTo(cvgmPts); // the operation of the Buffer must be done before cudaGraphicsUnmapResources(), otherwise, the buffer will affects each other
		cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourcePtVBO[nPyrLevel_], 0) );
		// render from the vbo
		glBindBuffer(GL_ARRAY_BUFFER, _auPtVBO[nPyrLevel_]);
		glVertexPointer(3, GL_FLOAT, 0, 0);
		glEnableClientState(GL_VERTEX_ARRAY);//you cant move glEnableClientState in front of cuda GraphicsMapResources, otherwise, you will have weird problem
		//glColor3f(1.0, 0.0, 0.0);
		//glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_] );
		//glDisableClientState(GL_VERTEX_ARRAY);
		//glBindBuffer( GL_ARRAY_BUFFER, 0 );
	}
Beispiel #5
0
 // Queues a download of `m` into image_data_ on `strm`, then queues a host
 // callback on the same stream that calls send() on this object.
 void enqueueSend(cv::cuda::GpuMat& m, cv::cuda::Stream& strm)
 {
     // Captureless, so it converts to the plain callback pointer the stream
     // takes; the object travels through userData and the status is ignored.
     auto sendWhenReached = [](int /*status*/, void *userData)
     {
         GPUSender *sender = static_cast<GPUSender *>(userData);
         sender->send();
     };

     m.download(image_data_, strm);
     strm.enqueueHostCallback(sendWhenReached, static_cast<void *>(this));
 }
Beispiel #6
0
/* Prepares a device image for the network: adapts the channel count to
 * num_channels_, resizes to input_geometry_, converts to 32-bit float and
 * splits the result into the planes of *input_channels. */
void _SSD::Preprocess(const cv::cuda::GpuMat& img, std::vector<cv::cuda::GpuMat>* input_channels)
{
    /* Pick the colour conversion, if any, that takes the source channel count
     * to the one the network expects. -1 means "use the image as it is". */
    const int src_channels = img.channels();
    int conversion = -1;
    if (num_channels_ == 1) {
        if (src_channels == 3)
            conversion = cv::COLOR_BGR2GRAY;
        else if (src_channels == 4)
            conversion = cv::COLOR_BGRA2GRAY;
    } else if (num_channels_ == 3) {
        if (src_channels == 4)
            conversion = cv::COLOR_BGRA2BGR;
        else if (src_channels == 1)
            conversion = cv::COLOR_GRAY2BGR;
    }

    cv::cuda::GpuMat sample;
    if (conversion >= 0)
        cv::cuda::cvtColor(img, sample, conversion);
    else
        sample = img;

    /* Resize only when the geometry differs from the network input. */
    cv::cuda::GpuMat sample_resized;
    if (sample.size() == input_geometry_)
        sample_resized = sample;
    else
        cv::cuda::resize(sample, sample_resized, input_geometry_);

    /* Convert to float with the matching channel count. */
    cv::cuda::GpuMat sample_float;
    const int float_type = (num_channels_ == 3) ? CV_32FC3 : CV_32FC1;
    sample_resized.convertTo(sample_float, float_type);

    /* Mean subtraction is currently disabled, so the float image is used
     * unchanged as the normalized sample. */
//	cv::subtract(sample_float, mean_, sample_normalized);
    cv::cuda::GpuMat sample_normalized = sample_float;

    /* Write the separate planes into the matrices of input_channels;
     * presumably those wrap the network's input layer - confirm at the
     * call site. */
    cv::cuda::split(sample_normalized, *input_channels);

//	CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
//			== net_->input_blobs()[0]->cpu_data())
//		<< "Input channels are not wrapping the input layer of the network.";
}
    // Starts tracking from a new reference frame: stores _cur_bbox_vec as the
    // current boxes (via update_cur_bbox_vec), uploads src_mat to the GPU and
    // converts it to the grey reference image src_grey_gpu. All GPU work is
    // queued on `stream`; this function does not wait for it.
    //
    // Only 3-channel images are processed; for any other channel count nothing
    // is updated. The CUDA device is switched to gpu_id for the duration of the
    // call and switched back afterwards.
    void update_tracking_flow(cv::Mat src_mat, std::vector<bbox_t> _cur_bbox_vec)
    {
        int const old_gpu_id = cv::cuda::getDevice();
        if (old_gpu_id != gpu_id)
            cv::cuda::setDevice(gpu_id);

        if (src_mat.channels() == 3) {
            // First use: allocate the colour and grey device images.
            if (src_mat_gpu.cols == 0) {
                src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type());
                src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1);
            }

            update_cur_bbox_vec(_cur_bbox_vec);

            //src_grey_gpu.upload(src_mat, stream);    // use BGR
            src_mat_gpu.upload(src_mat, stream);
            // cv::COLOR_BGR2GRAY is the C++ enumerator; the legacy C macro
            // CV_BGR2GRAY is not declared by the C++ headers in OpenCV 4.
            cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, cv::COLOR_BGR2GRAY, 1, stream);
        }
        if (old_gpu_id != gpu_id)
            cv::cuda::setDevice(old_gpu_id);
    }
Beispiel #8
0
// Runs one minimizeACaller step on the device buffers _d and _a, then
// multiplies theta by thetaStep.
//
// Returns true when theta was already <= thetaMin on entry. In that case the
// conversion of _a into stableDepth is additionally queued on cvStream and a
// CUDA event (stableDepthReady) is recorded behind it on the same stream.
//
// _d and _a are passed by value; their data pointers are used as float*.
// NOTE(review): assumes both are CV_32FC1 with cv.rows * cv.cols elements per
// layer - confirm against the callers.
bool Optimizer::optimizeA(const cv::cuda::GpuMat _d,cv::cuda::GpuMat _a){
    using namespace cv::cuda::device::dtam_optimizer;
    localStream = cv::cuda::StreamAccessor::getStream(cvStream);
    this->_a=_a;

    // Decided before theta is advanced below.
    bool doneOptimizing = theta <= thetaMin;
    int layerStep = cv.rows * cv.cols;
    float* d = (float*) _d.data;
    float* a = (float*) _a.data;

    loadConstants(cv.rows, cv.cols, cv.layers, layerStep, a, d, cv.data, (float*)cv.lo.data,
            (float*)cv.hi.data, (float*)cv.loInd.data);
    minimizeACaller  ( cv.data, a, d, cv.layers, theta,lambda);
    theta*=thetaStep;
    if (doneOptimizing){
        // NOTE(review): the event is heap-allocated and held as Ptr<char>; no
        // cudaEventDestroy is visible here, so the event may leak each time
        // this branch runs - confirm where it is released.
        stableDepthReady=Ptr<char>((char*)(new cudaEvent_t));
        cudaEventCreate((cudaEvent_t*)(char*)stableDepthReady,cudaEventBlockingSync);
        _a.convertTo(stableDepth,CV_32FC1,cv.depthStep,cv.far,cvStream);
        cudaEventRecord(*(cudaEvent_t*)(char*)stableDepthReady,localStream);
        stableDepthEnqueued = 1;
    }
    return doneOptimizing;
}
	// Copies the device image cvgmRGB_ into the CUDA-registered pixel buffer
	// object selected by getLevel( cvgmRGB_.cols ) and uploads that buffer into
	// the matching 2D texture.
	//
	// 3-channel input uses the RGB buffer/texture with GL_RGB; 1-channel input
	// uses the gray buffer/texture with GL_RED. Both bindings are reset to 0
	// before returning.
	//
	// Returns the texture name that was filled, or 0 when the channel count is
	// neither 3 nor 1 (nothing is uploaded in that case; previously the return
	// value was uninitialised).
	GLuint CGLUtil::gpuMapRgb2PixelBufferObj(const cv::cuda::GpuMat& cvgmRGB_ ){
		//http://rickarkin.blogspot.co.uk/2012/03/use-pbo-to-share-buffer-between-cuda.html
		int nPyrLevel_ = getLevel( cvgmRGB_.cols );
		GLuint uTexture = 0;
		// map OpenGL buffer object for writing from CUDA
		if (cvgmRGB_.channels() == 3) {
			uTexture = _auTexture[nPyrLevel_];
			void *pDev = NULL;
			cudaSafeCall( cudaGraphicsMapResources(1, &_apResourceRGBPxielBO[nPyrLevel_], 0));
			size_t nSize = 0;
			cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&pDev, &nSize , _apResourceRGBPxielBO[nPyrLevel_]));
			// wrap the mapped device pointer (no allocation) and copy while it is still mapped
			cv::cuda::GpuMat cvgmMapped( cvgmRGB_.size(), CV_8UC3, pDev);
			cvgmRGB_.copyTo(cvgmMapped);
			cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourceRGBPxielBO[nPyrLevel_], 0) );
			//texture mapping: with a pixel-unpack buffer bound, the NULL data pointer is an offset into that buffer
			glBindTexture( GL_TEXTURE_2D, uTexture);
			glBindBuffer ( GL_PIXEL_UNPACK_BUFFER_ARB, _auRGBPixelBO[nPyrLevel_]);
			glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, cvgmRGB_.cols, cvgmRGB_.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
			errorDetectorGL();
			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
			glBindTexture(GL_TEXTURE_2D, 0);
		}
		else if (cvgmRGB_.channels()==1) {
			uTexture = _auGrayTexture[nPyrLevel_];
			void *pDev = NULL;
			cudaSafeCall( cudaGraphicsMapResources(1, &_apResourceGrayPxielBO[nPyrLevel_], 0));
			size_t nSize = 0;
			cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&pDev, &nSize , _apResourceGrayPxielBO[nPyrLevel_]));
			// wrap the mapped device pointer (no allocation) and copy while it is still mapped
			cv::cuda::GpuMat cvgmMapped( cvgmRGB_.size(), CV_8UC1, pDev);
			cvgmRGB_.copyTo(cvgmMapped);
			cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourceGrayPxielBO[nPyrLevel_], 0) );
			//texture mapping: with a pixel-unpack buffer bound, the NULL data pointer is an offset into that buffer
			glBindTexture(GL_TEXTURE_2D, uTexture);
			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, _auGrayPixelBO[nPyrLevel_]);
			glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, cvgmRGB_.cols, cvgmRGB_.rows, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
			errorDetectorGL();
			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
			glBindTexture(GL_TEXTURE_2D, 0);
		}
		// any other channel count: nothing was uploaded, uTexture is still 0
		return uTexture;
	}//gpuMapRgb2PixelBufferObj
    // Tracks the current boxes from the stored reference frame into dst_mat
    // using the sparse pyramidal LK optical flow object, and returns the boxes
    // that were tracked successfully, moved by the flow of their centre point.
    //
    // dst_mat     : new frame; it is converted with a BGR-to-grey conversion.
    // check_error : currently unused (the code that read it is commented out).
    //
    // Returns the unchanged current boxes when the flow object is not
    // initialised or when the reference frame size differs from dst_mat (in
    // the latter case dst_mat becomes the new reference frame). Otherwise
    // returns only the boxes that passed every check; a box that fails once is
    // flagged in good_bbox_vec_flags and is not reported again until the boxes
    // are replaced.
    //
    // After a successful pass the new frame and the new points become the
    // reference for the next call. The CUDA device is switched to gpu_id for the
    // duration of the call.
    std::vector<bbox_t> tracking_flow(cv::Mat dst_mat, bool check_error = true)
    {
        if (sync_PyrLKOpticalFlow_gpu.empty()) {
            std::cout << "sync_PyrLKOpticalFlow_gpu isn't initialized \n";
            return cur_bbox_vec;
        }

        int const old_gpu_id = cv::cuda::getDevice();
        if(old_gpu_id != gpu_id)
            cv::cuda::setDevice(gpu_id);

        // First use: allocate the colour and grey device images.
        if (dst_mat_gpu.cols == 0) {
            dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
            dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
        }

        //dst_grey_gpu.upload(dst_mat, stream);    // use BGR
        dst_mat_gpu.upload(dst_mat, stream);
        // cv::COLOR_BGR2GRAY is the C++ enumerator; the legacy C macro
        // CV_BGR2GRAY is not declared by the C++ headers in OpenCV 4.
        cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, cv::COLOR_BGR2GRAY, 1, stream);

        // Frame size changed: flow between the two frames is not computed, the
        // new frame simply becomes the reference.
        if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
            stream.waitForCompletion();
            src_grey_gpu = dst_grey_gpu.clone();
            cv::cuda::setDevice(old_gpu_id);
            return cur_bbox_vec;
        }

        ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu);    // OpenCV 2.4.x
        sync_PyrLKOpticalFlow_gpu->calc(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, err_gpu, stream);    // OpenCV 3.x

        cv::Mat cur_pts_flow_cpu;
        cur_pts_flow_gpu.download(cur_pts_flow_cpu, stream);

        // The new frame becomes the reference for the next call.
        dst_grey_gpu.copyTo(src_grey_gpu, stream);

        cv::Mat err_cpu, status_cpu;
        err_gpu.download(err_cpu, stream);
        status_gpu.download(status_cpu, stream);

        // Everything above was only queued; the host copies are read below.
        stream.waitForCompletion();

        std::vector<bbox_t> result_bbox_vec;

        // Per-point results are only used when there is exactly one per box.
        const size_t box_count = cur_bbox_vec.size();
        if (static_cast<size_t>(err_cpu.cols) == box_count && static_cast<size_t>(status_cpu.cols) == box_count)
        {
            for (size_t i = 0; i < box_count; ++i)
            {
                const int col = static_cast<int>(i);
                cv::Point2f cur_key_pt = cur_pts_flow_cpu.at<cv::Point2f>(0, col);
                cv::Point2f prev_key_pt = prev_pts_flow_cpu.at<cv::Point2f>(0, col);

                float moved_x = cur_key_pt.x - prev_key_pt.x;
                float moved_y = cur_key_pt.y - prev_key_pt.y;

                // |move| < 100 on both axes, written as range checks so the
                // result does not depend on which abs() overload is visible.
                const bool small_move = moved_x > -100 && moved_x < 100 &&
                                        moved_y > -100 && moved_y < 100;

                // A box is kept only if it was still good, moved by a small
                // amount, was tracked with an error below flow_error, and its
                // moved corner stays at a positive position.
                const bool tracked = small_move && good_bbox_vec_flags[i] &&
                    err_cpu.at<float>(0, col) < flow_error && status_cpu.at<unsigned char>(0, col) != 0 &&
                    ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0;

                if (tracked)
                {
                    // +0.5 rounds to nearest before the value is stored back
                    cur_bbox_vec[i].x += moved_x + 0.5;
                    cur_bbox_vec[i].y += moved_y + 0.5;
                    result_bbox_vec.push_back(cur_bbox_vec[i]);
                }
                else
                {
                    good_bbox_vec_flags[i] = false;
                }

                //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]);
            }
        }

        // The points just computed become the starting points of the next call.
        cur_pts_flow_gpu.swap(prev_pts_flow_gpu);
        cur_pts_flow_cpu.copyTo(prev_pts_flow_cpu);

        if (old_gpu_id != gpu_id)
            cv::cuda::setDevice(old_gpu_id);

        return result_bbox_vec;
    }