GLuint CGLUtil::gpuMapRgb2PixelBufferObj(const cv::cuda::GpuMat& cvgmRGB_ ){
		//http://rickarkin.blogspot.co.uk/2012/03/use-pbo-to-share-buffer-between-cuda.html
		int nPyrLevel_ = getLevel( cvgmRGB_.cols );
		GLuint uTexture;
		// map OpenGL buffer object for writing from CUDA
		if (cvgmRGB_.channels() == 3) {
			uTexture = _auTexture[nPyrLevel_];
			void *pDev;
			cudaSafeCall( cudaGraphicsMapResources(1, &_apResourceRGBPxielBO[nPyrLevel_], 0)); 
			size_t nSize; 
			cudaSafeCall( cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize , _apResourceRGBPxielBO[nPyrLevel_]));
			cv::cuda::GpuMat cvgmRGBA( cvgmRGB_.size(), CV_8UC3, pDev); 
			cvgmRGB_.copyTo(cvgmRGBA); 
			cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourceRGBPxielBO[nPyrLevel_], 0) );
			//texture mapping
			glBindTexture( GL_TEXTURE_2D, uTexture);
			glBindBuffer ( GL_PIXEL_UNPACK_BUFFER_ARB, _auRGBPixelBO[nPyrLevel_]);
			glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, cvgmRGB_.cols, cvgmRGB_.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
			errorDetectorGL();
			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
			glBindTexture(GL_TEXTURE_2D, 0);
		}
		else if (cvgmRGB_.channels()==1) {
			uTexture = _auGrayTexture[nPyrLevel_];
			void *pDev;
			cudaSafeCall( cudaGraphicsMapResources(1, &_apResourceGrayPxielBO[nPyrLevel_], 0)); 
			size_t nSize; 
			cudaSafeCall( cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize , _apResourceGrayPxielBO[nPyrLevel_]));
			cv::cuda::GpuMat cvgmRGBA( cvgmRGB_.size(), CV_8UC1, pDev);
			cvgmRGB_.copyTo(cvgmRGBA); 
			cudaSafeCall( cudaGraphicsUnmapResources(1, &_apResourceGrayPxielBO[nPyrLevel_], 0) );
			//texture mapping
			glBindTexture(GL_TEXTURE_2D, uTexture);
			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, _auGrayPixelBO[nPyrLevel_]);
			glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, cvgmRGB_.cols, cvgmRGB_.rows, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
			errorDetectorGL();
			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
			glBindTexture(GL_TEXTURE_2D, 0);
		}
		return uTexture;
	}//gpuMapRgb2PixelBufferObj
	void CGLUtil::gpuMapRGBResources(const cv::cuda::GpuMat& cvgmRGBs_){
		int nPyrLevel_ = getLevel( cvgmRGBs_.cols );
		// map OpenGL buffer object for writing from CUDA
		void *pDev;
		cudaGraphicsMapResources(1, &_apResourceRGBVBO[nPyrLevel_], 0);
		size_t nSize; 
		cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourceRGBVBO[nPyrLevel_] );
		cv::cuda::GpuMat cvgmRGBs(cvgmRGBs_.size(),CV_8UC3,pDev);
		cvgmRGBs_.copyTo(cvgmRGBs);
		cudaGraphicsUnmapResources(1, &_apResourceRGBVBO[nPyrLevel_], 0);
		// render from the vbo
		glBindBuffer(GL_ARRAY_BUFFER, _auRGBVBO[nPyrLevel_]);
		glColorPointer(3, GL_UNSIGNED_BYTE, 0, 0);
		glEnableClientState(GL_COLOR_ARRAY);//you cant move glEnableClientState infront of cuda GraphicsMapResources, otherwise, you will have weird problem
		//glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_] );
		//glDisableClientState(GL_COLOR_ARRAY);
		//glBindBuffer( GL_ARRAY_BUFFER, 0 );
	}
	void CGLUtil::gpuMapNlResources(const cv::cuda::GpuMat& cvgmNls_){
		int nPyrLevel_ = getLevel( cvgmNls_.cols );
		// map OpenGL buffer object for writing from CUDA
		void *pDev;
		cudaGraphicsMapResources(1, &_apResourceNlVBO[nPyrLevel_], 0);
		size_t nSize; 
		cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourceNlVBO[nPyrLevel_] );
		cv::cuda::GpuMat cvgmNls(cvgmNls_.size(),CV_32FC3,pDev);
		cvgmNls_.copyTo(cvgmNls);
		cudaGraphicsUnmapResources(1, &_apResourceNlVBO[nPyrLevel_], 0);
		// render from the vbo
		glBindBuffer(GL_ARRAY_BUFFER, _auNlVBO[nPyrLevel_]);
		glNormalPointer(GL_FLOAT, 12, 0); //12 is the stride = the number of bytes occupied by each normal
		glEnableClientState(GL_NORMAL_ARRAY);//you cant move glEnableClientState infront of cuda GraphicsMapResources, otherwise, you will have weird problem
		//glColor3f(1.0, 0.0, 0.0);
		//glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_] );
		//glDisableClientState(GL_NORMAL_ARRAY);
		//glBindBuffer( GL_ARRAY_BUFFER, 0 );
	}
	void CGLUtil::gpuMapPtResources(const cv::cuda::GpuMat& cvgmPts_){
		int nPyrLevel_ = getLevel( cvgmPts_.cols );
		// map OpenGL buffer object for writing from CUDA
		void *pDev;
		cudaGraphicsMapResources(1, &_apResourcePtVBO[nPyrLevel_], 0);
		size_t nSize; 
		cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize, _apResourcePtVBO[nPyrLevel_] );
		cv::cuda::GpuMat cvgmPts( cvgmPts_.size(),CV_32FC3,pDev );
		cvgmPts_.copyTo(cvgmPts); // the operation of the Buffer must be done before cudaGraphicsUnmapResources(), otherwise, the buffer will affects each other
		cudaGraphicsUnmapResources(1, &_apResourcePtVBO[nPyrLevel_], 0);
		// render from the vbo
		glBindBuffer(GL_ARRAY_BUFFER, _auPtVBO[nPyrLevel_]);
		glVertexPointer(3, GL_FLOAT, 0, 0);
		glEnableClientState(GL_VERTEX_ARRAY);//you cant move glEnableClientState in front of cuda GraphicsMapResources, otherwise, you will have weird problem
		//glColor3f(1.0, 0.0, 0.0);
		//glDrawArrays(GL_POINTS, 0, btl::kinect::__aKinectWxH[usPyrLevel_] );
		//glDisableClientState(GL_VERTEX_ARRAY);
		//glBindBuffer( GL_ARRAY_BUFFER, 0 );
	}