C++ (Cpp) cudaAllocMapped 예제들

예제 #1

0

파일 보기

파일: gstCamera.cpp 프로젝트: XJTUeducation/jetson-inference

// ConvertRGBA
bool gstCamera::ConvertRGBA( void* input, void** output, bool zeroCopy )
{
	if( !input || !output )
		return false;
	
	if( !mRGBA[0] )
	{
		const size_t size = mWidth * mHeight * sizeof(float4);

		for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ )
		{
			if( zeroCopy )
			{
				void* cpuPtr = NULL;
				void* gpuPtr = NULL;

				if( !cudaAllocMapped(&cpuPtr, &gpuPtr, size) )
				{
					printf(LOG_CUDA "gstCamera -- failed to allocate zeroCopy memory for %ux%xu RGBA texture\n", mWidth, mHeight);
					return false;
				}

				if( cpuPtr != gpuPtr )
				{
					printf(LOG_CUDA "gstCamera -- zeroCopy memory has different pointers, please use a UVA-compatible GPU\n");
					return false;
				}

				mRGBA[n] = gpuPtr;
			}
			else
			{
				if( CUDA_FAILED(cudaMalloc(&mRGBA[n], size)) )
				{
					printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight);
					return false;
				}
			}
		}
		
		printf(LOG_CUDA "gstreamer camera -- allocated %u RGBA ringbuffers\n", NUM_RINGBUFFERS);
	}
	
	if( onboardCamera() )
	{
		// onboard camera is NV12
		if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) )
			return false;
	}
	else
	{
		// USB webcam is RGB
		if( CUDA_FAILED(cudaRGBToRGBAf((uchar3*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) )
			return false;
	}
	
	*output     = mRGBA[mLatestRGBA];
	mLatestRGBA = (mLatestRGBA + 1) % NUM_RINGBUFFERS;
	return true;
}

예제 #2

0

파일 보기

파일: gstCamera.cpp 프로젝트: XJTUeducation/jetson-inference

// checkBuffer
void gstCamera::checkBuffer()
{
	if( !mAppSink )
		return;

	// block waiting for the buffer
	GstSample* gstSample = gst_app_sink_pull_sample(mAppSink);
	
	if( !gstSample )
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_app_sink_pull_sample() returned NULL...\n");
		return;
	}
	
	GstBuffer* gstBuffer = gst_sample_get_buffer(gstSample);
	
	if( !gstBuffer )
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_sample_get_buffer() returned NULL...\n");
		return;
	}
	
	// retrieve
	GstMapInfo map; 

	if(	!gst_buffer_map(gstBuffer, &map, GST_MAP_READ) ) 
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer_map() failed...\n");
		return;
	}
	
	//gst_util_dump_mem(map.data, map.size); 

	void* gstData = map.data; //GST_BUFFER_DATA(gstBuffer);
	const uint32_t gstSize = map.size; //GST_BUFFER_SIZE(gstBuffer);
	
	if( !gstData )
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL data pointer...\n");
		release_return;
	}
	
	// retrieve caps
	GstCaps* gstCaps = gst_sample_get_caps(gstSample);
	
	if( !gstCaps )
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL caps...\n");
		release_return;
	}
	
	GstStructure* gstCapsStruct = gst_caps_get_structure(gstCaps, 0);
	
	if( !gstCapsStruct )
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_caps had NULL structure...\n");
		release_return;
	}
	
	// get width & height of the buffer
	int width  = 0;
	int height = 0;
	
	if( !gst_structure_get_int(gstCapsStruct, "width", &width) ||
		!gst_structure_get_int(gstCapsStruct, "height", &height) )
	{
		printf(LOG_GSTREAMER "gstreamer camera -- gst_caps missing width/height...\n");
		release_return;
	}
	
	if( width < 1 || height < 1 )
		release_return;
	
	mWidth  = width;
	mHeight = height;
	mDepth  = (gstSize * 8) / (width * height);
	mSize   = gstSize;
	
	//printf(LOG_GSTREAMER "gstreamer camera recieved %ix%i frame (%u bytes, %u bpp)\n", width, height, gstSize, mDepth);
	
	// make sure ringbuffer is allocated
	if( !mRingbufferCPU[0] )
	{
		for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ )
		{
			if( !cudaAllocMapped(&mRingbufferCPU[n], &mRingbufferGPU[n], gstSize) )
				printf(LOG_CUDA "gstreamer camera -- failed to allocate ringbuffer %u  (size=%u)\n", n, gstSize);
		}
		
		printf(LOG_CUDA "gstreamer camera -- allocated %u ringbuffers, %u bytes each\n", NUM_RINGBUFFERS, gstSize);
	}
	
	// copy to next ringbuffer
	const uint32_t nextRingbuffer = (mLatestRingbuffer + 1) % NUM_RINGBUFFERS;		
	
	//printf(LOG_GSTREAMER "gstreamer camera -- using ringbuffer #%u for next frame\n", nextRingbuffer);
	memcpy(mRingbufferCPU[nextRingbuffer], gstData, gstSize);
	gst_buffer_unmap(gstBuffer, &map); 
	//gst_buffer_unref(gstBuffer);
	gst_sample_unref(gstSample);
	
	
	// update and signal sleeping threads
	mRingMutex->lock();
	mLatestRingbuffer = nextRingbuffer;
	mLatestRetrieved  = false;
	mRingMutex->unlock();
	mWaitEvent->wakeAll();
}

예제 #3

0

파일 보기

파일: GIEFeatExtractor.cpp 프로젝트: GiuliaP/caffe_feat_extraction

bool GIEFeatExtractor::init(string _caffemodel_file, string _binaryproto_meanfile, float _meanR, float _meanG, float _meanB, string _prototxt_file, int _resizeWidth, int _resizeHeight, string _blob_name)
{

    cudaDeviceProp prop;
    int whichDevice;

    if ( CUDA_FAILED( cudaGetDevice(&whichDevice)) )
       return false;
 
    if ( CUDA_FAILED( cudaGetDeviceProperties(&prop, whichDevice)) )
       return false;

    if (prop.canMapHostMemory != 1)
    {
       std::cout << "Device cannot map memory!" << std::endl;
       return false;
    }

    //if ( CUDA_FAILED( cudaSetDeviceFlags(cudaDeviceMapHost)) )
    //    return false;

    // Assign specified .caffemodel, .binaryproto, .prototxt files     
    caffemodel_file  = _caffemodel_file;
    binaryproto_meanfile = _binaryproto_meanfile;
    
    mean_values.push_back(_meanB);
    mean_values.push_back(_meanG);
    mean_values.push_back(_meanR);

    prototxt_file = _prototxt_file;
    
    //Assign blob to be extracted
    blob_name = _blob_name;
    
    // Load and convert model
    std::stringstream gieModelStream;
    gieModelStream.seekg(0, gieModelStream.beg);

    if( !caffeToGIEModel( prototxt_file, caffemodel_file, binaryproto_meanfile, std::vector< std::string > {  blob_name }, 1, gieModelStream) )
    {
	std::cout << "Failed to load: " << caffemodel_file << std::endl;
    }

    std::cout << caffemodel_file << ": loaded." << std::endl;

    // Create runtime inference engine execution context
    nvinfer1::IRuntime* infer = createInferRuntime(gLogger);
    if( !infer )
    {
        std::cout << "Failed to create InferRuntime." << std::endl;
    }
	
    nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream);
    if( !engine )
    {
	std::cout << "Failed to create CUDA engine." << std::endl;
    }
	
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    if( !context )
    {
	std::cout << "failed to create execution context." << std::endl;
    }

    std::cout << "CUDA engine context initialized with " << engine->getNbBindings() << " bindings." << std::endl;
	
    mInfer   = infer;
    mEngine  = engine;
    mContext = context;

    // Determine dimensions of network bindings
    const int inputIndex  = engine->getBindingIndex("data");
    const int outputIndex = engine->getBindingIndex( blob_name.c_str() );

    std::cout << caffemodel_file << " input  binding index: " << inputIndex << std::endl;
    std::cout << caffemodel_file << " output binding index: " << outputIndex << std::endl;
	
    nvinfer1::Dims3 inputDims  = engine->getBindingDimensions(inputIndex);
    nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex);
	
    size_t inputSize  = inputDims.c * inputDims.h * inputDims.w * sizeof(float);
    size_t outputSize = outputDims.c * outputDims.h * outputDims.w * sizeof(float);
	
    std::cout << caffemodel_file << "input  dims (c=" << inputDims.c << " h=" << inputDims.h << " w=" << inputDims.w << ") size=" << inputSize << std::endl;
    std::cout << caffemodel_file << "output dims (c=" << outputDims.c << " h=" << outputDims.h << " w=" << outputDims.w << ") size=" << outputSize << std::endl;
	
    // Allocate memory to hold the input image
    if ( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputCUDA, inputSize) )
    {
	std::cout << "Failed to alloc CUDA mapped memory for input, " << inputSize << " bytes" << std::endl;
    }

    mInputSize   = inputSize;
    mWidth       = inputDims.w;
    mHeight      = inputDims.h;
	
    // Allocate output memory to hold the result
    if( !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputCUDA, outputSize) )
    {
        std::cout << "Failed to alloc CUDA mapped memory for output, " << outputSize << " bytes" << std::endl;

    }

    mOutputSize    = outputSize;
    mOutputDims    = outputDims.c;
	
    std::cout << caffemodel_file << ": initialized." << std::endl;

    if (binaryproto_meanfile=="")
    {
        // Set input size if the mean pixel is used
        resizeDims.h = _resizeHeight;
        resizeDims.w = _resizeWidth;
        resizeDims.c = 3;
        resizeDims.n = 1;
    }

    return true;
}