// ConvertRGBA
bool gstCamera::ConvertRGBA( void* input, void** output, bool zeroCopy )
{
	if( !input || !output )
		return false;
	
	if( !mRGBA[0] )
	{
		const size_t size = mWidth * mHeight * sizeof(float4);

		for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ )
		{
			if( zeroCopy )
			{
				void* cpuPtr = NULL;
				void* gpuPtr = NULL;

				if( !cudaAllocMapped(&cpuPtr, &gpuPtr, size) )
				{
					printf(LOG_CUDA "gstCamera -- failed to allocate zeroCopy memory for %ux%xu RGBA texture\n", mWidth, mHeight);
					return false;
				}

				if( cpuPtr != gpuPtr )
				{
					printf(LOG_CUDA "gstCamera -- zeroCopy memory has different pointers, please use a UVA-compatible GPU\n");
					return false;
				}

				mRGBA[n] = gpuPtr;
			}
			else
			{
				if( CUDA_FAILED(cudaMalloc(&mRGBA[n], size)) )
				{
					printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight);
					return false;
				}
			}
		}
		
		printf(LOG_CUDA "gstreamer camera -- allocated %u RGBA ringbuffers\n", NUM_RINGBUFFERS);
	}
	
	if( onboardCamera() )
	{
		// onboard camera is NV12
		if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) )
			return false;
	}
	else
	{
		// USB webcam is RGB
		if( CUDA_FAILED(cudaRGBToRGBAf((uchar3*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) )
			return false;
	}
	
	*output     = mRGBA[mLatestRGBA];
	mLatestRGBA = (mLatestRGBA + 1) % NUM_RINGBUFFERS;
	return true;
}
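// Usage sketch (not part of the original sources): a hypothetical capture loop that
// feeds camera frames into ConvertRGBA(). The Capture() call and its signature are
// assumptions for illustration; substitute the camera's actual frame-grab API.
void captureLoopSketch( gstCamera* camera )
{
	void* imgCPU  = NULL;
	void* imgCUDA = NULL;
	void* imgRGBA = NULL;

	while( true )
	{
		// grab the next raw frame (assumed API; returns CPU/GPU pointers to the frame)
		if( !camera->Capture(&imgCPU, &imgCUDA, 1000) )
			continue;

		// convert NV12 (onboard camera) or RGB (USB webcam) input to float4 RGBA on the
		// GPU, using zero-copy mapped buffers so the CPU can read the result directly
		if( !camera->ConvertRGBA(imgCUDA, &imgRGBA, true) )
			continue;

		// imgRGBA now points at one of the NUM_RINGBUFFERS float4 buffers
	}
}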
// Allocate ZeroCopy mapped memory, shared between CUDA and CPU.
bool GIEFeatExtractor::cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size )
{
	if( !cpuPtr || !gpuPtr || size == 0 )
		return false;

	//CUDA(cudaSetDeviceFlags(cudaDeviceMapHost));

	if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) )
		return false;

	if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) )
		return false;

	memset(*cpuPtr, 0, size);
	std::cout << "cudaAllocMapped : " << size << " bytes" << std::endl;
	return true;
}
// PreProcess
bool imageNet::PreProcess( float* rgba, uint32_t width, uint32_t height )
{
	// verify parameters
	if( !rgba || width == 0 || height == 0 )
	{
		printf(LOG_TRT "imageNet::PreProcess( 0x%p, %u, %u ) -> invalid parameters\n", rgba, width, height);
		return false;
	}

	// downsample and convert to band-sequential BGR
	if( CUDA_FAILED(cudaPreImageNetMean((float4*)rgba, width, height, mInputCUDA, mWidth, mHeight,
								 make_float3(104.0069879317889f, 116.66876761696767f, 122.6789143406786f),
								 GetStream())) )
	{
		printf(LOG_TRT "imageNet::PreProcess() -- cudaPreImageNetMean() failed\n");
		return false;
	}

	return true;
}
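// Illustrative CPU reference (an assumption, not the actual CUDA kernel): it shows the
// transform cudaPreImageNetMean() is expected to apply -- downsampling the packed float4
// RGBA image to the network input size, reordering it to band-sequential (planar) BGR,
// and subtracting the per-channel mean values. Nearest-neighbor sampling is assumed here
// purely for brevity.
static void preImageNetMeanCPU( const float4* rgba, uint32_t inWidth, uint32_t inHeight,
                                float* bgrPlanar, uint32_t outWidth, uint32_t outHeight,
                                const float3& mean )
{
    for( uint32_t y=0; y < outHeight; y++ )
    {
        for( uint32_t x=0; x < outWidth; x++ )
        {
            // nearest-neighbor sample from the source image
            const uint32_t sx = x * inWidth  / outWidth;
            const uint32_t sy = y * inHeight / outHeight;
            const float4 px   = rgba[sy * inWidth + sx];

            const uint32_t n = outWidth * outHeight;
            const uint32_t i = y * outWidth + x;

            // planar BGR layout: all B values, then all G, then all R,
            // with the BGR mean (mean.x=B, mean.y=G, mean.z=R) subtracted
            bgrPlanar[0 * n + i] = px.z - mean.x;   // B
            bgrPlanar[1 * n + i] = px.y - mean.y;   // G
            bgrPlanar[2 * n + i] = px.x - mean.z;   // R
        }
    }
}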
bool GIEFeatExtractor::cudaFreeMapped(void *cpuPtr)
{
    if ( CUDA_FAILED( cudaFreeHost(cpuPtr) ) )
        return false;

    std::cout << "cudaFreeMapped: OK" << std::endl;
    return true;
}
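// Standalone sketch of the zero-copy pattern used by cudaAllocMapped()/cudaFreeMapped()
// above (illustration only, built as a separate program): the page-locked host allocation
// is mapped into the device address space, so CPU and GPU share one buffer without
// explicit cudaMemcpy calls.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstring>

int main()
{
    const size_t size = 1024 * sizeof(float);
    void* cpuPtr = NULL;
    void* gpuPtr = NULL;

    // optional on some platforms; must precede other CUDA runtime calls if used
    cudaSetDeviceFlags(cudaDeviceMapHost);

    if( cudaHostAlloc(&cpuPtr, size, cudaHostAllocMapped) != cudaSuccess )
        return 1;

    if( cudaHostGetDevicePointer(&gpuPtr, cpuPtr, 0) != cudaSuccess )
        return 1;

    // on UVA-capable devices (e.g. Jetson), both pointers refer to the same memory
    printf("cpuPtr=%p gpuPtr=%p\n", cpuPtr, gpuPtr);

    memset(cpuPtr, 0, size);          // CPU write
    cudaMemset(gpuPtr, 0, size);      // GPU write to the same buffer
    cudaDeviceSynchronize();          // make the GPU write visible to the host

    cudaFreeHost(cpuPtr);
    return 0;
}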
bool GIEFeatExtractor::init(string _caffemodel_file, string _binaryproto_meanfile, float _meanR, float _meanG, float _meanB, string _prototxt_file, int _resizeWidth, int _resizeHeight, string _blob_name)
{

    cudaDeviceProp prop;
    int whichDevice;

    if ( CUDA_FAILED( cudaGetDevice(&whichDevice)) )
       return false;
 
    if ( CUDA_FAILED( cudaGetDeviceProperties(&prop, whichDevice)) )
       return false;

    if (prop.canMapHostMemory != 1)
    {
       std::cout << "Device cannot map memory!" << std::endl;
       return false;
    }

    //if ( CUDA_FAILED( cudaSetDeviceFlags(cudaDeviceMapHost)) )
    //    return false;

    // Assign specified .caffemodel, .binaryproto, .prototxt files     
    caffemodel_file  = _caffemodel_file;
    binaryproto_meanfile = _binaryproto_meanfile;
    
    mean_values.push_back(_meanB);
    mean_values.push_back(_meanG);
    mean_values.push_back(_meanR);

    prototxt_file = _prototxt_file;
    
    //Assign blob to be extracted
    blob_name = _blob_name;
    
    // Load and convert model
    std::stringstream gieModelStream;
    gieModelStream.seekg(0, gieModelStream.beg);

    if( !caffeToGIEModel( prototxt_file, caffemodel_file, binaryproto_meanfile, std::vector< std::string > { blob_name }, 1, gieModelStream) )
    {
        std::cout << "Failed to load: " << caffemodel_file << std::endl;
        return false;
    }

    std::cout << caffemodel_file << ": loaded." << std::endl;

    // Create runtime inference engine execution context
    nvinfer1::IRuntime* infer = createInferRuntime(gLogger);
    if( !infer )
    {
        std::cout << "Failed to create InferRuntime." << std::endl;
        return false;
    }

    nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream);
    if( !engine )
    {
        std::cout << "Failed to create CUDA engine." << std::endl;
        return false;
    }

    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    if( !context )
    {
        std::cout << "Failed to create execution context." << std::endl;
        return false;
    }

    std::cout << "CUDA engine context initialized with " << engine->getNbBindings() << " bindings." << std::endl;

    mInfer   = infer;
    mEngine  = engine;
    mContext = context;

    // Determine dimensions of network bindings
    const int inputIndex  = engine->getBindingIndex("data");
    const int outputIndex = engine->getBindingIndex( blob_name.c_str() );

    std::cout << caffemodel_file << " input  binding index: " << inputIndex << std::endl;
    std::cout << caffemodel_file << " output binding index: " << outputIndex << std::endl;
	
    nvinfer1::Dims3 inputDims  = engine->getBindingDimensions(inputIndex);
    nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex);
	
    size_t inputSize  = inputDims.c * inputDims.h * inputDims.w * sizeof(float);
    size_t outputSize = outputDims.c * outputDims.h * outputDims.w * sizeof(float);
	
    std::cout << caffemodel_file << "input  dims (c=" << inputDims.c << " h=" << inputDims.h << " w=" << inputDims.w << ") size=" << inputSize << std::endl;
    std::cout << caffemodel_file << "output dims (c=" << outputDims.c << " h=" << outputDims.h << " w=" << outputDims.w << ") size=" << outputSize << std::endl;
	
    // Allocate memory to hold the input image
    if ( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputCUDA, inputSize) )
    {
	std::cout << "Failed to alloc CUDA mapped memory for input, " << inputSize << " bytes" << std::endl;
    }

    mInputSize   = inputSize;
    mWidth       = inputDims.w;
    mHeight      = inputDims.h;
	
    // Allocate output memory to hold the result
    if( !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputCUDA, outputSize) )
    {
        std::cout << "Failed to alloc CUDA mapped memory for output, " << outputSize << " bytes" << std::endl;
        return false;
    }

    mOutputSize    = outputSize;
    mOutputDims    = outputDims.c;
	
    std::cout << caffemodel_file << ": initialized." << std::endl;

    if (binaryproto_meanfile=="")
    {
        // Set input size if the mean pixel is used
        resizeDims.h = _resizeHeight;
        resizeDims.w = _resizeWidth;
        resizeDims.c = 3;
        resizeDims.n = 1;
    }

    return true;
}
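// Sketch (an assumption, not the class's actual extraction method) of how the mapped
// buffers set up by init() would typically be used with the legacy TensorRT execute()
// API: the preprocessed image is written into mInputCPU, inference runs on the
// mInputCUDA/mOutputCUDA device pointers, and the features are read back from
// mOutputCPU after synchronizing. Member types and the helper name are hypothetical.
bool GIEFeatExtractor::runInferenceSketch()
{
    void* buffers[2];
    buffers[mEngine->getBindingIndex("data")]            = mInputCUDA;
    buffers[mEngine->getBindingIndex(blob_name.c_str())] = mOutputCUDA;

    // ... fill mInputCPU with the mean-subtracted, band-sequential BGR image ...

    if( !mContext->execute(1, buffers) )     // batch size 1, matching caffeToGIEModel() above
        return false;

    // zero-copy output: synchronize before reading mOutputCPU on the host
    cudaDeviceSynchronize();

    // mOutputCPU now holds the extracted features (mOutputSize bytes)
    return true;
}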