// ConvertRGBA bool gstCamera::ConvertRGBA( void* input, void** output, bool zeroCopy ) { if( !input || !output ) return false; if( !mRGBA[0] ) { const size_t size = mWidth * mHeight * sizeof(float4); for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) { if( zeroCopy ) { void* cpuPtr = NULL; void* gpuPtr = NULL; if( !cudaAllocMapped(&cpuPtr, &gpuPtr, size) ) { printf(LOG_CUDA "gstCamera -- failed to allocate zeroCopy memory for %ux%xu RGBA texture\n", mWidth, mHeight); return false; } if( cpuPtr != gpuPtr ) { printf(LOG_CUDA "gstCamera -- zeroCopy memory has different pointers, please use a UVA-compatible GPU\n"); return false; } mRGBA[n] = gpuPtr; } else { if( CUDA_FAILED(cudaMalloc(&mRGBA[n], size)) ) { printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight); return false; } } } printf(LOG_CUDA "gstreamer camera -- allocated %u RGBA ringbuffers\n", NUM_RINGBUFFERS); } if( onboardCamera() ) { // onboard camera is NV12 if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) ) return false; } else { // USB webcam is RGB if( CUDA_FAILED(cudaRGBToRGBAf((uchar3*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) ) return false; } *output = mRGBA[mLatestRGBA]; mLatestRGBA = (mLatestRGBA + 1) % NUM_RINGBUFFERS; return true; }
// Allocate ZeroCopy mapped memory, shared between CUDA and CPU. bool GIEFeatExtractor::cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size ) { if( !cpuPtr || !gpuPtr || size == 0 ) return false; //CUDA(cudaSetDeviceFlags(cudaDeviceMapHost)); if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) ) return false; if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) ) return false; memset(*cpuPtr, 0, size); std::cout << "cudaAllocMapped : " << size << " bytes" << std::endl; return true; }
// PreProcess bool imageNet::PreProcess( float* rgba, uint32_t width, uint32_t height ) { // verify parameters if( !rgba || width == 0 || height == 0 ) { printf(LOG_TRT "imageNet::PreProcess( 0x%p, %u, %u ) -> invalid parameters\n", rgba, width, height); return false; } // downsample and convert to band-sequential BGR if( CUDA_FAILED(cudaPreImageNetMean((float4*)rgba, width, height, mInputCUDA, mWidth, mHeight, make_float3(104.0069879317889f, 116.66876761696767f, 122.6789143406786f), GetStream())) ) { printf(LOG_TRT "imageNet::PreProcess() -- cudaPreImageNetMean() failed\n"); return false; } return true; }
// Release host memory via cudaFreeHost().
//
//   cpuPtr - host pointer to release; passed to cudaFreeHost() unchanged.
//
// Returns true if cudaFreeHost() succeeded, false otherwise.
bool GIEFeatExtractor::cudaFreeMapped(void *cpuPtr)
{
    if ( CUDA_FAILED( cudaFreeHost(cpuPtr) ) )
        return false;

    std::cout << "cudaFreeMapped: OK" << std::endl;

    // The success path previously fell off the end of this non-void function,
    // which is undefined behaviour.
    return true;
}
bool GIEFeatExtractor::init(string _caffemodel_file, string _binaryproto_meanfile, float _meanR, float _meanG, float _meanB, string _prototxt_file, int _resizeWidth, int _resizeHeight, string _blob_name) { cudaDeviceProp prop; int whichDevice; if ( CUDA_FAILED( cudaGetDevice(&whichDevice)) ) return false; if ( CUDA_FAILED( cudaGetDeviceProperties(&prop, whichDevice)) ) return false; if (prop.canMapHostMemory != 1) { std::cout << "Device cannot map memory!" << std::endl; return false; } //if ( CUDA_FAILED( cudaSetDeviceFlags(cudaDeviceMapHost)) ) // return false; // Assign specified .caffemodel, .binaryproto, .prototxt files caffemodel_file = _caffemodel_file; binaryproto_meanfile = _binaryproto_meanfile; mean_values.push_back(_meanB); mean_values.push_back(_meanG); mean_values.push_back(_meanR); prototxt_file = _prototxt_file; //Assign blob to be extracted blob_name = _blob_name; // Load and convert model std::stringstream gieModelStream; gieModelStream.seekg(0, gieModelStream.beg); if( !caffeToGIEModel( prototxt_file, caffemodel_file, binaryproto_meanfile, std::vector< std::string > { blob_name }, 1, gieModelStream) ) { std::cout << "Failed to load: " << caffemodel_file << std::endl; } std::cout << caffemodel_file << ": loaded." << std::endl; // Create runtime inference engine execution context nvinfer1::IRuntime* infer = createInferRuntime(gLogger); if( !infer ) { std::cout << "Failed to create InferRuntime." << std::endl; } nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream); if( !engine ) { std::cout << "Failed to create CUDA engine." << std::endl; } nvinfer1::IExecutionContext* context = engine->createExecutionContext(); if( !context ) { std::cout << "failed to create execution context." << std::endl; } std::cout << "CUDA engine context initialized with " << engine->getNbBindings() << " bindings." 
<< std::endl; mInfer = infer; mEngine = engine; mContext = context; // Determine dimensions of network bindings const int inputIndex = engine->getBindingIndex("data"); const int outputIndex = engine->getBindingIndex( blob_name.c_str() ); std::cout << caffemodel_file << " input binding index: " << inputIndex << std::endl; std::cout << caffemodel_file << " output binding index: " << outputIndex << std::endl; nvinfer1::Dims3 inputDims = engine->getBindingDimensions(inputIndex); nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex); size_t inputSize = inputDims.c * inputDims.h * inputDims.w * sizeof(float); size_t outputSize = outputDims.c * outputDims.h * outputDims.w * sizeof(float); std::cout << caffemodel_file << "input dims (c=" << inputDims.c << " h=" << inputDims.h << " w=" << inputDims.w << ") size=" << inputSize << std::endl; std::cout << caffemodel_file << "output dims (c=" << outputDims.c << " h=" << outputDims.h << " w=" << outputDims.w << ") size=" << outputSize << std::endl; // Allocate memory to hold the input image if ( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputCUDA, inputSize) ) { std::cout << "Failed to alloc CUDA mapped memory for input, " << inputSize << " bytes" << std::endl; } mInputSize = inputSize; mWidth = inputDims.w; mHeight = inputDims.h; // Allocate output memory to hold the result if( !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputCUDA, outputSize) ) { std::cout << "Failed to alloc CUDA mapped memory for output, " << outputSize << " bytes" << std::endl; } mOutputSize = outputSize; mOutputDims = outputDims.c; std::cout << caffemodel_file << ": initialized." << std::endl; if (binaryproto_meanfile=="") { // Set input size if the mean pixel is used resizeDims.h = _resizeHeight; resizeDims.w = _resizeWidth; resizeDims.c = 3; resizeDims.n = 1; } return true; }