// ConvertRGBA bool gstCamera::ConvertRGBA( void* input, void** output, bool zeroCopy ) { if( !input || !output ) return false; if( !mRGBA[0] ) { const size_t size = mWidth * mHeight * sizeof(float4); for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) { if( zeroCopy ) { void* cpuPtr = NULL; void* gpuPtr = NULL; if( !cudaAllocMapped(&cpuPtr, &gpuPtr, size) ) { printf(LOG_CUDA "gstCamera -- failed to allocate zeroCopy memory for %ux%xu RGBA texture\n", mWidth, mHeight); return false; } if( cpuPtr != gpuPtr ) { printf(LOG_CUDA "gstCamera -- zeroCopy memory has different pointers, please use a UVA-compatible GPU\n"); return false; } mRGBA[n] = gpuPtr; } else { if( CUDA_FAILED(cudaMalloc(&mRGBA[n], size)) ) { printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight); return false; } } } printf(LOG_CUDA "gstreamer camera -- allocated %u RGBA ringbuffers\n", NUM_RINGBUFFERS); } if( onboardCamera() ) { // onboard camera is NV12 if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) ) return false; } else { // USB webcam is RGB if( CUDA_FAILED(cudaRGBToRGBAf((uchar3*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) ) return false; } *output = mRGBA[mLatestRGBA]; mLatestRGBA = (mLatestRGBA + 1) % NUM_RINGBUFFERS; return true; }
// checkBuffer void gstCamera::checkBuffer() { if( !mAppSink ) return; // block waiting for the buffer GstSample* gstSample = gst_app_sink_pull_sample(mAppSink); if( !gstSample ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_app_sink_pull_sample() returned NULL...\n"); return; } GstBuffer* gstBuffer = gst_sample_get_buffer(gstSample); if( !gstBuffer ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_sample_get_buffer() returned NULL...\n"); return; } // retrieve GstMapInfo map; if( !gst_buffer_map(gstBuffer, &map, GST_MAP_READ) ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer_map() failed...\n"); return; } //gst_util_dump_mem(map.data, map.size); void* gstData = map.data; //GST_BUFFER_DATA(gstBuffer); const uint32_t gstSize = map.size; //GST_BUFFER_SIZE(gstBuffer); if( !gstData ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL data pointer...\n"); release_return; } // retrieve caps GstCaps* gstCaps = gst_sample_get_caps(gstSample); if( !gstCaps ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL caps...\n"); release_return; } GstStructure* gstCapsStruct = gst_caps_get_structure(gstCaps, 0); if( !gstCapsStruct ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_caps had NULL structure...\n"); release_return; } // get width & height of the buffer int width = 0; int height = 0; if( !gst_structure_get_int(gstCapsStruct, "width", &width) || !gst_structure_get_int(gstCapsStruct, "height", &height) ) { printf(LOG_GSTREAMER "gstreamer camera -- gst_caps missing width/height...\n"); release_return; } if( width < 1 || height < 1 ) release_return; mWidth = width; mHeight = height; mDepth = (gstSize * 8) / (width * height); mSize = gstSize; //printf(LOG_GSTREAMER "gstreamer camera recieved %ix%i frame (%u bytes, %u bpp)\n", width, height, gstSize, mDepth); // make sure ringbuffer is allocated if( !mRingbufferCPU[0] ) { for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) { if( !cudaAllocMapped(&mRingbufferCPU[n], &mRingbufferGPU[n], gstSize) ) printf(LOG_CUDA "gstreamer camera -- failed to allocate ringbuffer %u (size=%u)\n", n, gstSize); } printf(LOG_CUDA "gstreamer camera -- allocated %u ringbuffers, %u bytes each\n", NUM_RINGBUFFERS, gstSize); } // copy to next ringbuffer const uint32_t nextRingbuffer = (mLatestRingbuffer + 1) % NUM_RINGBUFFERS; //printf(LOG_GSTREAMER "gstreamer camera -- using ringbuffer #%u for next frame\n", nextRingbuffer); memcpy(mRingbufferCPU[nextRingbuffer], gstData, gstSize); gst_buffer_unmap(gstBuffer, &map); //gst_buffer_unref(gstBuffer); gst_sample_unref(gstSample); // update and signal sleeping threads mRingMutex->lock(); mLatestRingbuffer = nextRingbuffer; mLatestRetrieved = false; mRingMutex->unlock(); mWaitEvent->wakeAll(); }
bool GIEFeatExtractor::init(string _caffemodel_file, string _binaryproto_meanfile, float _meanR, float _meanG, float _meanB, string _prototxt_file, int _resizeWidth, int _resizeHeight, string _blob_name) { cudaDeviceProp prop; int whichDevice; if ( CUDA_FAILED( cudaGetDevice(&whichDevice)) ) return false; if ( CUDA_FAILED( cudaGetDeviceProperties(&prop, whichDevice)) ) return false; if (prop.canMapHostMemory != 1) { std::cout << "Device cannot map memory!" << std::endl; return false; } //if ( CUDA_FAILED( cudaSetDeviceFlags(cudaDeviceMapHost)) ) // return false; // Assign specified .caffemodel, .binaryproto, .prototxt files caffemodel_file = _caffemodel_file; binaryproto_meanfile = _binaryproto_meanfile; mean_values.push_back(_meanB); mean_values.push_back(_meanG); mean_values.push_back(_meanR); prototxt_file = _prototxt_file; //Assign blob to be extracted blob_name = _blob_name; // Load and convert model std::stringstream gieModelStream; gieModelStream.seekg(0, gieModelStream.beg); if( !caffeToGIEModel( prototxt_file, caffemodel_file, binaryproto_meanfile, std::vector< std::string > { blob_name }, 1, gieModelStream) ) { std::cout << "Failed to load: " << caffemodel_file << std::endl; } std::cout << caffemodel_file << ": loaded." << std::endl; // Create runtime inference engine execution context nvinfer1::IRuntime* infer = createInferRuntime(gLogger); if( !infer ) { std::cout << "Failed to create InferRuntime." << std::endl; } nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream); if( !engine ) { std::cout << "Failed to create CUDA engine." << std::endl; } nvinfer1::IExecutionContext* context = engine->createExecutionContext(); if( !context ) { std::cout << "failed to create execution context." << std::endl; } std::cout << "CUDA engine context initialized with " << engine->getNbBindings() << " bindings." << std::endl; mInfer = infer; mEngine = engine; mContext = context; // Determine dimensions of network bindings const int inputIndex = engine->getBindingIndex("data"); const int outputIndex = engine->getBindingIndex( blob_name.c_str() ); std::cout << caffemodel_file << " input binding index: " << inputIndex << std::endl; std::cout << caffemodel_file << " output binding index: " << outputIndex << std::endl; nvinfer1::Dims3 inputDims = engine->getBindingDimensions(inputIndex); nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex); size_t inputSize = inputDims.c * inputDims.h * inputDims.w * sizeof(float); size_t outputSize = outputDims.c * outputDims.h * outputDims.w * sizeof(float); std::cout << caffemodel_file << "input dims (c=" << inputDims.c << " h=" << inputDims.h << " w=" << inputDims.w << ") size=" << inputSize << std::endl; std::cout << caffemodel_file << "output dims (c=" << outputDims.c << " h=" << outputDims.h << " w=" << outputDims.w << ") size=" << outputSize << std::endl; // Allocate memory to hold the input image if ( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputCUDA, inputSize) ) { std::cout << "Failed to alloc CUDA mapped memory for input, " << inputSize << " bytes" << std::endl; } mInputSize = inputSize; mWidth = inputDims.w; mHeight = inputDims.h; // Allocate output memory to hold the result if( !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputCUDA, outputSize) ) { std::cout << "Failed to alloc CUDA mapped memory for output, " << outputSize << " bytes" << std::endl; } mOutputSize = outputSize; mOutputDims = outputDims.c; std::cout << caffemodel_file << ": initialized." << std::endl; if (binaryproto_meanfile=="") { // Set input size if the mean pixel is used resizeDims.h = _resizeHeight; resizeDims.w = _resizeWidth; resizeDims.c = 3; resizeDims.n = 1; } return true; }