void initCudaVideo() { // bind the context lock to the CUDA context CUresult result = cuvidCtxLockCreate(&g_CtxLock, g_oContext); if (result != CUDA_SUCCESS) { printf("cuvidCtxLockCreate failed: %d\n", result); assert(0); } std::auto_ptr<VideoDecoder> apVideoDecoder(new VideoDecoder(g_pVideoSource->format(), g_oContext, g_eVideoCreateFlags, g_CtxLock)); std::auto_ptr<VideoParser> apVideoParser(new VideoParser(apVideoDecoder.get(), g_pFrameQueue)); g_pVideoSource->setParser(*apVideoParser.get()); g_pVideoParser = apVideoParser.release(); g_pVideoDecoder = apVideoDecoder.release(); // Create a Stream ID for handling Readback if (g_bReadback) { checkCudaErrors(cuStreamCreate(&g_ReadbackSID, 0)); checkCudaErrors(cuStreamCreate(&g_KernelSID, 0)); printf("> initCudaVideo()\n"); printf(" CUDA Streams (%s) <g_ReadbackSID = %p>\n", ((g_ReadbackSID == 0) ? "Disabled" : "Enabled"), g_ReadbackSID); printf(" CUDA Streams (%s) <g_KernelSID = %p>\n", ((g_KernelSID == 0) ? "Disabled" : "Enabled"), g_KernelSID); } }
void CudaVideoRender::initCudaVideo( ) { // bind the context lock to the CUDA context CUresult result = cuvidCtxLockCreate(&m_CtxLock, m_cuContext); if (result != CUDA_SUCCESS) { printf("cuvidCtxLockCreate failed: %d\n", result); assert(0); } std::auto_ptr<VideoDecoder> apVideoDecoder(new VideoDecoder(m_pVideoSource->format(), m_cuContext, m_eVideoCreateFlags, m_CtxLock)); std::auto_ptr<VideoParser> apVideoParser(new VideoParser(apVideoDecoder.get(), m_pFrameQueue)); m_pVideoSource->setParser(*apVideoParser.get()); m_pVideoParser = apVideoParser.release(); m_pVideoDecoder = apVideoDecoder.release(); // Create a Stream ID for handling Readback if (m_bReadback) { cutilDrvSafeCallNoSync( cuStreamCreate(&m_ReadbackSID, 0) ); cutilDrvSafeCallNoSync( cuStreamCreate(&m_KernelSID, 0) ); printf("> initCudaVideo()\n"); printf(" CUDA Streams (%s) <m_ReadbackSID = %p>\n", ((m_ReadbackSID == 0) ? "Disabled" : "Enabled"), m_ReadbackSID ); printf(" CUDA Streams (%s) <m_KernelSID = %p>\n", ((m_KernelSID == 0) ? "Disabled" : "Enabled"), m_KernelSID ); } }
bool VideoDecoderCUDAPrivate::initCuda() { CUresult result = cuInit(0); if (result != CUDA_SUCCESS) { available = false; qWarning("cuInit(0) faile (%d)", result); return false; } cudev = GetMaxGflopsGraphicsDeviceId(); int clockRate; cuDeviceGetAttribute(&clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, cudev); int major, minor; cuDeviceComputeCapability(&major, &minor, cudev); char devname[256]; cuDeviceGetName(devname, 256, cudev); description = QString("CUDA device: %1 %2.%3 %4 MHz").arg(devname).arg(major).arg(minor).arg(clockRate/1000); //TODO: cuD3DCtxCreate > cuGLCtxCreate > cuCtxCreate checkCudaErrors(cuCtxCreate(&cuctx, CU_CTX_SCHED_BLOCKING_SYNC, cudev)); //CU_CTX_SCHED_AUTO? CUcontext cuCurrent = NULL; result = cuCtxPopCurrent(&cuCurrent); if (result != CUDA_SUCCESS) { qWarning("cuCtxPopCurrent: %d\n", result); return false; } checkCudaErrors(cuvidCtxLockCreate(&vid_ctx_lock, cuctx)); { AutoCtxLock lock(this, vid_ctx_lock); Q_UNUSED(lock); //Flags- Parameters for stream creation (must be 0 (CU_STREAM_DEFAULT=0 in cuda5) in cuda 4.2, no CU_STREAM_NON_BLOCKING) checkCudaErrors(cuStreamCreate(&stream, 0));//CU_STREAM_NON_BLOCKING)); //CU_STREAM_DEFAULT //require compute capability >= 1.1 //flag: Reserved for future use, must be 0 //cuStreamAddCallback(stream, CUstreamCallback, this, 0); } return true; }
// Wires up the reading pipeline for `source`: creates a video context lock on
// the current CUDA context, builds the frame queue, decoder and parser, hands
// the queue and parser to the source, and finally starts the source.
cv::gpu::VideoReader_GPU::Impl::Impl(const cv::Ptr<VideoSource>& source)
    : videoSource_(source)
    , lock_(0)
{
    // init context: allocate a 1x1 matrix and free it again right away
    GpuMat warmup(1, 1, CV_8UC1);
    warmup.release();

    DeviceInfo devInfo;
    CV_Assert( devInfo.supports(FEATURE_SET_COMPUTE_11) );

    // The lock is bound to whatever context is current on this thread.
    CUcontext ctx;
    cuSafeCall( cuCtxGetCurrent(&ctx) );
    cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );

    // Each object is handed to its owning member immediately after it is
    // created; the raw pointers are kept only to connect the stages.
    detail::FrameQueue* const queue = new detail::FrameQueue;
    frameQueue_.reset(queue);

    detail::VideoDecoder* const decoder = new detail::VideoDecoder(videoSource_->format(), lock_);
    videoDecoder_.reset(decoder);

    detail::VideoParser* const parser = new detail::VideoParser(decoder, queue);
    videoParser_.reset(parser);

    videoSource_->setFrameQueue(queue);
    videoSource_->setVideoParser(parser);

    videoSource_->start();
}
/* * Main transcoding thread * Initializes CUDA device, decodes frames with NVCUVID API and adds them to frame queue, which passes them to NVENC for encoding, then output */ void NVENCGUI::Transcode() { CUresult result; // initialize CUDA result = cuInit(0); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_INIT); return; } NVENCSTATUS nvStatus = NV_ENC_SUCCESS; // no input file if (encodeConfig.inputFileName == NULL) { emit Error(ERR_INPUT); return; } // no output file if (encodeConfig.outputFileName == NULL) { emit Error(ERR_OUTPUT); return; } // unable to open input file if (!fopen(encodeConfig.inputFileName, "r")) { emit Error(ERR_INPUT); return; } encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb"); // unable to open output file if (encodeConfig.fOutput == NULL) { emit Error(ERR_OUTPUT); return; } // initialize CUDA on device and set CUDA context CUcontext cudaCtx; CUdevice device; result = cuDeviceGet(&device, encodeConfig.deviceID); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_DEVICE); return; } result = cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } // initialize NVCUVID context CUcontext curCtx; CUvideoctxlock ctxLock; result = cuCtxPopCurrent(&curCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } result = cuvidCtxLockCreate(&ctxLock, curCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } CudaDecoder* pDecoder = new CudaDecoder; FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock); pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height); int decodedW, decodedH, decodedFRN, decodedFRD; pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD); // If the width/height is not set, set to same as source if (encodeConfig.width <= 0 || encodeConfig.height <= 0) { encodeConfig.width = decodedW; encodeConfig.height = decodedH; } // same, except for fps if 
(encodeConfig.fps <= 0) { if (decodedFRN <= 0 || decodedFRD <= 0) encodeConfig.fps = 30; else encodeConfig.fps = decodedFRN / decodedFRD; } // initialize frame queue with width/height pFrameQueue->init(encodeConfig.width, encodeConfig.height); VideoEncoder* pEncoder = new VideoEncoder(ctxLock); assert(pEncoder->GetHWEncoder()); // initialize NVENC HW Encoder nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_INIT); return; } // get preset GUID encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec); // create encoder nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_CREATE); return; } // create buffer nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_BUFFER); return; } // print details to text window, start counter emit PrintDetails(); NvQueryPerformanceCounter(&results.lStart); //start decoding thread #ifdef _WIN32 HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL); #else pthread_t pid; pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder); #endif int encodedFrames = 0; //start encoding thread while (!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty())) { CUVIDPARSERDISPINFO pInfo; if (pFrameQueue->dequeue(&pInfo)) { CUdeviceptr dMappedFrame = 0; unsigned int pitch; CUVIDPROCPARAMS oVPP = { 0 }; oVPP.unpaired_field = 1; oVPP.progressive_frame = 1; cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP); EncodeFrameConfig stEncodeConfig = { 0 }; stEncodeConfig.dptr = dMappedFrame; stEncodeConfig.pitch = pitch; stEncodeConfig.width = encodeConfig.width; stEncodeConfig.height = encodeConfig.height; pEncoder->EncodeFrame(&stEncodeConfig); cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame); 
pFrameQueue->releaseFrame(&pInfo); //emit IncrementEncodedFrames(); } } // flush pEncoder->EncodeFrame(NULL, true); // end decoding thread #ifdef _WIN32 WaitForSingleObject(decodeThread, INFINITE); #else pthread_join(pid, NULL); #endif // print transcoding details if (pEncoder->GetEncodedFrames() > 0) { results.decodedFrames = pDecoder->m_decodedFrames; results.encodedFrames = pEncoder->GetEncodedFrames(); NvQueryPerformanceCounter(&results.lEnd); NvQueryPerformanceFrequency(&results.lFreq); results.elapsedTime = (double)(results.lEnd - results.lStart) / (double)results.lFreq; } emit TranscodingEnd(); // clean up cuvidCtxLockDestroy(ctxLock); pEncoder->Deinitialize(); delete pDecoder; delete pEncoder; delete pFrameQueue; result = cuCtxDestroy(cudaCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX_DESTROY); return; } return; }