bool VideoDecoderCUDAPrivate::processDecodedData(CUVIDPARSERDISPINFO *cuviddisp, VideoFrame* outFrame) { int num_fields = cuviddisp->progressive_frame ? 1 : 2+cuviddisp->repeat_first_field; for (int active_field = 0; active_field < num_fields; ++active_field) { CUVIDPROCPARAMS proc_params; memset(&proc_params, 0, sizeof(CUVIDPROCPARAMS)); proc_params.progressive_frame = cuviddisp->progressive_frame; //check user config proc_params.second_field = active_field == 1; //check user config proc_params.top_field_first = cuviddisp->top_field_first; proc_params.unpaired_field = cuviddisp->progressive_frame == 1; CUdeviceptr devptr; unsigned int pitch; cuvidCtxLock(vid_ctx_lock, 0); CUresult cuStatus = cuvidMapVideoFrame(dec, cuviddisp->picture_index, &devptr, &pitch, &proc_params); if (cuStatus != CUDA_SUCCESS) { qWarning("cuvidMapVideoFrame failed on index %d (%#x, %s)", cuviddisp->picture_index, cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } #define PAD_ALIGN(x,mask) ( (x + mask) & ~mask ) //uint w = dec_create_info.ulWidth;//PAD_ALIGN(dec_create_info.ulWidth, 0x3F); uint h = dec_create_info.ulHeight;//PAD_ALIGN(dec_create_info.ulHeight, 0x0F); //? 
#undef PAD_ALIGN int size = pitch*h*3/2; if (size > host_data_size && host_data) { cuMemFreeHost(host_data); host_data = 0; host_data_size = 0; } if (!host_data) { cuStatus = cuMemAllocHost((void**)&host_data, size); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemAllocHost failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } host_data_size = size; } if (!host_data) { qWarning("No valid staging memory!"); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } cuStatus = cuMemcpyDtoHAsync(host_data, devptr, size, stream); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemcpyDtoHAsync failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } cuStatus = cuCtxSynchronize(); if (cuStatus != CUDA_SUCCESS) { qWarning("cuCtxSynchronize failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); } cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); //qDebug("mark not in use pic_index: %d", cuviddisp->picture_index); surface_in_use[cuviddisp->picture_index] = false; uchar *planes[] = { host_data, host_data + pitch * h }; int pitches[] = { (int)pitch, (int)pitch }; VideoFrame frame(codec_ctx->width, codec_ctx->height, VideoFormat::Format_NV12); frame.setBits(planes); frame.setBytesPerLine(pitches); //TODO: is clone required? may crash on clone, I should review clone() //frame = frame.clone(); if (outFrame) { *outFrame = frame.clone(); } #if COPY_ON_DECODE frame_queue.put(frame.clone()); #endif //qDebug("frame queue size: %d", frame_queue.size()); } return true; }
/**
 * Unmaps a device pointer from oDecoder_ via cuvidUnmapVideoFrame.
 *
 * @param pDevice device pointer to unmap.
 *
 * The returned status is only checked through assert(), so the check is
 * compiled out when NDEBUG is defined.
 */
void VideoDecoder::unmapFrame(CUdeviceptr pDevice)
{
    CUresult unmapStatus = cuvidUnmapVideoFrame(oDecoder_, pDevice);
    assert(unmapStatus == CUDA_SUCCESS);
}
// Unmaps the device pointer stored in frame.data from decoder_ (the call is
// wrapped in cuSafeCall), then releases the GpuMat.
void unmapFrame(cuda::GpuMat& frame)
{
    // frame.data is reinterpreted as the device pointer handed to the unmap call.
    const CUdeviceptr mappedPtr = reinterpret_cast<CUdeviceptr>(frame.data);
    cuSafeCall( cuvidUnmapVideoFrame(decoder_, mappedPtr) );

    frame.release();
}
/* * Main transcoding thread * Initializes CUDA device, decodes frames with NVCUVID API and adds them to frame queue, which passes them to NVENC for encoding, then output */ void NVENCGUI::Transcode() { CUresult result; // initialize CUDA result = cuInit(0); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_INIT); return; } NVENCSTATUS nvStatus = NV_ENC_SUCCESS; // no input file if (encodeConfig.inputFileName == NULL) { emit Error(ERR_INPUT); return; } // no output file if (encodeConfig.outputFileName == NULL) { emit Error(ERR_OUTPUT); return; } // unable to open input file if (!fopen(encodeConfig.inputFileName, "r")) { emit Error(ERR_INPUT); return; } encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb"); // unable to open output file if (encodeConfig.fOutput == NULL) { emit Error(ERR_OUTPUT); return; } // initialize CUDA on device and set CUDA context CUcontext cudaCtx; CUdevice device; result = cuDeviceGet(&device, encodeConfig.deviceID); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_DEVICE); return; } result = cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } // initialize NVCUVID context CUcontext curCtx; CUvideoctxlock ctxLock; result = cuCtxPopCurrent(&curCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } result = cuvidCtxLockCreate(&ctxLock, curCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } CudaDecoder* pDecoder = new CudaDecoder; FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock); pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height); int decodedW, decodedH, decodedFRN, decodedFRD; pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD); // If the width/height is not set, set to same as source if (encodeConfig.width <= 0 || encodeConfig.height <= 0) { encodeConfig.width = decodedW; encodeConfig.height = decodedH; } // same, except for fps if 
(encodeConfig.fps <= 0) { if (decodedFRN <= 0 || decodedFRD <= 0) encodeConfig.fps = 30; else encodeConfig.fps = decodedFRN / decodedFRD; } // initialize frame queue with width/height pFrameQueue->init(encodeConfig.width, encodeConfig.height); VideoEncoder* pEncoder = new VideoEncoder(ctxLock); assert(pEncoder->GetHWEncoder()); // initialize NVENC HW Encoder nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_INIT); return; } // get preset GUID encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec); // create encoder nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_CREATE); return; } // create buffer nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_BUFFER); return; } // print details to text window, start counter emit PrintDetails(); NvQueryPerformanceCounter(&results.lStart); //start decoding thread #ifdef _WIN32 HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL); #else pthread_t pid; pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder); #endif int encodedFrames = 0; //start encoding thread while (!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty())) { CUVIDPARSERDISPINFO pInfo; if (pFrameQueue->dequeue(&pInfo)) { CUdeviceptr dMappedFrame = 0; unsigned int pitch; CUVIDPROCPARAMS oVPP = { 0 }; oVPP.unpaired_field = 1; oVPP.progressive_frame = 1; cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP); EncodeFrameConfig stEncodeConfig = { 0 }; stEncodeConfig.dptr = dMappedFrame; stEncodeConfig.pitch = pitch; stEncodeConfig.width = encodeConfig.width; stEncodeConfig.height = encodeConfig.height; pEncoder->EncodeFrame(&stEncodeConfig); cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame); 
pFrameQueue->releaseFrame(&pInfo); //emit IncrementEncodedFrames(); } } // flush pEncoder->EncodeFrame(NULL, true); // end decoding thread #ifdef _WIN32 WaitForSingleObject(decodeThread, INFINITE); #else pthread_join(pid, NULL); #endif // print transcoding details if (pEncoder->GetEncodedFrames() > 0) { results.decodedFrames = pDecoder->m_decodedFrames; results.encodedFrames = pEncoder->GetEncodedFrames(); NvQueryPerformanceCounter(&results.lEnd); NvQueryPerformanceFrequency(&results.lFreq); results.elapsedTime = (double)(results.lEnd - results.lStart) / (double)results.lFreq; } emit TranscodingEnd(); // clean up cuvidCtxLockDestroy(ctxLock); pEncoder->Deinitialize(); delete pDecoder; delete pEncoder; delete pFrameQueue; result = cuCtxDestroy(cudaCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX_DESTROY); return; } return; }