bool VideoDecoderCUDAPrivate::createCUVIDDecoder(cudaVideoCodec cudaCodec, int w, int h) { if (cudaCodec == -1) { return false; } AutoCtxLock lock(this, vid_ctx_lock); Q_UNUSED(lock); if (dec) { checkCudaErrors(cuvidDestroyDecoder(dec)); } memset(&dec_create_info, 0, sizeof(CUVIDDECODECREATEINFO)); dec_create_info.ulWidth = w; dec_create_info.ulHeight = h; dec_create_info.ulNumDecodeSurfaces = kMaxDecodeSurfaces; //same as ulMaxNumDecodeSurfaces dec_create_info.CodecType = cudaCodec; dec_create_info.ChromaFormat = cudaVideoChromaFormat_420; // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) //cudaVideoCreate_PreferCUVID is slow in example. DXVA may failed to create (CUDA_ERROR_NO_DEVICE) // what's the difference between CUDA and CUVID? dec_create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; //cudaVideoCreate_Default, cudaVideoCreate_PreferCUDA, cudaVideoCreate_PreferCUVID, cudaVideoCreate_PreferDXVA // TODO: lav yv12 dec_create_info.OutputFormat = cudaVideoSurfaceFormat_NV12; // NV12 (currently the only supported output format) dec_create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;// Weave: No deinterlacing //cudaVideoDeinterlaceMode_Adaptive; // No scaling dec_create_info.ulTargetWidth = dec_create_info.ulWidth; dec_create_info.ulTargetHeight = dec_create_info.ulHeight; dec_create_info.ulNumOutputSurfaces = 2; // We won't simultaneously map more than 8 surfaces dec_create_info.vidLock = vid_ctx_lock;//vidCtxLock; //FIXME // Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes) // otherwise CUDA_ERROR_OUT_OF_MEMORY on cuMemcpyDtoH // if ulNumDecodeSurfaces < ulMaxNumDecodeSurfaces, CurrPicIdx may be > ulNumDecodeSurfaces while (dec_create_info.ulNumDecodeSurfaces * codec_ctx->coded_width * codec_ctx->coded_height > 16*1024*1024) { dec_create_info.ulNumDecodeSurfaces--; } nb_dec_surface = dec_create_info.ulNumDecodeSurfaces; qDebug("ulNumDecodeSurfaces: %lu", dec_create_info.ulNumDecodeSurfaces); // create the decoder 
available = false; checkCudaErrors(cuvidCreateDecoder(&dec, &dec_create_info)); available = true; return true; }
bool VideoDecoderCUDAPrivate::createCUVIDDecoder(cudaVideoCodec cudaCodec, int w, int h) { if (cudaCodec == -1) { return false; } AutoCtxLock lock(this, vid_ctx_lock); Q_UNUSED(lock); if (dec) { checkCudaErrors(cuvidDestroyDecoder(dec)); } memset(&dec_create_info, 0, sizeof(CUVIDDECODECREATEINFO)); dec_create_info.ulWidth = w; dec_create_info.ulHeight = h; dec_create_info.ulNumDecodeSurfaces = nb_dec_surface; //same as ulMaxNumDecodeSurfaces dec_create_info.CodecType = cudaCodec; dec_create_info.ChromaFormat = cudaVideoChromaFormat_420; // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) //cudaVideoCreate_PreferCUVID is slow in example. DXVA may failed to create (CUDA_ERROR_NO_DEVICE) dec_create_info.ulCreationFlags = create_flags; // TODO: lav yv12 dec_create_info.OutputFormat = cudaVideoSurfaceFormat_NV12; // NV12 (currently the only supported output format) dec_create_info.DeinterlaceMode = deinterlace; // No scaling dec_create_info.ulTargetWidth = dec_create_info.ulWidth; dec_create_info.ulTargetHeight = dec_create_info.ulHeight; dec_create_info.ulNumOutputSurfaces = 2; // We won't simultaneously map more than 8 surfaces dec_create_info.vidLock = vid_ctx_lock;//vidCtxLock; //FIXME // Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes) // otherwise CUDA_ERROR_OUT_OF_MEMORY on cuMemcpyDtoH // if ulNumDecodeSurfaces < ulMaxNumDecodeSurfaces, CurrPicIdx may be > ulNumDecodeSurfaces /* * TODO: check video memory, e.g. runtime apu extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, size_t *total); * 24MB is too small for 4k video, only n2 surfaces can be use so decoding will be too slow */ #if 0 while (dec_create_info.ulNumDecodeSurfaces * codec_ctx->coded_width * codec_ctx->coded_height > 16*1024*1024) { dec_create_info.ulNumDecodeSurfaces--; } #endif // create the decoder available = false; checkCudaErrors(cuvidCreateDecoder(&dec, &dec_create_info)); available = true; return true; }
/*
 * Sequence callback (logged as pfnSequenceCallback).
 *
 * Copies the stream properties found in `format` into the codec context and
 * creates the CUVID decoder, or re-creates it when the coded size, chroma
 * format or codec differ from what the current decoder was built for.
 *
 * opaque: the AVCodecContext whose priv_data is the CuvidContext.
 * format: video format description for the sequence.
 *
 * Returns 1 on success and 0 on failure; on failure ctx->internal_error
 * holds the error code.
 */
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;
    AVHWFramesContext *frames = (AVHWFramesContext*)ctx->hwframe->data;
    CUVIDDECODECREATEINFO params;

    av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);

    ctx->internal_error = 0;

    /* Visible picture size and sample aspect ratio. */
    avctx->width  = format->display_area.right;
    avctx->height = format->display_area.bottom;

    ff_set_sar(avctx, av_div_q(
        (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
        (AVRational){ avctx->width, avctx->height }));

    /* Interlaced content is only flagged as such when it is passed through
     * without deinterlacing (weave). */
    if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
        avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
    } else {
        avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
    }

    /* Colour description. */
    avctx->color_range = format->video_signal_description.video_full_range_flag
                       ? AVCOL_RANGE_JPEG
                       : AVCOL_RANGE_MPEG;
    avctx->color_primaries = format->video_signal_description.color_primaries;
    avctx->color_trc       = format->video_signal_description.transfer_characteristics;
    avctx->colorspace      = format->video_signal_description.matrix_coefficients;

    /* Bit rate and frame rate are only taken over when the stream provides them. */
    if (format->bitrate)
        avctx->bit_rate = format->bitrate;

    if (format->frame_rate.numerator && format->frame_rate.denominator) {
        avctx->framerate.num = format->frame_rate.numerator;
        avctx->framerate.den = format->frame_rate.denominator;
    }

    /* The existing decoder already matches this sequence: nothing more to do. */
    if (ctx->cudecoder
            && avctx->coded_width == format->coded_width
            && avctx->coded_height == format->coded_height
            && ctx->chroma_format == format->chroma_format
            && ctx->codec_type == format->codec)
        return 1;

    /* Parameters changed: drop the old decoder before building a new one. */
    if (ctx->cudecoder) {
        av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
        ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
        if (ctx->internal_error < 0)
            return 0;
        ctx->cudecoder = NULL;
    }

    /* A frames context that already has a pool must be able to hold the
     * frames this sequence produces. */
    if (frames->pool) {
        if (frames->width < avctx->width ||
            frames->height < avctx->height ||
            frames->format != AV_PIX_FMT_CUDA ||
            frames->sw_format != AV_PIX_FMT_NV12) {
            av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
            ctx->internal_error = AVERROR(EINVAL);
            return 0;
        }
    }

    if (format->chroma_format != cudaVideoChromaFormat_420) {
        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    /* Remember what the new decoder is being built for. */
    avctx->coded_width  = format->coded_width;
    avctx->coded_height = format->coded_height;
    ctx->chroma_format  = format->chroma_format;
    ctx->codec_type     = format->codec;

    memset(&params, 0, sizeof(params));

    params.CodecType    = ctx->codec_type;
    params.ChromaFormat = format->chroma_format;
    params.OutputFormat = cudaVideoSurfaceFormat_NV12;

    /* Decode at the coded size, without scaling or cropping. */
    params.ulWidth        = avctx->coded_width;
    params.ulHeight       = avctx->coded_height;
    params.ulTargetWidth  = params.ulWidth;
    params.ulTargetHeight = params.ulHeight;

    params.target_rect.left   = 0;
    params.target_rect.top    = 0;
    params.target_rect.right  = params.ulWidth;
    params.target_rect.bottom = params.ulHeight;

    params.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
    params.ulNumOutputSurfaces = 1;
    params.ulCreationFlags     = cudaVideoCreate_PreferCUVID;
    params.bitDepthMinus8      = format->bit_depth_luma_minus8;

    /* Progressive sequences force the stored deinterlace mode to weave. */
    if (format->progressive_sequence)
        ctx->deint_mode = cudaVideoDeinterlaceMode_Weave;
    params.DeinterlaceMode = ctx->deint_mode;

    /* Any mode other than weave doubles the reported frame rate. */
    if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});

    ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &params));
    if (ctx->internal_error < 0)
        return 0;

    /* A frames context that already owns a pool is left untouched. */
    if (frames->pool)
        return 1;

    frames->format    = AV_PIX_FMT_CUDA;
    frames->sw_format = AV_PIX_FMT_NV12;
    frames->width     = avctx->width;
    frames->height    = avctx->height;

    ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe);
    if (ctx->internal_error < 0) {
        av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
        return 0;
    }

    return 1;
}
/**
 * Builds a CUVID decoder for the given video format.
 *
 * @param rVideoFormat format description (codec, chroma format, coded size).
 * @param rContext     CUDA context; a copy of the handle is kept in m_Context.
 * @param eCreateFlags decoder creation flags, stored in m_VideoCreateFlags.
 * @param vidCtxLock   video context lock, kept in m_VidCtxLock and passed to
 *                     the decoder.
 *
 * The codec, the chroma format and the result of cuvidCreateDecoder() are
 * checked with assert() only, so these checks are compiled out under NDEBUG.
 */
VideoDecoder::VideoDecoder(const CUVIDEOFORMAT &rVideoFormat,
                           CUcontext &rContext,
                           cudaVideoCreateFlags eCreateFlags,
                           CUvideoctxlock &vidCtxLock)
    : m_VidCtxLock(vidCtxLock)
{
    // Keep a copy of the CUDA context and of the requested creation flags.
    m_Context = rContext;
    m_VideoCreateFlags = eCreateFlags;

    // Report which decoder back end was requested.
    printf("> VideoDecoder::cudaVideoCreateFlags = <%d>", (int)eCreateFlags);

    switch (eCreateFlags)
    {
        case cudaVideoCreate_Default:
            printf("Default (VP)\n");
            break;

        case cudaVideoCreate_PreferCUDA:
            printf("Use CUDA decoder\n");
            break;

        case cudaVideoCreate_PreferDXVA:
            printf("Use DXVA decoder\n");
            break;

        case cudaVideoCreate_PreferCUVID:
            printf("Use CUVID decoder\n");
            break;

        default:
            printf("Unknown value\n");
            break;
    }

    printf("\n");

    // Validate the video format against the codecs accepted here.
    assert(rVideoFormat.codec == cudaVideoCodec_MPEG1  ||
           rVideoFormat.codec == cudaVideoCodec_MPEG2  ||
           rVideoFormat.codec == cudaVideoCodec_MPEG4  ||
           rVideoFormat.codec == cudaVideoCodec_VC1    ||
           rVideoFormat.codec == cudaVideoCodec_H264   ||
           rVideoFormat.codec == cudaVideoCodec_JPEG   ||
           rVideoFormat.codec == cudaVideoCodec_YUV420 ||
           rVideoFormat.codec == cudaVideoCodec_YV12   ||
           rVideoFormat.codec == cudaVideoCodec_NV12   ||
           rVideoFormat.codec == cudaVideoCodec_YUYV   ||
           rVideoFormat.codec == cudaVideoCodec_UYVY);

    assert(rVideoFormat.chroma_format == cudaVideoChromaFormat_Monochrome ||
           rVideoFormat.chroma_format == cudaVideoChromaFormat_420        ||
           rVideoFormat.chroma_format == cudaVideoChromaFormat_422        ||
           rVideoFormat.chroma_format == cudaVideoChromaFormat_444);

    // Fill the decoder-create-info member from the given video format.
    CUVIDDECODECREATEINFO &info = oVideoDecodeCreateInfo_;
    memset(&info, 0, sizeof(CUVIDDECODECREATEINFO));

    info.CodecType = rVideoFormat.codec;
    info.ulWidth   = rVideoFormat.coded_width;
    info.ulHeight  = rVideoFormat.coded_height;

    // Start from the frame queue capacity, then limit decode memory to 24MB
    // (16M pixels at 4:2:0 = 24M bytes) by dropping surfaces one at a time.
    info.ulNumDecodeSurfaces = FrameQueue::cnMaximumSize;

    while (info.ulNumDecodeSurfaces * rVideoFormat.coded_width * rVideoFormat.coded_height > 16*1024*1024)
    {
        info.ulNumDecodeSurfaces--;
    }

    info.ChromaFormat    = rVideoFormat.chroma_format;
    info.OutputFormat    = cudaVideoSurfaceFormat_NV12;
    info.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;

    // No scaling: the target size equals the coded size.
    info.ulTargetWidth  = info.ulWidth;
    info.ulTargetHeight = info.ulHeight;

    // Upper bound on the number of surfaces mapped at the same time.
    info.ulNumOutputSurfaces = MAX_FRAME_COUNT;
    info.ulCreationFlags     = m_VideoCreateFlags;
    info.vidLock             = vidCtxLock;

    // Create the decoder.
    CUresult status = cuvidCreateDecoder(&oDecoder_, &info);
    assert(CUDA_SUCCESS == status);
}