コード例 #1
0
ファイル: VideoDecoderCUDA.cpp プロジェクト: ustbgaofan/QtAV
// (Re)creates the CUVID decoder stored in `dec` for the given codec and size.
//
// An existing decoder is destroyed first. The creation parameters are kept in
// the member `dec_create_info`, and the number of decode surfaces finally used
// is published through `nb_dec_surface`.
//
// cudaCodec: codec to decode; -1 is rejected.
// w, h:      written to ulWidth/ulHeight and, unscaled, to ulTargetWidth/ulTargetHeight.
//
// Returns false if cudaCodec is -1 or if no decoder handle was obtained,
// true otherwise. `available` is only set to true on success.
bool VideoDecoderCUDAPrivate::createCUVIDDecoder(cudaVideoCodec cudaCodec, int w, int h)
{
    if (cudaCodec == -1) {
        return false;
    }
    // NOTE(review): AutoCtxLock presumably holds vid_ctx_lock for the rest of this scope -- confirm.
    AutoCtxLock lock(this, vid_ctx_lock);
    Q_UNUSED(lock);
    if (dec) {
        checkCudaErrors(cuvidDestroyDecoder(dec));
        // Forget the destroyed handle: if the creation below fails, a later call
        // must not destroy the same decoder a second time.
        dec = 0;
    }
    memset(&dec_create_info, 0, sizeof(CUVIDDECODECREATEINFO));
    dec_create_info.ulWidth = w;
    dec_create_info.ulHeight = h;
    dec_create_info.ulNumDecodeSurfaces = kMaxDecodeSurfaces; //same as ulMaxNumDecodeSurfaces
    dec_create_info.CodecType = cudaCodec;
    dec_create_info.ChromaFormat = cudaVideoChromaFormat_420;  // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported)
    //cudaVideoCreate_PreferCUVID is slow in example. DXVA may fail to create (CUDA_ERROR_NO_DEVICE)
    // what's the difference between CUDA and CUVID?
    dec_create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; //cudaVideoCreate_Default, cudaVideoCreate_PreferCUDA, cudaVideoCreate_PreferCUVID, cudaVideoCreate_PreferDXVA
    // TODO: lav yv12
    dec_create_info.OutputFormat = cudaVideoSurfaceFormat_NV12; // NV12 (currently the only supported output format)
    // Adaptive deinterlacing is requested here (the alternative cudaVideoDeinterlaceMode_Weave means no deinterlacing).
    dec_create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    // No scaling
    dec_create_info.ulTargetWidth = dec_create_info.ulWidth;
    dec_create_info.ulTargetHeight = dec_create_info.ulHeight;
    dec_create_info.ulNumOutputSurfaces = 2;  // two output surfaces are requested
    dec_create_info.vidLock = vid_ctx_lock;//vidCtxLock; //FIXME

    // Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes)
    // otherwise CUDA_ERROR_OUT_OF_MEMORY on cuMemcpyDtoH
    // if ulNumDecodeSurfaces < ulMaxNumDecodeSurfaces, CurrPicIdx may be > ulNumDecodeSurfaces
    //
    // Never go below one surface: when a single coded frame already exceeds the
    // budget the unguarded loop would count down to 0 and publish 0 through
    // nb_dec_surface.
    while (dec_create_info.ulNumDecodeSurfaces > 1
           && dec_create_info.ulNumDecodeSurfaces * codec_ctx->coded_width * codec_ctx->coded_height > 16*1024*1024) {
        dec_create_info.ulNumDecodeSurfaces--;
    }
    nb_dec_surface = dec_create_info.ulNumDecodeSurfaces;

    qDebug("ulNumDecodeSurfaces: %lu", dec_create_info.ulNumDecodeSurfaces);

    // create the decoder
    available = false;
    checkCudaErrors(cuvidCreateDecoder(&dec, &dec_create_info));
    // `dec` is null at this point unless the call above stored a handle.
    // NOTE(review): relies on cuvidCreateDecoder not writing a handle on failure -- confirm.
    if (!dec) {
        return false;
    }
    available = true;
    return true;
}
コード例 #2
0
ファイル: VideoDecoderCUDA.cpp プロジェクト: NickD2039/QtAV
// (Re)creates the CUVID decoder stored in `dec` for the given codec and size.
//
// An existing decoder is destroyed first. The creation parameters are kept in
// the member `dec_create_info`; surface count, creation flags and deinterlace
// mode are taken from the members nb_dec_surface, create_flags and deinterlace.
//
// cudaCodec: codec to decode; -1 is rejected.
// w, h:      written to ulWidth/ulHeight and, unscaled, to ulTargetWidth/ulTargetHeight.
//
// Returns false if cudaCodec is -1 or if no decoder handle was obtained,
// true otherwise. `available` is only set to true on success.
bool VideoDecoderCUDAPrivate::createCUVIDDecoder(cudaVideoCodec cudaCodec, int w, int h)
{
    if (cudaCodec == -1) {
        return false;
    }
    // NOTE(review): AutoCtxLock presumably holds vid_ctx_lock for the rest of this scope -- confirm.
    AutoCtxLock lock(this, vid_ctx_lock);
    Q_UNUSED(lock);
    if (dec) {
        checkCudaErrors(cuvidDestroyDecoder(dec));
        // Forget the destroyed handle: if the creation below fails, a later call
        // must not destroy the same decoder a second time.
        dec = 0;
    }
    memset(&dec_create_info, 0, sizeof(CUVIDDECODECREATEINFO));
    dec_create_info.ulWidth = w;
    dec_create_info.ulHeight = h;
    dec_create_info.ulNumDecodeSurfaces = nb_dec_surface; //same as ulMaxNumDecodeSurfaces
    dec_create_info.CodecType = cudaCodec;
    dec_create_info.ChromaFormat = cudaVideoChromaFormat_420;  // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported)
    //cudaVideoCreate_PreferCUVID is slow in example. DXVA may fail to create (CUDA_ERROR_NO_DEVICE)
    dec_create_info.ulCreationFlags = create_flags;
    // TODO: lav yv12
    dec_create_info.OutputFormat = cudaVideoSurfaceFormat_NV12; // NV12 (currently the only supported output format)
    dec_create_info.DeinterlaceMode = deinterlace;
    // No scaling
    dec_create_info.ulTargetWidth = dec_create_info.ulWidth;
    dec_create_info.ulTargetHeight = dec_create_info.ulHeight;
    dec_create_info.ulNumOutputSurfaces = 2;  // two output surfaces are requested
    dec_create_info.vidLock = vid_ctx_lock;//vidCtxLock; //FIXME

    // Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes)
    // otherwise CUDA_ERROR_OUT_OF_MEMORY on cuMemcpyDtoH
    // if ulNumDecodeSurfaces < ulMaxNumDecodeSurfaces, CurrPicIdx may be > ulNumDecodeSurfaces
    /*
     * TODO: check video memory, e.g. runtime API extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, size_t *total);
     * 24MB is too small for 4k video: only a couple of surfaces could be used, so decoding would be too slow.
     * The limit is therefore compiled out for now.
     */
#if 0
    while (dec_create_info.ulNumDecodeSurfaces * codec_ctx->coded_width * codec_ctx->coded_height > 16*1024*1024) {
        dec_create_info.ulNumDecodeSurfaces--;
    }
#endif
    // create the decoder
    available = false;
    checkCudaErrors(cuvidCreateDecoder(&dec, &dec_create_info));
    // `dec` is null at this point unless the call above stored a handle.
    // NOTE(review): relies on cuvidCreateDecoder not writing a handle on failure -- confirm.
    if (!dec) {
        return false;
    }
    available = true;
    return true;
}
コード例 #3
0
ファイル: cuvid.c プロジェクト: kinetiknz/FFmpeg
/*
 * Sequence callback: mirrors the stream parameters reported in `format` into
 * the codec context passed through `opaque`, and (re)creates the CUVID decoder
 * when there is none yet or when coded size, chroma format or codec changed.
 *
 * Returns 1 on success and 0 on failure; on failure the error code is left in
 * ctx->internal_error (it is reset to 0 on entry).
 */
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;
    AVHWFramesContext *frames = (AVHWFramesContext*)ctx->hwframe->data;
    CUVIDDECODECREATEINFO cuinfo;
    AVRational display_ratio, picture_ratio;

    av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);

    ctx->internal_error = 0;

    /* Visible picture size and the sample aspect ratio derived from it. */
    avctx->width = format->display_area.right;
    avctx->height = format->display_area.bottom;

    display_ratio = (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y };
    picture_ratio = (AVRational){ avctx->width, avctx->height };
    ff_set_sar(avctx, av_div_q(display_ratio, picture_ratio));

    /* The interlaced flag is only raised for non-progressive input that is weaved. */
    if (format->progressive_sequence || ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
        avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
    else
        avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;

    /* Colour description. */
    avctx->color_range = format->video_signal_description.video_full_range_flag
                       ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
    avctx->color_primaries = format->video_signal_description.color_primaries;
    avctx->color_trc = format->video_signal_description.transfer_characteristics;
    avctx->colorspace = format->video_signal_description.matrix_coefficients;

    /* Bit rate and frame rate are only taken over when the stream reports them. */
    if (format->bitrate)
        avctx->bit_rate = format->bitrate;

    if (format->frame_rate.numerator && format->frame_rate.denominator)
        avctx->framerate = (AVRational){ format->frame_rate.numerator, format->frame_rate.denominator };

    if (ctx->cudecoder) {
        /* Existing decoder already matches the stream: keep it. */
        if (avctx->coded_width == format->coded_width
                && avctx->coded_height == format->coded_height
                && ctx->chroma_format == format->chroma_format
                && ctx->codec_type == format->codec)
            return 1;

        /* Otherwise drop it so that a fresh one is created below. */
        av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
        ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
        if (ctx->internal_error < 0)
            return 0;
        ctx->cudecoder = NULL;
    }

    /* A frames context that already has a pool must be able to hold the new stream. */
    if (frames->pool) {
        int incompatible = frames->width < avctx->width ||
                           frames->height < avctx->height ||
                           frames->format != AV_PIX_FMT_CUDA ||
                           frames->sw_format != AV_PIX_FMT_NV12;

        if (incompatible) {
            av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
            ctx->internal_error = AVERROR(EINVAL);
            return 0;
        }
    }

    if (format->chroma_format != cudaVideoChromaFormat_420) {
        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    /* Remember the configuration the new decoder is built for. */
    avctx->coded_width = format->coded_width;
    avctx->coded_height = format->coded_height;
    ctx->chroma_format = format->chroma_format;
    ctx->codec_type = format->codec;

    /* Progressive input is always weaved; otherwise the configured mode is used. */
    if (format->progressive_sequence)
        ctx->deint_mode = cudaVideoDeinterlaceMode_Weave;

    memset(&cuinfo, 0, sizeof(cuinfo));

    cuinfo.CodecType = ctx->codec_type;
    cuinfo.ChromaFormat = format->chroma_format;
    cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
    cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
    cuinfo.DeinterlaceMode = ctx->deint_mode;
    cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
    cuinfo.ulNumOutputSurfaces = 1;

    /* Output is the full coded picture, unscaled. */
    cuinfo.ulWidth = avctx->coded_width;
    cuinfo.ulHeight = avctx->coded_height;
    cuinfo.ulTargetWidth = cuinfo.ulWidth;
    cuinfo.ulTargetHeight = cuinfo.ulHeight;

    cuinfo.target_rect.left = 0;
    cuinfo.target_rect.top = 0;
    cuinfo.target_rect.right = cuinfo.ulWidth;
    cuinfo.target_rect.bottom = cuinfo.ulHeight;

    /* Every mode other than weave doubles the reported frame rate. */
    if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});

    ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
    if (ctx->internal_error < 0)
        return 0;

    /* A frames context that already owns a pool is left untouched. */
    if (frames->pool)
        return 1;

    frames->format = AV_PIX_FMT_CUDA;
    frames->sw_format = AV_PIX_FMT_NV12;
    frames->width = avctx->width;
    frames->height = avctx->height;

    ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe);
    if (ctx->internal_error < 0) {
        av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
        return 0;
    }

    return 1;
}
コード例 #4
0
ファイル: VideoDecoder.cpp プロジェクト: chengli1986/571e
// Constructs a video decoder for the stream described by rVideoFormat.
//
// rVideoFormat: source of codec, coded size and chroma format for the decoder.
// rContext:     CUDA context; a copy of the handle is stored in m_Context.
// eCreateFlags: decoder creation flags; stored and forwarded to ulCreationFlags.
// vidCtxLock:   context lock; stored in m_VidCtxLock and passed as vidLock.
//
// Codec and chroma format are validated with assert() only. A failed
// cuvidCreateDecoder() is always reported on stdout and leaves oDecoder_ null;
// builds with assertions enabled additionally abort, as before.
VideoDecoder::VideoDecoder(const CUVIDEOFORMAT &rVideoFormat,
                           CUcontext &rContext,
                           cudaVideoCreateFlags eCreateFlags,
                           CUvideoctxlock &vidCtxLock)
    : m_VidCtxLock(vidCtxLock)
{
    // get a copy of the CUDA context
    m_Context          = rContext;
    m_VideoCreateFlags = eCreateFlags;

    printf("> VideoDecoder::cudaVideoCreateFlags = <%d>", (int)eCreateFlags);

    switch (eCreateFlags)
    {
        case cudaVideoCreate_Default:
            printf("Default (VP)\n");
            break;

        case cudaVideoCreate_PreferCUDA:
            printf("Use CUDA decoder\n");
            break;

        case cudaVideoCreate_PreferDXVA:
            printf("Use DXVA decoder\n");
            break;

        case cudaVideoCreate_PreferCUVID:
            printf("Use CUVID decoder\n");
            break;

        default:
            printf("Unknown value\n");
            break;
    }

    printf("\n");

    // Validate video format.  These are the currently supported formats via NVCUVID
    assert(cudaVideoCodec_MPEG1 == rVideoFormat.codec ||
           cudaVideoCodec_MPEG2 == rVideoFormat.codec ||
           cudaVideoCodec_MPEG4 == rVideoFormat.codec ||
           cudaVideoCodec_VC1   == rVideoFormat.codec ||
           cudaVideoCodec_H264  == rVideoFormat.codec ||
           cudaVideoCodec_JPEG  == rVideoFormat.codec ||
           cudaVideoCodec_YUV420== rVideoFormat.codec ||
           cudaVideoCodec_YV12  == rVideoFormat.codec ||
           cudaVideoCodec_NV12  == rVideoFormat.codec ||
           cudaVideoCodec_YUYV  == rVideoFormat.codec ||
           cudaVideoCodec_UYVY  == rVideoFormat.codec);

    assert(cudaVideoChromaFormat_Monochrome == rVideoFormat.chroma_format ||
           cudaVideoChromaFormat_420        == rVideoFormat.chroma_format ||
           cudaVideoChromaFormat_422        == rVideoFormat.chroma_format ||
           cudaVideoChromaFormat_444        == rVideoFormat.chroma_format);

    // Fill the decoder-create-info struct from the given video-format struct.
    memset(&oVideoDecodeCreateInfo_, 0, sizeof(CUVIDDECODECREATEINFO));
    // Create video decoder
    oVideoDecodeCreateInfo_.CodecType           = rVideoFormat.codec;
    oVideoDecodeCreateInfo_.ulWidth             = rVideoFormat.coded_width;
    oVideoDecodeCreateInfo_.ulHeight            = rVideoFormat.coded_height;
    oVideoDecodeCreateInfo_.ulNumDecodeSurfaces = FrameQueue::cnMaximumSize;

    // Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes).
    // Never go below one surface: when a single coded frame already exceeds
    // the budget the unguarded loop would count all the way down to 0.
    while (oVideoDecodeCreateInfo_.ulNumDecodeSurfaces > 1 &&
           oVideoDecodeCreateInfo_.ulNumDecodeSurfaces * rVideoFormat.coded_width * rVideoFormat.coded_height > 16*1024*1024)
    {
        oVideoDecodeCreateInfo_.ulNumDecodeSurfaces--;
    }

    oVideoDecodeCreateInfo_.ChromaFormat        = rVideoFormat.chroma_format;
    oVideoDecodeCreateInfo_.OutputFormat        = cudaVideoSurfaceFormat_NV12;
    oVideoDecodeCreateInfo_.DeinterlaceMode     = cudaVideoDeinterlaceMode_Adaptive;

    // No scaling
    oVideoDecodeCreateInfo_.ulTargetWidth       = oVideoDecodeCreateInfo_.ulWidth;
    oVideoDecodeCreateInfo_.ulTargetHeight      = oVideoDecodeCreateInfo_.ulHeight;
    oVideoDecodeCreateInfo_.ulNumOutputSurfaces = MAX_FRAME_COUNT;  // We won't simultaneously map more than MAX_FRAME_COUNT surfaces
    oVideoDecodeCreateInfo_.ulCreationFlags     = m_VideoCreateFlags;
    oVideoDecodeCreateInfo_.vidLock             = vidCtxLock;
    // create the decoder
    CUresult oResult = cuvidCreateDecoder(&oDecoder_, &oVideoDecodeCreateInfo_);

    if (CUDA_SUCCESS != oResult)
    {
        // assert() compiles away under NDEBUG, so report the failure unconditionally
        // and make sure no indeterminate handle is left behind.
        printf("> VideoDecoder: cuvidCreateDecoder failed, CUresult = %d\n", (int)oResult);
        oDecoder_ = 0;
    }

    assert(CUDA_SUCCESS == oResult);
}