예제 #1
0
bool VideoDecoderCUDAPrivate::createCUVIDDecoder(cudaVideoCodec cudaCodec, int w, int h)
{
    if (cudaCodec == -1) {
        return false;
    }
    AutoCtxLock lock(this, vid_ctx_lock);
    Q_UNUSED(lock);
    if (dec) {
        checkCudaErrors(cuvidDestroyDecoder(dec));
    }
    memset(&dec_create_info, 0, sizeof(CUVIDDECODECREATEINFO));
    dec_create_info.ulWidth = w;
    dec_create_info.ulHeight = h;
    dec_create_info.ulNumDecodeSurfaces = nb_dec_surface; //same as ulMaxNumDecodeSurfaces
    dec_create_info.CodecType = cudaCodec;
    dec_create_info.ChromaFormat = cudaVideoChromaFormat_420;  // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported)
    //cudaVideoCreate_PreferCUVID is slow in example. DXVA may failed to create (CUDA_ERROR_NO_DEVICE)
    dec_create_info.ulCreationFlags = create_flags;
    // TODO: lav yv12
    dec_create_info.OutputFormat = cudaVideoSurfaceFormat_NV12; // NV12 (currently the only supported output format)
    dec_create_info.DeinterlaceMode = deinterlace;
    // No scaling
    dec_create_info.ulTargetWidth = dec_create_info.ulWidth;
    dec_create_info.ulTargetHeight = dec_create_info.ulHeight;
    dec_create_info.ulNumOutputSurfaces = 2;  // We won't simultaneously map more than 8 surfaces
    dec_create_info.vidLock = vid_ctx_lock;//vidCtxLock; //FIXME

    // Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes)
    // otherwise CUDA_ERROR_OUT_OF_MEMORY on cuMemcpyDtoH
    // if ulNumDecodeSurfaces < ulMaxNumDecodeSurfaces, CurrPicIdx may be > ulNumDecodeSurfaces
    /*
     * TODO: check video memory, e.g. runtime apu extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, size_t *total);
     * 24MB is too small for 4k video, only n2 surfaces can be use so decoding will be too slow
     */
#if 0
    while (dec_create_info.ulNumDecodeSurfaces * codec_ctx->coded_width * codec_ctx->coded_height > 16*1024*1024) {
        dec_create_info.ulNumDecodeSurfaces--;
    }
#endif
    // create the decoder
    available = false;
    checkCudaErrors(cuvidCreateDecoder(&dec, &dec_create_info));
    available = true;
    return true;
}
예제 #2
0
bool VideoDecoderCUDAPrivate::releaseCuda()
{
    if (!can_load)
        return true;
    if (dec) {
        cuvidDestroyDecoder(dec);
        dec = 0;
    }
    if (parser) {
        cuvidDestroyVideoParser(parser);
        parser = 0;
    }
    if (stream) {
        cuStreamDestroy(stream);
        stream = 0;
    }
    cuvidCtxLockDestroy(vid_ctx_lock);
    if (cuctx) {
        checkCudaErrors(cuCtxDestroy(cuctx));
    }
    return true;
}
예제 #3
0
파일: cuvid.c 프로젝트: kinetiknz/FFmpeg
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;
    AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
    CUVIDDECODECREATEINFO cuinfo;

    av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);

    ctx->internal_error = 0;

    avctx->width = format->display_area.right;
    avctx->height = format->display_area.bottom;

    ff_set_sar(avctx, av_div_q(
        (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
        (AVRational){ avctx->width, avctx->height }));

    if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
        avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
    else
        avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;

    if (format->video_signal_description.video_full_range_flag)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    avctx->color_primaries = format->video_signal_description.color_primaries;
    avctx->color_trc = format->video_signal_description.transfer_characteristics;
    avctx->colorspace = format->video_signal_description.matrix_coefficients;

    if (format->bitrate)
        avctx->bit_rate = format->bitrate;

    if (format->frame_rate.numerator && format->frame_rate.denominator) {
        avctx->framerate.num = format->frame_rate.numerator;
        avctx->framerate.den = format->frame_rate.denominator;
    }

    if (ctx->cudecoder
            && avctx->coded_width == format->coded_width
            && avctx->coded_height == format->coded_height
            && ctx->chroma_format == format->chroma_format
            && ctx->codec_type == format->codec)
        return 1;

    if (ctx->cudecoder) {
        av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
        ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
        if (ctx->internal_error < 0)
            return 0;
        ctx->cudecoder = NULL;
    }

    if (hwframe_ctx->pool && (
            hwframe_ctx->width < avctx->width ||
            hwframe_ctx->height < avctx->height ||
            hwframe_ctx->format != AV_PIX_FMT_CUDA ||
            hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
        av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    if (format->chroma_format != cudaVideoChromaFormat_420) {
        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    avctx->coded_width = format->coded_width;
    avctx->coded_height = format->coded_height;

    ctx->chroma_format = format->chroma_format;

    memset(&cuinfo, 0, sizeof(cuinfo));

    cuinfo.CodecType = ctx->codec_type = format->codec;
    cuinfo.ChromaFormat = format->chroma_format;
    cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;

    cuinfo.ulWidth = avctx->coded_width;
    cuinfo.ulHeight = avctx->coded_height;
    cuinfo.ulTargetWidth = cuinfo.ulWidth;
    cuinfo.ulTargetHeight = cuinfo.ulHeight;

    cuinfo.target_rect.left = 0;
    cuinfo.target_rect.top = 0;
    cuinfo.target_rect.right = cuinfo.ulWidth;
    cuinfo.target_rect.bottom = cuinfo.ulHeight;

    cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
    cuinfo.ulNumOutputSurfaces = 1;
    cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;

    if (format->progressive_sequence) {
        ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    } else {
        cuinfo.DeinterlaceMode = ctx->deint_mode;
    }

    if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});

    ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
    if (ctx->internal_error < 0)
        return 0;

    if (!hwframe_ctx->pool) {
        hwframe_ctx->format = AV_PIX_FMT_CUDA;
        hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
        hwframe_ctx->width = avctx->width;
        hwframe_ctx->height = avctx->height;

        if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
            return 0;
        }
    }

    return 1;
}
예제 #4
0
VideoDecoder::~VideoDecoder()
{
    cuvidDestroyDecoder(oDecoder_);
}