static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size) { CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; void *args_uchar[] = { &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height }; CUDA_ARRAY_DESCRIPTOR desc; desc.Width = src_width; desc.Height = src_height; desc.NumChannels = channels; if (pixel_size == 1) { desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; } else { desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; } CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size)); CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL)); return 0; }
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) { AVFilterContext *ctx = link->dst; CUDAScaleContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; AVHWFramesContext *frames_ctx = (AVHWFramesContext*)s->frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; CUcontext dummy; int ret = 0; out = av_frame_alloc(); if (!out) { ret = AVERROR(ENOMEM); goto fail; } ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); if (ret < 0) goto fail; ret = cudascale_scale(ctx, out, in); CHECK_CU(cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, INT_MAX); av_frame_free(&in); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); av_frame_free(&out); return ret; }
static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams) { AVCodecContext *avctx = opaque; CuvidContext *ctx = avctx->priv_data; av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n"); ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams)); if (ctx->internal_error < 0) return 0; return 1; }
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) { AVCodecContext *avctx = opaque; CuvidContext *ctx = avctx->priv_data; AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data; CUVIDDECODECREATEINFO cuinfo; av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence); ctx->internal_error = 0; avctx->width = format->display_area.right; avctx->height = format->display_area.bottom; ff_set_sar(avctx, av_div_q( (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y }, (AVRational){ avctx->width, avctx->height })); if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT; else avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT; if (format->video_signal_description.video_full_range_flag) avctx->color_range = AVCOL_RANGE_JPEG; else avctx->color_range = AVCOL_RANGE_MPEG; avctx->color_primaries = format->video_signal_description.color_primaries; avctx->color_trc = format->video_signal_description.transfer_characteristics; avctx->colorspace = format->video_signal_description.matrix_coefficients; if (format->bitrate) avctx->bit_rate = format->bitrate; if (format->frame_rate.numerator && format->frame_rate.denominator) { avctx->framerate.num = format->frame_rate.numerator; avctx->framerate.den = format->frame_rate.denominator; } if (ctx->cudecoder && avctx->coded_width == format->coded_width && avctx->coded_height == format->coded_height && ctx->chroma_format == format->chroma_format && ctx->codec_type == format->codec) return 1; if (ctx->cudecoder) { av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n"); ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder)); if (ctx->internal_error < 0) return 0; ctx->cudecoder = NULL; } if (hwframe_ctx->pool && ( hwframe_ctx->width < avctx->width || hwframe_ctx->height < avctx->height || hwframe_ctx->format != AV_PIX_FMT_CUDA || hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) { av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n"); ctx->internal_error = AVERROR(EINVAL); return 0; } if (format->chroma_format != cudaVideoChromaFormat_420) { av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n"); ctx->internal_error = AVERROR(EINVAL); return 0; } avctx->coded_width = format->coded_width; avctx->coded_height = format->coded_height; ctx->chroma_format = format->chroma_format; memset(&cuinfo, 0, sizeof(cuinfo)); cuinfo.CodecType = ctx->codec_type = format->codec; cuinfo.ChromaFormat = format->chroma_format; cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; cuinfo.ulWidth = avctx->coded_width; cuinfo.ulHeight = avctx->coded_height; cuinfo.ulTargetWidth = cuinfo.ulWidth; cuinfo.ulTargetHeight = cuinfo.ulHeight; cuinfo.target_rect.left = 0; cuinfo.target_rect.top = 0; cuinfo.target_rect.right = cuinfo.ulWidth; cuinfo.target_rect.bottom = cuinfo.ulHeight; cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT; cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8; if (format->progressive_sequence) { ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; } else { cuinfo.DeinterlaceMode = ctx->deint_mode; } if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1}); ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo)); if (ctx->internal_error < 0) return 0; if (!hwframe_ctx->pool) { hwframe_ctx->format = AV_PIX_FMT_CUDA; hwframe_ctx->sw_format = AV_PIX_FMT_NV12; hwframe_ctx->width = avctx->width; hwframe_ctx->height = avctx->height; if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n"); return 0; } } return 1; }
static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) { CuvidContext *ctx = avctx->priv_data; AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUdeviceptr mapped_frame = 0; int ret = 0, eret = 0; av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n"); if (ctx->decoder_flushing) { ret = cuvid_decode_packet(avctx, NULL); if (ret < 0 && ret != AVERROR_EOF) return ret; } ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; if (av_fifo_size(ctx->frame_queue)) { CuvidParsedFrame parsed_frame; CUVIDPROCPARAMS params; unsigned int pitch = 0; int offset = 0; int i; av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); memset(¶ms, 0, sizeof(params)); params.progressive_frame = parsed_frame.dispinfo.progressive_frame; params.second_field = parsed_frame.second_field; params.top_field_first = parsed_frame.dispinfo.top_field_first; ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); if (ret < 0) goto error; if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n"); goto error; } ret = ff_decode_frame_props(avctx, frame); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n"); goto error; } for (i = 0; i < 2; i++) { CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, .srcDevice = mapped_frame, .dstDevice = (CUdeviceptr)frame->data[i], .srcPitch = pitch, .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), .Height = avctx->height >> (i ? 1 : 0), }; ret = CHECK_CU(cuMemcpy2D(&cpy)); if (ret < 0) goto error; offset += avctx->coded_height; } } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) { CuvidContext *ctx = avctx->priv_data; AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUVIDSOURCEDATAPACKET cupkt; AVPacket filter_packet = { 0 }; AVPacket filtered_packet = { 0 }; int ret = 0, eret = 0, is_flush = ctx->decoder_flushing; av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n"); if (is_flush && avpkt && avpkt->size) return AVERROR_EOF; if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size) return AVERROR(EAGAIN); if (ctx->bsf && avpkt && avpkt->size) { if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n"); return ret; } if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n"); av_packet_unref(&filter_packet); return ret; } if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n"); return ret; } avpkt = &filtered_packet; } ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); if (ret < 0) { av_packet_unref(&filtered_packet); return ret; } memset(&cupkt, 0, sizeof(cupkt)); if (avpkt && avpkt->size) { cupkt.payload_size = avpkt->size; cupkt.payload = avpkt->data; if (avpkt->pts != AV_NOPTS_VALUE) { cupkt.flags = CUVID_PKT_TIMESTAMP; if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000}); else cupkt.timestamp = avpkt->pts; } } else { cupkt.flags = CUVID_PKT_ENDOFSTREAM; ctx->decoder_flushing = 1; } ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt)); av_packet_unref(&filtered_packet); if (ret < 0) goto error; // cuvidParseVideoData doesn't return an error just because stuff failed... if (ctx->internal_error) { av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n"); ret = ctx->internal_error; goto error; } error: eret = CHECK_CU(cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; else if (ret < 0) return ret; else if (is_flush) return AVERROR_EOF; else return 0; }
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) { AVCodecContext *avctx = opaque; CuvidContext *ctx = avctx->priv_data; AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data; CUVIDDECODECAPS *caps = NULL; CUVIDDECODECREATEINFO cuinfo; int surface_fmt; int old_width = avctx->width; int old_height = avctx->height; enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, // Will be updated below AV_PIX_FMT_NONE }; av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence); memset(&cuinfo, 0, sizeof(cuinfo)); ctx->internal_error = 0; avctx->coded_width = cuinfo.ulWidth = format->coded_width; avctx->coded_height = cuinfo.ulHeight = format->coded_height; // apply cropping cuinfo.display_area.left = format->display_area.left + ctx->crop.left; cuinfo.display_area.top = format->display_area.top + ctx->crop.top; cuinfo.display_area.right = format->display_area.right - ctx->crop.right; cuinfo.display_area.bottom = format->display_area.bottom - ctx->crop.bottom; // width and height need to be set before calling ff_get_format if (ctx->resize_expr) { avctx->width = ctx->resize.width; avctx->height = ctx->resize.height; } else { avctx->width = cuinfo.display_area.right - cuinfo.display_area.left; avctx->height = cuinfo.display_area.bottom - cuinfo.display_area.top; } // target width/height need to be multiples of two cuinfo.ulTargetWidth = avctx->width = (avctx->width + 1) & ~1; cuinfo.ulTargetHeight = avctx->height = (avctx->height + 1) & ~1; // aspect ratio conversion, 1:1, depends on scaled resolution cuinfo.target_rect.left = 0; cuinfo.target_rect.top = 0; cuinfo.target_rect.right = cuinfo.ulTargetWidth; cuinfo.target_rect.bottom = cuinfo.ulTargetHeight; switch (format->bit_depth_luma_minus8) { case 0: // 8-bit pix_fmts[1] = AV_PIX_FMT_NV12; caps = &ctx->caps8; break; case 2: // 10-bit pix_fmts[1] = AV_PIX_FMT_P010; caps = &ctx->caps10; break; case 4: // 12-bit pix_fmts[1] = AV_PIX_FMT_P016; caps = &ctx->caps12; break; default: break; } if (!caps || !caps->bIsSupported) { av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n", format->bit_depth_luma_minus8 + 8); ctx->internal_error = AVERROR(EINVAL); return 0; } surface_fmt = ff_get_format(avctx, pix_fmts); if (surface_fmt < 0) { av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt); ctx->internal_error = AVERROR(EINVAL); return 0; } av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n", av_get_pix_fmt_name(avctx->pix_fmt), av_get_pix_fmt_name(surface_fmt), av_get_pix_fmt_name(avctx->sw_pix_fmt)); avctx->pix_fmt = surface_fmt; // Update our hwframe ctx, as the get_format callback might have refreshed it! if (avctx->hw_frames_ctx) { av_buffer_unref(&ctx->hwframe); ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx); if (!ctx->hwframe) { ctx->internal_error = AVERROR(ENOMEM); return 0; } hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data; } ff_set_sar(avctx, av_div_q( (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y }, (AVRational){ avctx->width, avctx->height })); ctx->deint_mode_current = format->progressive_sequence ? cudaVideoDeinterlaceMode_Weave : ctx->deint_mode; if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT; else avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT; if (format->video_signal_description.video_full_range_flag) avctx->color_range = AVCOL_RANGE_JPEG; else avctx->color_range = AVCOL_RANGE_MPEG; avctx->color_primaries = format->video_signal_description.color_primaries; avctx->color_trc = format->video_signal_description.transfer_characteristics; avctx->colorspace = format->video_signal_description.matrix_coefficients; if (format->bitrate) avctx->bit_rate = format->bitrate; if (format->frame_rate.numerator && format->frame_rate.denominator) { avctx->framerate.num = format->frame_rate.numerator; avctx->framerate.den = format->frame_rate.denominator; } if (ctx->cudecoder && avctx->coded_width == format->coded_width && avctx->coded_height == format->coded_height && avctx->width == old_width && avctx->height == old_height && ctx->chroma_format == format->chroma_format && ctx->codec_type == format->codec) return 1; if (ctx->cudecoder) { av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n"); ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder)); if (ctx->internal_error < 0) return 0; ctx->cudecoder = NULL; } if (hwframe_ctx->pool && ( hwframe_ctx->width < avctx->width || hwframe_ctx->height < avctx->height || hwframe_ctx->format != AV_PIX_FMT_CUDA || hwframe_ctx->sw_format != avctx->sw_pix_fmt)) { av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n"); av_log(avctx, AV_LOG_DEBUG, "width: %d <-> %d\n", hwframe_ctx->width, avctx->width); av_log(avctx, AV_LOG_DEBUG, "height: %d <-> %d\n", hwframe_ctx->height, avctx->height); av_log(avctx, AV_LOG_DEBUG, "format: %s <-> cuda\n", av_get_pix_fmt_name(hwframe_ctx->format)); av_log(avctx, AV_LOG_DEBUG, "sw_format: %s <-> %s\n", av_get_pix_fmt_name(hwframe_ctx->sw_format), av_get_pix_fmt_name(avctx->sw_pix_fmt)); ctx->internal_error = AVERROR(EINVAL); return 0; } if (format->chroma_format != cudaVideoChromaFormat_420) { av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n"); ctx->internal_error = AVERROR(EINVAL); return 0; } ctx->chroma_format = format->chroma_format; cuinfo.CodecType = ctx->codec_type = format->codec; cuinfo.ChromaFormat = format->chroma_format; switch (avctx->sw_pix_fmt) { case AV_PIX_FMT_NV12: cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; break; case AV_PIX_FMT_P010: case AV_PIX_FMT_P016: cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016; break; default: av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n"); ctx->internal_error = AVERROR(EINVAL); return 0; } cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces; cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8; cuinfo.DeinterlaceMode = ctx->deint_mode_current; if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field) avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1}); ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo)); if (ctx->internal_error < 0) return 0; if (!hwframe_ctx->pool) { hwframe_ctx->format = AV_PIX_FMT_CUDA; hwframe_ctx->sw_format = avctx->sw_pix_fmt; hwframe_ctx->width = avctx->width; hwframe_ctx->height = avctx->height; if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n"); return 0; } } return 1; }
static av_cold int cudascale_config_props(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; AVFilterLink *inlink = outlink->src->inputs[0]; CUDAScaleContext *s = ctx->priv; AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; int w, h; int ret; extern char vf_scale_cuda_ptx[]; ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); if (ret < 0) goto fail; ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); if (ret < 0) goto fail; CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar")); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2")); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4")); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort")); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2")); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4")); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex")); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex")); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER)); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER)); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR)); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR)); CHECK_CU(cuCtxPopCurrent(&dummy)); if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, &w, &h)) < 0) goto fail; if (((int64_t)h * inlink->w) > INT_MAX || ((int64_t)w * inlink->h) > INT_MAX) av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); outlink->w = w; outlink->h = h; ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h); if (ret < 0) return ret; av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n", inlink->w, inlink->h, outlink->w, outlink->h); if (inlink->sample_aspect_ratio.num) { outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, outlink->w*inlink->h}, inlink->sample_aspect_ratio); } else { outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; } return 0; fail: return ret; }
static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) { CuvidContext *ctx = avctx->priv_data; AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUdeviceptr mapped_frame = 0; int ret = 0, eret = 0; av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n"); if (ctx->decoder_flushing) { ret = cuvid_decode_packet(avctx, NULL); if (ret < 0 && ret != AVERROR_EOF) return ret; } if (!cuvid_is_buffer_full(avctx)) { AVPacket pkt = {0}; ret = ff_decode_get_packet(avctx, &pkt); if (ret < 0 && ret != AVERROR_EOF) return ret; ret = cuvid_decode_packet(avctx, &pkt); av_packet_unref(&pkt); // cuvid_is_buffer_full() should avoid this. if (ret == AVERROR(EAGAIN)) ret = AVERROR_EXTERNAL; if (ret < 0 && ret != AVERROR_EOF) return ret; } ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; if (av_fifo_size(ctx->frame_queue)) { const AVPixFmtDescriptor *pixdesc; CuvidParsedFrame parsed_frame; CUVIDPROCPARAMS params; unsigned int pitch = 0; int offset = 0; int i; av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); memset(¶ms, 0, sizeof(params)); params.progressive_frame = parsed_frame.dispinfo.progressive_frame; params.second_field = parsed_frame.second_field; params.top_field_first = parsed_frame.dispinfo.top_field_first; ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); if (ret < 0) goto error; if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n"); goto error; } ret = ff_decode_frame_props(avctx, frame); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n"); goto error; } pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); for (i = 0; i < pixdesc->nb_components; i++) { int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0); CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, .srcDevice = mapped_frame, .dstDevice = (CUdeviceptr)frame->data[i], .srcPitch = pitch, .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), .Height = height, }; ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream)); if (ret < 0) goto error; offset += height; } } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || avctx->pix_fmt == AV_PIX_FMT_P010 || avctx->pix_fmt == AV_PIX_FMT_P016 || avctx->pix_fmt == AV_PIX_FMT_YUV444P || avctx->pix_fmt == AV_PIX_FMT_YUV444P16) { unsigned int offset = 0; AVFrame *tmp_frame = av_frame_alloc(); if (!tmp_frame) { av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n"); ret = AVERROR(ENOMEM); goto error; } pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); tmp_frame->format = AV_PIX_FMT_CUDA; tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe); tmp_frame->width = avctx->width; tmp_frame->height = avctx->height; /* * Note that the following logic would not work for three plane * YUV420 because the pitch value is different for the chroma * planes. */ for (i = 0; i < pixdesc->nb_components; i++) { tmp_frame->data[i] = (uint8_t*)mapped_frame + offset; tmp_frame->linesize[i] = pitch; offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0)); } ret = ff_get_buffer(avctx, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n"); av_frame_free(&tmp_frame); goto error; } ret = av_hwframe_transfer_data(frame, tmp_frame, 0); if (ret) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n"); av_frame_free(&tmp_frame); goto error; } av_frame_free(&tmp_frame); } else {
static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt) { CuvidContext *ctx = avctx->priv_data; AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; AVFrame *frame = data; CUVIDSOURCEDATAPACKET cupkt; AVPacket filter_packet = { 0 }; AVPacket filtered_packet = { 0 }; CUdeviceptr mapped_frame = 0; int ret = 0, eret = 0; if (ctx->bsf && avpkt->size) { if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n"); return ret; } if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n"); av_packet_unref(&filter_packet); return ret; } if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n"); return ret; } avpkt = &filtered_packet; } ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); if (ret < 0) { av_packet_unref(&filtered_packet); return ret; } memset(&cupkt, 0, sizeof(cupkt)); if (avpkt->size) { cupkt.payload_size = avpkt->size; cupkt.payload = avpkt->data; if (avpkt->pts != AV_NOPTS_VALUE) { cupkt.flags = CUVID_PKT_TIMESTAMP; if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000}); else cupkt.timestamp = avpkt->pts; } } else { cupkt.flags = CUVID_PKT_ENDOFSTREAM; } ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt)); av_packet_unref(&filtered_packet); if (ret < 0) { goto error; } // cuvidParseVideoData doesn't return an error just because stuff failed... if (ctx->internal_error) { av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n"); ret = ctx->internal_error; goto error; } if (av_fifo_size(ctx->frame_queue)) { CUVIDPARSERDISPINFO dispinfo; CUVIDPROCPARAMS params; unsigned int pitch = 0; int offset = 0; int i; av_fifo_generic_read(ctx->frame_queue, &dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL); memset(¶ms, 0, sizeof(params)); params.progressive_frame = dispinfo.progressive_frame; params.second_field = 0; params.top_field_first = dispinfo.top_field_first; ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); if (ret < 0) goto error; if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n"); goto error; } ret = ff_decode_frame_props(avctx, frame); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n"); goto error; } for (i = 0; i < 2; i++) { CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, .srcDevice = mapped_frame, .dstDevice = (CUdeviceptr)frame->data[i], .srcPitch = pitch, .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), .Height = avctx->coded_height >> (i ? 1 : 0), }; ret = CHECK_CU(cuMemcpy2D(&cpy)); if (ret < 0) goto error; offset += avctx->coded_height; } } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size) { CUDAScaleContext *s = ctx->priv; CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; CUtexObject tex = 0; void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height }; int ret; CUDA_TEXTURE_DESC tex_desc = { .filterMode = CU_TR_FILTER_MODE_LINEAR, .flags = CU_TRSF_READ_AS_INTEGER, }; CUDA_RESOURCE_DESC res_desc = { .resType = CU_RESOURCE_TYPE_PITCH2D, .res.pitch2D.format = pixel_size == 1 ? CU_AD_FORMAT_UNSIGNED_INT8 : CU_AD_FORMAT_UNSIGNED_INT16, .res.pitch2D.numChannels = channels, .res.pitch2D.width = src_width, .res.pitch2D.height = src_height, .res.pitch2D.pitchInBytes = src_pitch * pixel_size, .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, }; ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); if (ret < 0) goto exit; ret = CHECK_CU(cu->cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL)); exit: if (tex) CHECK_CU(cu->cuTexObjectDestroy(tex)); return ret; } static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in) { AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; CUDAScaleContext *s = ctx->priv; switch (in_frames_ctx->sw_format) { case AV_PIX_FMT_YUV420P: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[1], in->width/2, in->height/2, in->linesize[0]/2, out->data[1], out->width/2, out->height/2, out->linesize[0]/2, 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[2], in->width/2, in->height/2, in->linesize[0]/2, out->data[2], out->width/2, out->height/2, out->linesize[0]/2, 1); break; case AV_PIX_FMT_YUV444P: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[1], in->width, in->height, in->linesize[0], out->data[1], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[2], in->width, in->height, in->linesize[0], out->data[2], out->width, out->height, out->linesize[0], 1); break; case AV_PIX_FMT_YUV444P16: call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[0], in->width, in->height, in->linesize[0] / 2, out->data[0], out->width, out->height, out->linesize[0] / 2, 2); call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[1], in->width, in->height, in->linesize[1] / 2, out->data[1], out->width, out->height, out->linesize[1] / 2, 2); call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[2], in->width, in->height, in->linesize[2] / 2, out->data[2], out->width, out->height, out->linesize[2] / 2, 2); break; case AV_PIX_FMT_NV12: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar2, 2, in->data[1], in->width/2, in->height/2, in->linesize[1], out->data[1], out->width/2, out->height/2, out->linesize[1]/2, 1); break; case AV_PIX_FMT_P010LE: call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[0], in->width, in->height, in->linesize[0]/2, out->data[0], out->width, out->height, out->linesize[0]/2, 2); call_resize_kernel(ctx, s->cu_func_ushort2, 2, in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2, out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4, 2); break; case AV_PIX_FMT_P016LE: call_resize_kernel(ctx, s->cu_func_ushort, 1, in->data[0], in->width, in->height, in->linesize[0] / 2, out->data[0], out->width, out->height, out->linesize[0] / 2, 2); call_resize_kernel(ctx, s->cu_func_ushort2, 2, in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2, out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4, 2); break; default: return AVERROR_BUG; } return 0; }