static AVBufferRef *cuda_pool_alloc(void *opaque, int size) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; CudaFunctions *cu = hwctx->internal->cuda_dl; AVBufferRef *ret = NULL; CUcontext dummy = NULL; CUdeviceptr data; CUresult err; err = cu->cuCtxPushCurrent(hwctx->cuda_ctx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); return NULL; } err = cu->cuMemAlloc(&data, size); if (err != CUDA_SUCCESS) goto fail; ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); if (!ret) { cu->cuMemFree(data); goto fail; } fail: cu->cuCtxPopCurrent(&dummy); return ret; }
static void cuda_buffer_free(void *opaque, uint8_t *data) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; CudaFunctions *cu = hwctx->internal->cuda_dl; CUcontext dummy; cu->cuCtxPushCurrent(hwctx->cuda_ctx); cu->cuMemFree((CUdeviceptr)data); cu->cuCtxPopCurrent(&dummy); }
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) { AVFilterContext *ctx = link->dst; CUDAScaleContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; CudaFunctions *cu = s->hwctx->internal->cuda_dl; AVFrame *out = NULL; CUcontext dummy; int ret = 0; out = av_frame_alloc(); if (!out) { ret = AVERROR(ENOMEM); goto fail; } ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); if (ret < 0) goto fail; ret = cudascale_scale(ctx, out, in); CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, INT_MAX); av_frame_free(&in); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); av_frame_free(&out); return ret; }
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) { CUDAFramesContext *priv = ctx->internal->priv; AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; CudaFunctions *cu = device_hwctx->internal->cuda_dl; CUcontext dummy; CUresult err; int i; err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_HOST, .srcDevice = (CUdeviceptr)src->data[i], .dstHost = dst->data[i], .srcPitch = src->linesize[i], .dstPitch = dst->linesize[i], .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]), .Height = src->height >> (i ? priv->shift_height : 0), }; err = cu->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } cu->cuCtxPopCurrent(&dummy); return 0; } static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) { CUDAFramesContext *priv = ctx->internal->priv; AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; CudaFunctions *cu = device_hwctx->internal->cuda_dl; CUcontext dummy; CUresult err; int i; err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_HOST, .dstMemoryType = CU_MEMORYTYPE_DEVICE, .srcHost = src->data[i], .dstDevice = (CUdeviceptr)dst->data[i], .srcPitch = src->linesize[i], .dstPitch = dst->linesize[i], .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]), .Height = src->height >> (i ? priv->shift_height : 0), }; err = cu->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } cu->cuCtxPopCurrent(&dummy); return 0; } static void cuda_device_uninit(AVHWDeviceContext *ctx) { AVCUDADeviceContext *hwctx = ctx->hwctx; if (hwctx->internal) { if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx); hwctx->cuda_ctx = NULL; } cuda_free_functions(&hwctx->internal->cuda_dl); } av_freep(&hwctx->internal); }
static av_cold int cudascale_config_props(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; AVFilterLink *inlink = outlink->src->inputs[0]; CUDAScaleContext *s = ctx->priv; AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CudaFunctions *cu = device_hwctx->internal->cuda_dl; int w, h; int ret; extern char vf_scale_cuda_ptx[]; s->hwctx = device_hwctx; s->cu_stream = s->hwctx->stream; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) goto fail; ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); if (ret < 0) goto fail; CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar")); if (ret < 0) goto fail; CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2")); if (ret < 0) goto fail; CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4")); if (ret < 0) goto fail; CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort")); if (ret < 0) goto fail; CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2")); if (ret < 0) goto fail; CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4")); if (ret < 0) goto fail; CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, &w, &h)) < 0) goto fail; if (((int64_t)h * inlink->w) > INT_MAX || ((int64_t)w * inlink->h) > INT_MAX) av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); outlink->w = w; outlink->h = h; ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h); if (ret < 0) return ret; av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n", inlink->w, inlink->h, outlink->w, outlink->h); if (inlink->sample_aspect_ratio.num) { outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, outlink->w*inlink->h}, inlink->sample_aspect_ratio); } else { outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; } return 0; fail: return ret; }