Esempio n. 1
0
static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
{
    AVHWFramesContext     *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
    CudaFunctions          *cu = hwctx->internal->cuda_dl;

    AVBufferRef *ret = NULL;
    CUcontext dummy = NULL;
    CUdeviceptr data;
    CUresult err;

    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
        return NULL;
    }

    err = cu->cuMemAlloc(&data, size);
    if (err != CUDA_SUCCESS)
        goto fail;

    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
    if (!ret) {
        cu->cuMemFree(data);
        goto fail;
    }

fail:
    cu->cuCtxPopCurrent(&dummy);
    return ret;
}
Esempio n. 2
0
static void cuda_buffer_free(void *opaque, uint8_t *data)
{
    AVHWFramesContext *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
    CudaFunctions *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;

    cu->cuCtxPushCurrent(hwctx->cuda_ctx);

    cu->cuMemFree((CUdeviceptr)data);

    cu->cuCtxPopCurrent(&dummy);
}
Esempio n. 3
0
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
{
    AVFilterContext       *ctx = link->dst;
    CUDAScaleContext        *s = ctx->priv;
    AVFilterLink      *outlink = ctx->outputs[0];
    CudaFunctions          *cu = s->hwctx->internal->cuda_dl;

    AVFrame *out = NULL;
    CUcontext dummy;
    int ret = 0;

    out = av_frame_alloc();
    if (!out) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
    if (ret < 0)
        goto fail;

    ret = cudascale_scale(ctx, out, in);

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        goto fail;

    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
              INT_MAX);

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
fail:
    av_frame_free(&in);
    av_frame_free(&out);
    return ret;
}
Esempio n. 4
0
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
{
    CUDAFramesContext           *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
    CUresult err;
    int i;

    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstMemoryType = CU_MEMORYTYPE_HOST,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .dstHost       = dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cu->cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
            return AVERROR_UNKNOWN;
        }
    }

    cu->cuCtxPopCurrent(&dummy);

    return 0;
}

static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                 const AVFrame *src)
{
    CUDAFramesContext           *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
    CUresult err;
    int i;

    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_HOST,
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcHost       = src->data[i],
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cu->cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
            return AVERROR_UNKNOWN;
        }
    }

    cu->cuCtxPopCurrent(&dummy);

    return 0;
}

static void cuda_device_uninit(AVHWDeviceContext *ctx)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;

    if (hwctx->internal) {
        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
            hwctx->cuda_ctx = NULL;
        }
        cuda_free_functions(&hwctx->internal->cuda_dl);
    }

    av_freep(&hwctx->internal);
}
Esempio n. 5
0
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
    int w, h;
    int ret;

    extern char vf_scale_cuda_ptx[];

    s->hwctx = device_hwctx;
    s->cu_stream = s->hwctx->stream;

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"));
    if (ret < 0)
        goto fail;


    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        goto fail;

    if (((int64_t)h * inlink->w) > INT_MAX  ||
        ((int64_t)w * inlink->h) > INT_MAX)
        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                             outlink->w*inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;

fail:
    return ret;
}