Example #1
static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
{
    AVHWFramesContext     *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
    CudaFunctions          *cu = hwctx->internal->cuda_dl;

    AVBufferRef *ret = NULL;
    CUcontext dummy = NULL;
    CUdeviceptr data;
    CUresult err;

    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
        return NULL;
    }

    err = cu->cuMemAlloc(&data, size);
    if (err != CUDA_SUCCESS)
        goto fail;

    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
    if (!ret) {
        cu->cuMemFree(data);
        goto fail;
    }

fail:
    cu->cuCtxPopCurrent(&dummy);
    return ret;
}
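
For context: an allocator with this signature is meant to be handed to an AVBufferPool, which then calls it whenever the frames context needs a fresh CUDA buffer. A minimal sketch of that wiring, assuming it happens during frame-pool setup (the buffer-size calculation and the 256-byte alignment are illustrative assumptions, not taken from the excerpt):

/* Sketch only: registering cuda_pool_alloc with a buffer pool.
 * The size calculation and alignment below are assumptions. */
static int cuda_frames_init_sketch(AVHWFramesContext *ctx)
{
    int size = av_image_get_buffer_size(ctx->sw_format, ctx->width,
                                        ctx->height, 256 /* assumed alignment */);
    if (size < 0)
        return size;

    /* Every buffer the pool hands out is produced by cuda_pool_alloc(). */
    ctx->internal->pool_internal =
        av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
    if (!ctx->internal->pool_internal)
        return AVERROR(ENOMEM);

    return 0;
}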
Example #2
static void cuda_buffer_free(void *opaque, uint8_t *data)
{
    AVHWFramesContext *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
    CudaFunctions *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;

    cu->cuCtxPushCurrent(hwctx->cuda_ctx);

    cu->cuMemFree((CUdeviceptr)data);

    cu->cuCtxPopCurrent(&dummy);
}
Example #3
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
                              AVDictionary *opts, int flags)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;
    CudaFunctions *cu;
    CUdevice cu_device;
    CUcontext dummy;
    CUresult err;
    int device_idx = 0;

    if (device)
        device_idx = strtol(device, NULL, 0);

    if (cuda_device_init(ctx) < 0)
        goto error;

    cu = hwctx->internal->cuda_dl;

    err = cu->cuInit(0);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
        goto error;
    }

    err = cu->cuDeviceGet(&cu_device, device_idx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
        goto error;
    }

    err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
        goto error;
    }

    cu->cuCtxPopCurrent(&dummy);

    hwctx->internal->is_allocated = 1;

    return 0;

error:
    cuda_device_uninit(ctx);
    return AVERROR_UNKNOWN;
}
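
Applications do not call this backend hook directly; it runs when a CUDA device is requested through the public hwdevice API. A caller-side sketch, assuming a standalone helper that is not part of the original file:

/* Sketch (assumption: application-side code, not from the excerpt above).
 * av_hwdevice_ctx_create() ends up invoking cuda_device_create(); the device
 * string is the index that strtol() parses there. */
static int open_cuda_device_sketch(AVBufferRef **device_ref)
{
    return av_hwdevice_ctx_create(device_ref, AV_HWDEVICE_TYPE_CUDA,
                                  "0", NULL, 0);
}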
Example #4
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
{
    AVFilterContext       *ctx = link->dst;
    CUDAScaleContext        *s = ctx->priv;
    AVFilterLink      *outlink = ctx->outputs[0];
    CudaFunctions          *cu = s->hwctx->internal->cuda_dl;

    AVFrame *out = NULL;
    CUcontext dummy;
    int ret = 0;

    out = av_frame_alloc();
    if (!out) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
    if (ret < 0)
        goto fail;

    ret = cudascale_scale(ctx, out, in);

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        goto fail;

    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
              INT_MAX);

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
fail:
    av_frame_free(&in);
    av_frame_free(&out);
    return ret;
}
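
CHECK_CU is used throughout the remaining excerpts but never defined in them. As a rough assumption, it evaluates a CUDA driver call, logs on failure, and converts the CUresult into a negative AVERROR so the `ret < 0` checks work; a minimal sketch:

/* Sketch of a CHECK_CU-style wrapper (assumption: the real macro in the source
 * tree also resolves a readable error string through the loaded driver API). */
#define CHECK_CU_SKETCH(x) check_cu_sketch(ctx, (x), #x)

static int check_cu_sketch(void *log_ctx, CUresult err, const char *call)
{
    if (err == CUDA_SUCCESS)
        return 0;

    av_log(log_ctx, AV_LOG_ERROR, "%s failed with CUresult %d\n", call, (int)err);
    return AVERROR_EXTERNAL;
}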
Example #5
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
{
    CUDAFramesContext           *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
    CUresult err;
    int i;

    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstMemoryType = CU_MEMORYTYPE_HOST,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .dstHost       = dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cu->cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
            cu->cuCtxPopCurrent(&dummy);   /* balance the push before bailing out */
            return AVERROR_UNKNOWN;
        }
    }

    cu->cuCtxPopCurrent(&dummy);

    return 0;
}

static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                 const AVFrame *src)
{
    CUDAFramesContext           *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
    CUresult err;
    int i;

    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_HOST,
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcHost       = src->data[i],
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cu->cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
            cu->cuCtxPopCurrent(&dummy);   /* balance the push before bailing out */
            return AVERROR_UNKNOWN;
        }
    }

    cu->cuCtxPopCurrent(&dummy);

    return 0;
}

static void cuda_device_uninit(AVHWDeviceContext *ctx)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;

    if (hwctx->internal) {
        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
            hwctx->cuda_ctx = NULL;
        }
        cuda_free_functions(&hwctx->internal->cuda_dl);
    }

    av_freep(&hwctx->internal);
}
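
Callers never invoke these transfer hooks directly; they go through the generic hwframe transfer API. A caller-side sketch, assuming a hypothetical helper that downloads a CUDA frame to system memory:

/* Sketch (assumption: application-side helper, not part of the excerpt).
 * av_hwframe_transfer_data() dispatches to cuda_transfer_data_from() when the
 * source frame lives in a CUDA frames context. */
static int download_cuda_frame_sketch(const AVFrame *hw_frame, AVFrame **out)
{
    AVFrame *sw_frame = av_frame_alloc();
    int ret;

    if (!sw_frame)
        return AVERROR(ENOMEM);

    ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
    if (ret < 0) {
        av_frame_free(&sw_frame);
        return ret;
    }

    *out = sw_frame;
    return 0;
}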
Example #6
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
                              uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                              uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                              int pixel_size)
{
    CUDAScaleContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
    CUtexObject tex = 0;
    void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height };
    int ret;

    CUDA_TEXTURE_DESC tex_desc = {
        .filterMode = CU_TR_FILTER_MODE_LINEAR,
        .flags = CU_TRSF_READ_AS_INTEGER,
    };

    CUDA_RESOURCE_DESC res_desc = {
        .resType = CU_RESOURCE_TYPE_PITCH2D,
        .res.pitch2D.format = pixel_size == 1 ?
                              CU_AD_FORMAT_UNSIGNED_INT8 :
                              CU_AD_FORMAT_UNSIGNED_INT16,
        .res.pitch2D.numChannels = channels,
        .res.pitch2D.width = src_width,
        .res.pitch2D.height = src_height,
        .res.pitch2D.pitchInBytes = src_pitch * pixel_size,
        .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
    };

    ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
    if (ret < 0)
        goto exit;

    ret = CHECK_CU(cu->cuLaunchKernel(func,
                                      DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                                      BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));

exit:
    if (tex)
        CHECK_CU(cu->cuTexObjectDestroy(tex));

    return ret;
}

static int scalecuda_resize(AVFilterContext *ctx,
                            AVFrame *out, AVFrame *in)
{
    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
    CUDAScaleContext *s = ctx->priv;

    switch (in_frames_ctx->sw_format) {
    case AV_PIX_FMT_YUV420P:
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[0], in->width, in->height, in->linesize[0],
                           out->data[0], out->width, out->height, out->linesize[0],
                           1);
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[1], in->width/2, in->height/2, in->linesize[0]/2,
                           out->data[1], out->width/2, out->height/2, out->linesize[0]/2,
                           1);
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[2], in->width/2, in->height/2, in->linesize[0]/2,
                           out->data[2], out->width/2, out->height/2, out->linesize[0]/2,
                           1);
        break;
    case AV_PIX_FMT_YUV444P:
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[0], in->width, in->height, in->linesize[0],
                           out->data[0], out->width, out->height, out->linesize[0],
                           1);
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[1], in->width, in->height, in->linesize[0],
                           out->data[1], out->width, out->height, out->linesize[0],
                           1);
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[2], in->width, in->height, in->linesize[0],
                           out->data[2], out->width, out->height, out->linesize[0],
                           1);
        break;
    case AV_PIX_FMT_YUV444P16:
        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                           in->data[0], in->width, in->height, in->linesize[0] / 2,
                           out->data[0], out->width, out->height, out->linesize[0] / 2,
                           2);
        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                           in->data[1], in->width, in->height, in->linesize[1] / 2,
                           out->data[1], out->width, out->height, out->linesize[1] / 2,
                           2);
        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                           in->data[2], in->width, in->height, in->linesize[2] / 2,
                           out->data[2], out->width, out->height, out->linesize[2] / 2,
                           2);
        break;
    case AV_PIX_FMT_NV12:
        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                           in->data[0], in->width, in->height, in->linesize[0],
                           out->data[0], out->width, out->height, out->linesize[0],
                           1);
        call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                           in->data[1], in->width/2, in->height/2, in->linesize[1],
                           out->data[1], out->width/2, out->height/2, out->linesize[1]/2,
                           1);
        break;
    case AV_PIX_FMT_P010LE:
        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                           in->data[0], in->width, in->height, in->linesize[0]/2,
                           out->data[0], out->width, out->height, out->linesize[0]/2,
                           2);
        call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                           in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2,
                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                           2);
        break;
    case AV_PIX_FMT_P016LE:
        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                           in->data[0], in->width, in->height, in->linesize[0] / 2,
                           out->data[0], out->width, out->height, out->linesize[0] / 2,
                           2);
        call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                           in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                           2);
        break;
    default:
        return AVERROR_BUG;
    }

    return 0;
}
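
call_resize_kernel above depends on launch-geometry helpers defined elsewhere in the filter. As an assumption, they boil down to a fixed 2D thread block plus a ceiling division so the grid covers every destination pixel, one thread per pixel; a sketch:

/* Sketch of the assumed launch-geometry helpers; the real filter chooses its
 * own block size. One thread handles one destination pixel. */
#define BLOCKX 32
#define BLOCKY 16
#define DIV_UP(a, b) (((a) + (b) - 1) / (b))   /* ceiling division */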
Example #7
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
    int w, h;
    int ret;

    extern char vf_scale_cuda_ptx[];

    s->hwctx = device_hwctx;
    s->cu_stream = s->hwctx->stream;

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        goto fail;

    if (((int64_t)h * inlink->w) > INT_MAX  ||
        ((int64_t)w * inlink->h) > INT_MAX)
        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                             outlink->w*inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;

fail:
    return ret;
}