示例#1
0
/**
 * Scale every plane of a CUDA frame by issuing one resize call per plane.
 *
 * The plane layout is chosen from the software pixel format stored in the
 * input frame's hardware frames context. The values returned by
 * call_resize_kernel() are not inspected here.
 *
 * @param ctx filter context; ctx->priv is used as the CUDAScaleContext
 * @param out destination frame (plane pointers, size and linesizes are read)
 * @param in  source frame; in->hw_frames_ctx is dereferenced unconditionally
 * @return 0 after the calls for a handled format were issued,
 *         AVERROR_BUG for any format not listed below
 */
static int scalecuda_resize(AVFilterContext *ctx,
                            AVFrame *out, AVFrame *in)
{
    CUDAScaleContext *s = ctx->priv;
    AVHWFramesContext *hw_frames = (AVHWFramesContext*)in->hw_frames_ctx->data;

    /* Shorthands for the plane 0 geometry of both frames. */
    uint8_t *const src_base = in->data[0];
    uint8_t *const dst_base = out->data[0];
    const int src_w = in->width,  src_h = in->height,  src_ls = in->linesize[0];
    const int dst_w = out->width, dst_h = out->height, dst_ls = out->linesize[0];

    switch (hw_frames->sw_format) {
    case AV_PIX_FMT_YUV420P: {
        /* Size of plane 0; the other two planes are addressed relative to
         * data[0] instead of through data[1] / data[2]. */
        const int src_plane0 = src_ls * src_h;
        const int dst_plane0 = dst_ls * dst_h;

        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base, src_w, src_h, src_ls,
                           dst_base, dst_w, dst_h, dst_ls,
                           1);
        /* Half-size plane directly after plane 0. */
        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base + src_plane0, src_w / 2, src_h / 2, src_ls / 2,
                           dst_base + dst_plane0, dst_w / 2, dst_h / 2, dst_ls / 2,
                           1);
        /* Half-size plane at 5/4 of the plane 0 size.
         * NOTE(review): only the input offset is rounded up to
         * s->tex_alignment, the output offset is not -- confirm that this
         * asymmetry is intended. */
        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base + ALIGN_UP((src_plane0 * 5) / 4, s->tex_alignment), src_w / 2, src_h / 2, src_ls / 2,
                           dst_base + (dst_plane0 * 5) / 4, dst_w / 2, dst_h / 2, dst_ls / 2,
                           1);
        break;
    }
    case AV_PIX_FMT_YUV444P: {
        /* Three full-size planes laid out back to back behind data[0]. */
        const int src_plane0 = src_ls * src_h;
        const int dst_plane0 = dst_ls * dst_h;

        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base, src_w, src_h, src_ls,
                           dst_base, dst_w, dst_h, dst_ls,
                           1);
        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base + src_plane0, src_w, src_h, src_ls,
                           dst_base + dst_plane0, dst_w, dst_h, dst_ls,
                           1);
        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base + src_plane0 * 2, src_w, src_h, src_ls,
                           dst_base + dst_plane0 * 2, dst_w, dst_h, dst_ls,
                           1);
        break;
    }
    case AV_PIX_FMT_NV12:
        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
                           src_base, src_w, src_h, src_ls,
                           dst_base, dst_w, dst_h, dst_ls,
                           1);
        /* Two-channel plane: read from in->data[1], written behind the
         * output plane 0 with the output height rounded up to a multiple
         * of 32 rows. */
        call_resize_kernel(s, s->cu_func_uchar2, s->cu_tex_uchar2, 2,
                           in->data[1], src_w / 2, src_h / 2, in->linesize[1],
                           dst_base + dst_ls * ((dst_h + 31) & ~0x1f), dst_w / 2, dst_h / 2, out->linesize[1] / 2,
                           1);
        break;
    case AV_PIX_FMT_P010LE:
    case AV_PIX_FMT_P016LE:
        /* Both formats are handled identically: 2-byte samples, so the
         * byte linesizes are halved (quartered for the two-channel
         * output plane). */
        call_resize_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1,
                           src_base, src_w, src_h, src_ls / 2,
                           dst_base, dst_w, dst_h, dst_ls / 2,
                           2);
        call_resize_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2,
                           in->data[1], src_w / 2, src_h / 2, in->linesize[1] / 2,
                           dst_base + dst_ls * ((dst_h + 31) & ~0x1f), dst_w / 2, dst_h / 2, out->linesize[1] / 4,
                           2);
        break;
    default:
        return AVERROR_BUG;
    }

    return 0;
}
示例#2
0
/**
 * Resize one plane: wrap the source plane in a texture object, launch the
 * given kernel over the destination plane, then destroy the texture object.
 *
 * @param ctx        filter context; ctx->priv is used as the CUDAScaleContext
 * @param func       kernel to launch
 * @param channels   number of channels per texel of the source plane
 * @param src_dptr   source plane base address
 * @param src_width  source plane width, used as the texture width
 * @param src_height source plane height, used as the texture height
 * @param src_pitch  source pitch; multiplied by pixel_size to get bytes
 * @param dst_dptr   destination plane base address
 * @param dst_width  destination width, also determines the grid width
 * @param dst_height destination height, also determines the grid height
 * @param dst_pitch  destination pitch, forwarded to the kernel unchanged
 * @param pixel_size 1 selects 8-bit unsigned texels, anything else 16-bit
 * @return the result of the failing CHECK_CU() call, otherwise the result
 *         of the kernel launch check
 */
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
                              uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                              uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                              int pixel_size)
{
    /* The names ctx, s, cu and every identifier that appears inside a
     * CHECK_CU() argument are left untouched: the macro is defined
     * elsewhere and may reference or stringify them -- TODO confirm. */
    CUDAScaleContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    CUtexObject tex = 0;
    CUdeviceptr dst_base = (CUdeviceptr)dst_dptr;
    CUDA_TEXTURE_DESC tex_desc = { 0 };
    CUDA_RESOURCE_DESC res_desc = { 0 };
    /* Kernel parameters, passed by address in this exact order. */
    void *args_uchar[] = {
        &tex, &dst_base,
        &dst_width, &dst_height, &dst_pitch,
        &src_width, &src_height,
    };
    int err;

    /* Linear filtering, texels read back as integers. */
    tex_desc.filterMode = CU_TR_FILTER_MODE_LINEAR;
    tex_desc.flags      = CU_TRSF_READ_AS_INTEGER;

    /* Describe the source plane as pitched 2D memory. */
    res_desc.resType = CU_RESOURCE_TYPE_PITCH2D;
    if (pixel_size == 1)
        res_desc.res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT8;
    else
        res_desc.res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16;
    res_desc.res.pitch2D.numChannels  = channels;
    res_desc.res.pitch2D.width        = src_width;
    res_desc.res.pitch2D.height       = src_height;
    res_desc.res.pitch2D.pitchInBytes = src_pitch * pixel_size;
    res_desc.res.pitch2D.devPtr       = (CUdeviceptr)src_dptr;

    err = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
    if (err >= 0) {
        /* One thread per destination pixel, grid rounded up to whole blocks. */
        err = CHECK_CU(cu->cuLaunchKernel(func,
                                          DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                                          BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
    }

    /* Release the texture object on every path where one was created;
     * a failure of the destroy call does not change the return value. */
    if (tex)
        CHECK_CU(cu->cuTexObjectDestroy(tex));

    return err;
}

/**
 * Resize every plane of a CUDA hardware frame into the output frame.
 *
 * Dispatches on the software pixel format of the input frame's hardware
 * frames context and calls call_resize_kernel() once per plane. Processing
 * stops at the first plane whose resize fails, so a failed texture creation
 * or kernel launch is reported to the caller instead of being dropped.
 *
 * Pitch arguments: call_resize_kernel() multiplies the source pitch by
 * pixel_size to obtain bytes, hence byte linesizes are divided by the sample
 * size for the 16-bit formats. The destination pitch is additionally divided
 * by the channel count for the two-channel planes (presumably because the
 * kernel indexes the destination in whole texels -- verify against the
 * kernel source).
 *
 * @param ctx filter context; ctx->priv is used as the CUDAScaleContext
 * @param out destination frame (plane pointers, size and linesizes are read)
 * @param in  source frame; in->hw_frames_ctx is dereferenced unconditionally
 * @return 0 on success, the negative error code of the first failing
 *         call_resize_kernel() call, or AVERROR_BUG for an unhandled format
 */
static int scalecuda_resize(AVFilterContext *ctx,
                            AVFrame *out, AVFrame *in)
{
    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
    CUDAScaleContext *s = ctx->priv;
    int ret;

    switch (in_frames_ctx->sw_format) {
    case AV_PIX_FMT_YUV420P:
        /* NOTE(review): planes 1 and 2 use pitches derived from linesize[0]
         * rather than linesize[1] / linesize[2] -- confirm that the frames
         * reaching this filter always satisfy linesize[1] == linesize[0] / 2. */
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[0], in->width, in->height, in->linesize[0],
                                 out->data[0], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[1], in->width/2, in->height/2, in->linesize[0]/2,
                                 out->data[1], out->width/2, out->height/2, out->linesize[0]/2,
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[2], in->width/2, in->height/2, in->linesize[0]/2,
                                 out->data[2], out->width/2, out->height/2, out->linesize[0]/2,
                                 1);
        break;
    case AV_PIX_FMT_YUV444P:
        /* NOTE(review): all three planes use linesize[0] as pitch -- confirm
         * that linesize[1] and linesize[2] always equal linesize[0] here. */
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[0], in->width, in->height, in->linesize[0],
                                 out->data[0], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[1], in->width, in->height, in->linesize[0],
                                 out->data[1], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[2], in->width, in->height, in->linesize[0],
                                 out->data[2], out->width, out->height, out->linesize[0],
                                 1);
        break;
    case AV_PIX_FMT_YUV444P16:
        /* 2-byte samples: each plane's byte linesize is halved. */
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[0], in->width, in->height, in->linesize[0] / 2,
                                 out->data[0], out->width, out->height, out->linesize[0] / 2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[1], in->width, in->height, in->linesize[1] / 2,
                                 out->data[1], out->width, out->height, out->linesize[1] / 2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[2], in->width, in->height, in->linesize[2] / 2,
                                 out->data[2], out->width, out->height, out->linesize[2] / 2,
                                 2);
        break;
    case AV_PIX_FMT_NV12:
        ret = call_resize_kernel(ctx, s->cu_func_uchar, 1,
                                 in->data[0], in->width, in->height, in->linesize[0],
                                 out->data[0], out->width, out->height, out->linesize[0],
                                 1);
        if (ret < 0)
            return ret;
        /* Two-channel half-size plane. */
        ret = call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                                 in->data[1], in->width/2, in->height/2, in->linesize[1],
                                 out->data[1], out->width/2, out->height/2, out->linesize[1]/2,
                                 1);
        break;
    case AV_PIX_FMT_P010LE:
    case AV_PIX_FMT_P016LE:
        /* Both formats take the same path: 2-byte samples in plane 0 and a
         * two-channel half-size plane 1. */
        ret = call_resize_kernel(ctx, s->cu_func_ushort, 1,
                                 in->data[0], in->width, in->height, in->linesize[0] / 2,
                                 out->data[0], out->width, out->height, out->linesize[0] / 2,
                                 2);
        if (ret < 0)
            return ret;
        ret = call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                                 in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
                                 out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                                 2);
        break;
    default:
        return AVERROR_BUG;
    }

    /* ret holds the status of the last plane of the handled format. */
    if (ret < 0)
        return ret;

    return 0;
}