示例#1
0
av_cold void ff_v210enc_init_x86(V210EncContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSSE3(cpu_flags)) {
        s->pack_line_8 = ff_v210_planar_pack_8_ssse3;
        s->pack_line_10 = ff_v210_planar_pack_10_ssse3;
    }

    if (EXTERNAL_AVX(cpu_flags))
        s->pack_line_8 = ff_v210_planar_pack_8_avx;

    if (EXTERNAL_AVX2(cpu_flags)) {
        s->pack_line_8 = ff_v210_planar_pack_8_avx2;
        s->pack_line_10 = ff_v210_planar_pack_10_avx2;
        s->sample_factor = 2;
    }
}
av_cold void INIT_FUNC(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_subpel3_32_64(0,  put, BPC, avx2);
        init_subpel3_32_64(1,  avg, BPC, avx2);
        init_subpel2(2, 0, 16, put, BPC, avx2);
        init_subpel2(2, 1, 16, avg, BPC, avx2);
    }

#endif /* HAVE_YASM */

    ff_vp9dsp_init_16bpp_x86(dsp);
}
示例#3
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt)                            \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_ ## type ## sz ## _ ## opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][idxh][idxv]  = type ## _8tap_smooth_  ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type ## _8tap_regular_ ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][idxh][idxv]   = type ## _8tap_sharp_   ## sz ## dir ## _ ## opt

#define init_subpel2(idx1, idx2, sz, type, opt) \
    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)

#define init_subpel3_32_64(idx, type, opt) \
    init_subpel2(0, idx, 64, type, opt); \
    init_subpel2(1, idx, 32, type, opt)

#define init_subpel3_8to64(idx, type, opt) \
    init_subpel3_32_64(idx, type, opt); \
    init_subpel2(2, idx, 16, type, opt); \
    init_subpel2(3, idx,  8, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel3_8to64(idx, type, opt); \
    init_subpel2(4, idx,  4, type, opt)

#define init_lpf(opt) do { \
    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, mmxext);
        init_subpel2(4, 1, 4, avg, mmxext);
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, sse2);
        init_subpel3_8to64(1, avg, sse2);
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_fpel(1, 0, 32, put, avx);
        init_fpel(0, 0, 64, put, avx);
        init_lpf(avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_fpel(1, 1, 32, avg, avx2);
        init_fpel(0, 1, 64, avg, avx2);

#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0, put, avx2);
        init_subpel3_32_64(1, avg, avx2);
#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_X86ASM */
}
示例#4
0
av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                 enum AVSampleFormat out_fmt,
                                 enum AVSampleFormat in_fmt,
                                 int channels){
    int mm_flags = av_get_cpu_flags();

    ac->simd_f= NULL;

//FIXME add memcpy case

#define MULTI_CAPS_FUNC(flag, cap) \
    if (EXTERNAL_##flag(mm_flags)) {\
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
            ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
            ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
    }

MULTI_CAPS_FUNC(MMX, mmx)
MULTI_CAPS_FUNC(SSE2, sse2)

    if(EXTERNAL_MMX(mm_flags)) {
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
        }
    }
    if(EXTERNAL_SSE(mm_flags)) {
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_sse;
        }
    }
    if(EXTERNAL_SSE2(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
            ac->simd_f =  ff_int32_to_float_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
            ac->simd_f =  ff_int16_to_float_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int32_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int16_a_sse2;

        if(channels == 2) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;

            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;

            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
                ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
                ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
        }
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse2;
        }
    }
    if(EXTERNAL_SSSE3(mm_flags)) {
        if(channels == 2) {
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
        }
    }
    if(EXTERNAL_AVX(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
            ac->simd_f =  ff_int32_to_float_a_avx;
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
        }
    }
    if(EXTERNAL_AVX2(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int32_a_avx2;
    }
}
示例#5
0
av_cold void INIT_FUNC(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
#define init_lpf_16_func(idx, dir, bpp, opt) \
    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt

#define init_lpf_funcs(bpp, opt) \
    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
    init_lpf_16_func(0, h, bpp, opt); \
    init_lpf_16_func(1, v, bpp, opt); \
    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
        init_lpf_funcs(BPC, sse2);
        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_lpf_funcs(BPC, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_lpf_funcs(BPC, avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
#if HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0,  put, BPC, avx2);
        init_subpel3_32_64(1,  avg, BPC, avx2);
        init_subpel2(2, 0, 16, put, BPC, avx2);
        init_subpel2(2, 1, 16, avg, BPC, avx2);
#endif
    }

#endif /* HAVE_YASM */

    ff_vp9dsp_init_16bpp_x86(dsp);
}
示例#6
0
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

#define SET_LUMA_FUNCS(tabname, funcname, depth, cf)      \
    c->tabname[0] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[1] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[2] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _32_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _48_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _64_ ## depth ## _ ## cf;

#define SET_CHROMA_FUNCS(tabname, funcname, depth, cf)    \
    c->tabname[1] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _32_ ## depth ## _ ## cf;

#define SET_QPEL_FUNCS(v, h, depth, cf, name) SET_LUMA_FUNCS  (put_hevc_qpel[v][h], name, depth, cf)
#define SET_EPEL_FUNCS(v, h, depth, cf, name) SET_CHROMA_FUNCS(put_hevc_epel[v][h], name, depth, cf)

    if (bit_depth == 8) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_8_sse2;
            SET_QPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     8, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 8, sse2);
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            SET_QPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_qpel_v);
            SET_EPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_epel_v);

        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_8_avx;
            c->idct[1] = ff_hevc_idct_8x8_8_avx;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;

            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
            SET_QPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     10, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 10, sse2);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_10_avx;
            c->idct[1] = ff_hevc_idct_8x8_10_avx;
        }
    }

#if ARCH_X86_64
    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_8_sse2;
            c->idct[3] = ff_hevc_idct_32x32_8_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }

        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     8, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 8, sse4);
        }

        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_8_avx;
            c->idct[3] = ff_hevc_idct_32x32_8_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_10_sse2;
            c->idct[3] = ff_hevc_idct_32x32_10_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     10, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 10, sse4);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            SET_QPEL_FUNCS(0, 1, 10, avx, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 10, avx, ff_hevc_qpel_v);
            SET_QPEL_FUNCS(1, 1, 10, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v);
            SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_10_avx;
            c->idct[3] = ff_hevc_idct_32x32_10_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
        }
    }
#endif /* ARCH_X86_64 */
}
示例#7
0
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
            c->transform_add[0]    =  ff_hevc_transform_add4_8_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
            }
            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;

            c->transform_add[1]    = ff_hevc_transform_add8_8_sse2;
            c->transform_add[2]    = ff_hevc_transform_add16_8_sse2;
            c->transform_add[3]    = ff_hevc_transform_add32_8_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
            }
            c->transform_add[1]    = ff_hevc_transform_add8_8_avx;
            c->transform_add[2]    = ff_hevc_transform_add16_8_avx;
            c->transform_add[3]    = ff_hevc_transform_add32_8_avx;
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;

            c->transform_add[3]    = ff_hevc_transform_add32_8_avx2;
        }
#endif
    } else if (bit_depth == 10) {
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->transform_add[0] = ff_hevc_transform_add4_10_mmxext;
            c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
            }

            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;

            c->transform_add[1]    = ff_hevc_transform_add8_10_sse2;
            c->transform_add[2]    = ff_hevc_transform_add16_10_sse2;
            c->transform_add[3]    = ff_hevc_transform_add32_10_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
            }
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(cpu_flags)) {

            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;

            c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
            c->transform_add[3] = ff_hevc_transform_add32_10_avx2;

        }
#endif
    } else if (bit_depth == 12) {
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct4x4_dc_12_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
            }

            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
            }
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
        }
#endif
    }
}
示例#8
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##_##opt

#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt

#define init_subpel2(idx1, idx2, sz, type, opt) \
    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)

#define init_subpel3_32_64(idx, type, opt) \
    init_subpel2(0, idx, 64, type, opt); \
    init_subpel2(1, idx, 32, type, opt)

#define init_subpel3_8to64(idx, type, opt) \
    init_subpel3_32_64(idx, type, opt); \
    init_subpel2(2, idx, 16, type, opt); \
    init_subpel2(3, idx,  8, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel3_8to64(idx, type, opt); \
    init_subpel2(4, idx,  4, type, opt)

#define init_lpf(opt) do { \
    if (ARCH_X86_64) { \
        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
    } \
} while (0)

#define init_ipred(sz, opt, t, e) \
    dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt

#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext
#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext
#define init_dir_tm_ipred(sz, opt) do { \
    init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \
    init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \
    init_ipred(sz, opt, hd, HOR_DOWN); \
    init_ipred(sz, opt, vl, VERT_LEFT); \
    init_ipred(sz, opt, hu, HOR_UP); \
    init_ipred(sz, opt, tm, TM_VP8); \
    init_ipred(sz, opt, vr, VERT_RIGHT); \
} while (0)
#define init_dir_tm_h_ipred(sz, opt) do { \
    init_dir_tm_ipred(sz, opt); \
    init_ipred(sz, opt, h,  HOR); \
} while (0)
#define init_dc_ipred(sz, opt) do { \
    init_ipred(sz, opt, dc,      DC); \
    init_ipred(sz, opt, dc_left, LEFT_DC); \
    init_ipred(sz, opt, dc_top,  TOP_DC); \
} while (0)
#define init_all_ipred(sz, opt) do { \
    init_dc_ipred(sz, opt); \
    init_dir_tm_h_ipred(sz, opt); \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
        init_ipred(8, mmx, v, VERT);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, mmxext);
        init_subpel2(4, 1, 4, avg, mmxext);
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
        init_dc_ipred(4, mmxext);
        init_dc_ipred(8, mmxext);
        init_dir_tm_ipred(4, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
        init_ipred(16, sse, v, VERT);
        init_ipred(32, sse, v, VERT);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, sse2);
        init_subpel3_8to64(1, avg, sse2);
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
        init_dc_ipred(16, sse2);
        init_dc_ipred(32, sse2);
        init_dir_tm_h_ipred(8, sse2);
        init_dir_tm_h_ipred(16, sse2);
        init_dir_tm_h_ipred(32, sse2);
        init_ipred(4, sse2, h, HOR);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        init_lpf(ssse3);
        init_all_ipred(4, ssse3);
        init_all_ipred(8, ssse3);
        init_all_ipred(16, ssse3);
        init_all_ipred(32, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        init_fpel(1, 0, 32, put, avx);
        init_fpel(0, 0, 64, put, avx);
        init_lpf(avx);
        init_dir_tm_h_ipred(8, avx);
        init_dir_tm_h_ipred(16, avx);
        init_dir_tm_h_ipred(32, avx);
        init_ipred(32, avx, v, VERT);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_fpel(1, 1, 32, avg, avx2);
        init_fpel(0, 1, 64, avg, avx2);
        if (ARCH_X86_64) {
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
            init_subpel3_32_64(0, put, avx2);
            init_subpel3_32_64(1, avg, avx2);
#endif
        }
        init_dc_ipred(32, avx2);
        init_ipred(32, avx2, h,  HOR);
        init_ipred(32, avx2, tm, TM_VP8);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
示例#9
0
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_MMXEXT(mm_flags)) {
                c->transform_dc_add[0]    =  ff_hevc_idct4_dc_add_8_mmxext;
                c->transform_dc_add[1]    =  ff_hevc_idct8_dc_add_8_mmxext;

        }
        if (EXTERNAL_SSE2(mm_flags)) {
                    c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
                    c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

                    c->transform_dc_add[2]    =  ff_hevc_idct16_dc_add_8_sse2;
                    c->transform_dc_add[3]    =  ff_hevc_idct32_dc_add_8_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
                    c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
                    c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {

            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
        if (EXTERNAL_AVX2(mm_flags)) {
            c->transform_dc_add[3]    =  ff_hevc_idct32_dc_add_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(mm_flags)) {
                c->transform_dc_add[0]    =  ff_hevc_idct4_dc_add_10_mmxext;

        }
        if (EXTERNAL_SSE2(mm_flags)) {
                    c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
                    c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;


                    c->transform_dc_add[1]    =  ff_hevc_idct8_dc_add_10_sse2;
                    c->transform_dc_add[2]    =  ff_hevc_idct16_dc_add_10_sse2;
                    c->transform_dc_add[3]    =  ff_hevc_idct32_dc_add_10_sse2;
                }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
                    c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
                    c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {

            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
        if (EXTERNAL_AVX(mm_flags)) {
            c->transform_dc_add[1]    =  ff_hevc_idct8_dc_add_10_avx;
            c->transform_dc_add[2]    =  ff_hevc_idct16_dc_add_10_avx;
            c->transform_dc_add[3]    =  ff_hevc_idct32_dc_add_10_avx;
        }
        if (EXTERNAL_AVX2(mm_flags)) {
            c->transform_dc_add[2]    =  ff_hevc_idct16_dc_add_10_avx2;
            c->transform_dc_add[3]    =  ff_hevc_idct32_dc_add_10_avx2;

        }
    }
}