Exemple #1
0
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(mm_flags)) {
                    c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
                    c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
                    c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
                    c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {

            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);

        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(mm_flags)) {
                    c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
                    c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
                    c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
                    c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {

            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
    }
}
Exemple #2
0
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
        switch (param->mode) {
        case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
        case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break;
        case BLEND_AND:      param->blend = ff_blend_and_sse2;      break;
        case BLEND_AVERAGE:  param->blend = ff_blend_average_sse2;  break;
        case BLEND_DARKEN:   param->blend = ff_blend_darken_sse2;   break;
        case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
        case BLEND_HARDMIX:  param->blend = ff_blend_hardmix_sse2;  break;
        case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_sse2;  break;
        case BLEND_OR:       param->blend = ff_blend_or_sse2;       break;
        case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
        case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
        case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
        }
    }
    if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
        switch (param->mode) {
        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
        case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
        }
    }
}
Exemple #3
0
av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int cpu_flags      = av_get_cpu_flags();

#if (HAVE_MMX_EXTERNAL == 1)
    if (EXTERNAL_MMX(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
    }
#endif

#if (HAVE_AMD3DNOW_EXTERNAL == 1)
    if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
    }
#endif

#if (HAVE_MMXEXT_EXTERNAL == 1)
    if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
    }
#endif

#if (HAVE_SSE2_EXTERNAL == 1)
    if (EXTERNAL_SSE2(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
    }
#endif

#if (HAVE_SSSE3_EXTERNAL == 1)
    if (EXTERNAL_SSSE3(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
    }
#endif

#if (HAVE_AVX_EXTERNAL == 1)
    if (EXTERNAL_AVX(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        // AVX implies !cache64.
        // TODO: Port cache(32|64) detection from x264.
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
    }
#endif
}
void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_int16 = ff_add_int16_mmx;
        c->diff_int16 = ff_diff_int16_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc->comp[0].depth_minus1<15) {
        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16 = ff_add_int16_sse2;
        c->diff_int16 = ff_diff_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
    }
}
Exemple #5
0
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (cpu_flags & AV_CPU_FLAG_CMOV)
        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
#endif

    if (X86_MMX(cpu_flags))
        dsputil_init_mmx(c, avctx, cpu_flags);

    if (X86_MMXEXT(cpu_flags))
        dsputil_init_mmxext(c, avctx, cpu_flags);

    if (X86_SSE(cpu_flags))
        dsputil_init_sse(c, avctx, cpu_flags);

    if (X86_SSE2(cpu_flags))
        dsputil_init_sse2(c, avctx, cpu_flags);

    if (EXTERNAL_SSSE3(cpu_flags))
        dsputil_init_ssse3(c, avctx, cpu_flags);

    if (EXTERNAL_SSE4(cpu_flags))
        dsputil_init_sse4(c, avctx, cpu_flags);

    if (CONFIG_ENCODERS)
        ff_dsputilenc_init_mmx(c, avctx);
}
Exemple #6
0
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (X86_MMX(cpu_flags))
        dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);

    if (X86_MMXEXT(cpu_flags))
        dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);

    if (X86_SSE(cpu_flags))
        dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth);

    if (X86_SSE2(cpu_flags))
        dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);

    if (EXTERNAL_SSSE3(cpu_flags))
        dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);

    if (EXTERNAL_SSE4(cpu_flags))
        dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);

    if (CONFIG_ENCODERS)
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
}
av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        c->bswap_buf = ff_bswap32_buf_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        c->bswap_buf = ff_bswap32_buf_ssse3;
}
Exemple #8
0
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }

    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
}
av_cold void ff_v210enc_init_x86(V210EncContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSSE3(cpu_flags)) {
        s->pack_line_8 = ff_v210_planar_pack_8_ssse3;
        s->pack_line_10 = ff_v210_planar_pack_10_ssse3;
    }

    if (EXTERNAL_AVX(cpu_flags))
        s->pack_line_8 = ff_v210_planar_pack_8_avx;
}
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();
    int bit_depth = (!yadif->csp) ? 8
                                  : yadif->csp->comp[0].depth_minus1 + 1;

    if (bit_depth >= 15) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
        if (EXTERNAL_SSE4(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
    } else if ( bit_depth >= 9 && bit_depth <= 14) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
    } else {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_ssse3;
    }
#endif /* HAVE_YASM */
}
Exemple #11
0
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMXEXT(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
    if (EXTERNAL_SSE2(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_ssse3;
}
Exemple #12
0
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(cpu_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif
#define ASSIGN_LF(EXT) \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(cpu_flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;

        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
Exemple #13
0
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags))
        gf->filter_line = gradfun_filter_line_mmxext;
    if (EXTERNAL_SSSE3(cpu_flags))
        gf->filter_line = gradfun_filter_line_ssse3;

    if (EXTERNAL_SSE2(cpu_flags))
        gf->blur_line = gradfun_blur_line_sse2;
#endif /* HAVE_X86ASM */
}
av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags))
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;

    if (EXTERNAL_SSE2(cpu_flags))
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;

    if (EXTERNAL_SSSE3(cpu_flags) &&
        !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
}
Exemple #15
0
av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
#if ARCH_X86_32
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
#endif
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
#if ARCH_X86_32
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1,  8, mmxext);
#endif
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
    }
#endif /* HAVE_YASM */
}
Exemple #16
0
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_CMOV)
        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
#endif

    if (X86_MMX(cpu_flags)) {
#if HAVE_INLINE_ASM
        const int idct_algo = avctx->idct_algo;

        if (avctx->lowres == 0 && !high_bit_depth) {
            if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) {
                c->idct_put              = ff_simple_idct_put_mmx;
                c->idct_add              = ff_simple_idct_add_mmx;
                c->idct                  = ff_simple_idct_mmx;
                c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
            } else if (idct_algo == FF_IDCT_XVIDMMX) {
                c->idct_put              = ff_idct_xvid_mmx_put;
                c->idct_add              = ff_idct_xvid_mmx_add;
                c->idct                  = ff_idct_xvid_mmx;
            }
        }
#endif /* HAVE_INLINE_ASM */

        dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);
    }

    if (X86_MMXEXT(cpu_flags))
        dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);

    if (X86_SSE(cpu_flags))
        dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth);

    if (X86_SSE2(cpu_flags))
        dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);

    if (EXTERNAL_SSSE3(cpu_flags))
        dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);

    if (EXTERNAL_SSE4(cpu_flags))
        dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);

    if (CONFIG_ENCODERS)
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
}
Exemple #17
0
void ff_framerate_init_x86(FrameRateContext *s)
{
    int cpu_flags = av_get_cpu_flags();
    if (s->bitdepth == 8) {
        if (EXTERNAL_AVX2_FAST(cpu_flags))
            s->blend = ff_blend_frames_avx2;
        else if (EXTERNAL_SSSE3(cpu_flags))
            s->blend = ff_blend_frames_ssse3;
    } else {
        if (EXTERNAL_AVX2_FAST(cpu_flags))
            s->blend = ff_blend_frames16_avx2;
        else if (EXTERNAL_SSE4(cpu_flags))
            s->blend = ff_blend_frames16_sse4;
    }
}
Exemple #18
0
av_cold void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{
    int flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(flags))
        dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
#endif
    if (EXTERNAL_MMXEXT(flags))
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmxext;
    if (EXTERNAL_SSE2(flags))
        dsp->add_bytes_l2         = ff_add_bytes_l2_sse2;
    if (EXTERNAL_SSSE3(flags))
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
}
Exemple #19
0
void ff_hevcpred_init_x86(HEVCPredContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {


            if (EXTERNAL_MMXEXT(mm_flags)) {


                if (EXTERNAL_SSE2(mm_flags)) {

                }
                if (EXTERNAL_SSSE3(mm_flags)) {

                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->pred_planar[0]= pred_planar_0_8_sse;
                    c->pred_planar[1]= pred_planar_1_8_sse;
                    c->pred_planar[2]= pred_planar_2_8_sse;
                    c->pred_planar[3]= pred_planar_3_8_sse;

                   // c->pred_angular[0]= pred_angular_0_8_sse; //removed because too little data = bad performance
                    c->pred_angular[1]= pred_angular_1_8_sse;
                    c->pred_angular[2]= pred_angular_2_8_sse;
                    c->pred_angular[3]= pred_angular_3_8_sse;
                }
                if (EXTERNAL_AVX(mm_flags)) {

                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {

                if (EXTERNAL_SSE2(mm_flags)) {

                }
                if (EXTERNAL_SSE4(mm_flags)) {
                }
                if (EXTERNAL_AVX(mm_flags)) {
                }
            }
        }
    }
}
Exemple #20
0
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(mm_flags)) {
        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
    }
    if (EXTERNAL_SSE(mm_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        }
    }
    if (EXTERNAL_SSSE3(mm_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
        }
    }
}
Exemple #21
0
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (cpu_flags & AV_CPU_FLAG_CMOV)
        c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif

    if (INLINE_MMX(cpu_flags))
        c->add_bytes = ff_add_bytes_mmx;

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        /* slower than cmov version on AMD */
        if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
            c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3;
        if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe
            c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4;
    }
}
av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                 enum AVSampleFormat out_fmt,
                                 enum AVSampleFormat in_fmt,
                                 int channels){
    int mm_flags = av_get_cpu_flags();

    ac->simd_f= NULL;

//FIXME add memcpy case

#define MULTI_CAPS_FUNC(flag, cap) \
    if (EXTERNAL_##flag(mm_flags)) {\
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
            ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
            ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
    }

MULTI_CAPS_FUNC(MMX, mmx)
MULTI_CAPS_FUNC(SSE2, sse2)

    if(EXTERNAL_MMX(mm_flags)) {
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
        }
    }
    if(EXTERNAL_SSE(mm_flags)) {
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_sse;
        }
    }
    if(EXTERNAL_SSE2(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
            ac->simd_f =  ff_int32_to_float_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
            ac->simd_f =  ff_int16_to_float_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int32_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int16_a_sse2;

        if(channels == 2) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;

            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;

            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
                ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
                ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
        }
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse2;
        }
    }
    if(EXTERNAL_SSSE3(mm_flags)) {
        if(channels == 2) {
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
        }
    }
    if(EXTERNAL_AVX(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
            ac->simd_f =  ff_int32_to_float_a_avx;
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
        }
    }
    if(EXTERNAL_AVX2(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int32_a_avx2;
    }
}
av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                                   const int bit_depth,
                                   const int chroma_format_idc)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
#if HAVE_MMX_EXTERNAL
        if (EXTERNAL_MMX(mm_flags)) {
            h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_8_mmx;
            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_mmx;
            if (chroma_format_idc == 1) {
                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_mmx;
            }
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_mmx;
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_mmx;
                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_mmx;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    if (mm_flags & AV_CPU_FLAG_CMOV)
                        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
                }
            }
        }
#endif
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(mm_flags)) {
            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
            h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_8_mmxext;
            if (chroma_format_idc == 1)
                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_8_mmxext;
            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_8_mmxext;
            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_8_mmxext;
            h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_8_mmxext;
            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
            if (codec_id == AV_CODEC_ID_VP8 || codec_id == AV_CODEC_ID_H264) {
                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
            }
            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
            }
            if (codec_id != AV_CODEC_ID_RV40) {
                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
            }
            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                if (chroma_format_idc == 1) {
                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
                }
            }
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_8_mmxext;
                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_8_mmxext;
                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmxext;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmxext;
                } else {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmxext;
                }
            }
        }
#endif
#if HAVE_SSE_EXTERNAL
        if (EXTERNAL_SSE(mm_flags)) {
            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
        }
#endif
#if HAVE_SSE2_EXTERNAL
        if (EXTERNAL_SSE2(mm_flags)) {
            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
                }
            }
        }
#endif
#if HAVE_SSSE3_EXTERNAL
        if (EXTERNAL_SSSE3(mm_flags)) {
            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
            if (chroma_format_idc == 1)
                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
                }
            }
        }
#endif
    } else if (bit_depth == 10) {
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(mm_flags)) {
            h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
            h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;

            if (chroma_format_idc == 1)
                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;

            h->pred8x8l[DC_128_PRED        ] = ff_pred8x8l_128_dc_10_mmxext;

            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_mmxext;
            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_mmxext;
            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_mmxext;
            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_mmxext;
            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_mmxext;
            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_mmxext;
        }
#endif
#if HAVE_SSE2_EXTERNAL
        if (EXTERNAL_SSE2(mm_flags)) {
            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;

            if (chroma_format_idc == 1) {
                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
            }

            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_sse2;
            h->pred8x8l[DC_128_PRED         ] = ff_pred8x8l_128_dc_10_sse2;
            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_sse2;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_sse2;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_sse2;

            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_sse2;
            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_sse2;
            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_sse2;
            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_sse2;
            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_sse2;
            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_sse2;
        }
#endif
#if HAVE_SSSE3_EXTERNAL
        if (EXTERNAL_SSSE3(mm_flags)) {
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;

            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_ssse3;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_ssse3;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_ssse3;
        }
#endif
#if HAVE_AVX_EXTERNAL
        if (EXTERNAL_AVX(mm_flags)) {
            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_avx;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;

            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_avx;
            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_avx;
            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_avx;
            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_avx;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_avx;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_avx;
        }
#endif
    }
}
Exemple #24
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt)                            \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_ ## type ## sz ## _ ## opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][idxh][idxv]  = type ## _8tap_smooth_  ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type ## _8tap_regular_ ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][idxh][idxv]   = type ## _8tap_sharp_   ## sz ## dir ## _ ## opt

#define init_subpel2(idx1, idx2, sz, type, opt) \
    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)

#define init_subpel3_32_64(idx, type, opt) \
    init_subpel2(0, idx, 64, type, opt); \
    init_subpel2(1, idx, 32, type, opt)

#define init_subpel3_8to64(idx, type, opt) \
    init_subpel3_32_64(idx, type, opt); \
    init_subpel2(2, idx, 16, type, opt); \
    init_subpel2(3, idx,  8, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel3_8to64(idx, type, opt); \
    init_subpel2(4, idx,  4, type, opt)

#define init_lpf(opt) do { \
    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, mmxext);
        init_subpel2(4, 1, 4, avg, mmxext);
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, sse2);
        init_subpel3_8to64(1, avg, sse2);
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_fpel(1, 0, 32, put, avx);
        init_fpel(0, 0, 64, put, avx);
        init_lpf(avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_fpel(1, 1, 32, avg, avx2);
        init_fpel(0, 1, 64, avg, avx2);

#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0, put, avx2);
        init_subpel3_32_64(1, avg, avx2);
#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_X86ASM */
}
av_cold void INIT_FUNC(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
#define init_lpf_16_func(idx, dir, bpp, opt) \
    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt

#define init_lpf_funcs(bpp, opt) \
    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
    init_lpf_16_func(0, h, bpp, opt); \
    init_lpf_16_func(1, v, bpp, opt); \
    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
        init_lpf_funcs(BPC, sse2);
        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_lpf_funcs(BPC, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_lpf_funcs(BPC, avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
#if HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0,  put, BPC, avx2);
        init_subpel3_32_64(1,  avg, BPC, avx2);
        init_subpel2(2, 0, 16, put, BPC, avx2);
        init_subpel2(2, 1, 16, avg, BPC, avx2);
#endif
    }

#endif /* HAVE_YASM */

    ff_vp9dsp_init_16bpp_x86(dsp);
}
av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
#define init_lpf_16_func(idx, dir, bpp, opt) \
    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt

#define init_lpf_funcs(bpp, opt) \
    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
    init_lpf_16_func(0, h, bpp, opt); \
    init_lpf_16_func(1, v, bpp, opt); \
    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)

#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) \
    dsp->itxfm_add[idxa][idxb] = \
        cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt);
#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) \
    init_itx_func(idx, DCT_DCT,   typea, typeb, size, bpp, opt); \
    init_itx_func(idx, ADST_DCT,  typea, typeb, size, bpp, opt); \
    init_itx_func(idx, DCT_ADST,  typea, typeb, size, bpp, opt); \
    init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
#define init_itx_funcs(idx, size, bpp, opt) \
    init_itx_func(idx, DCT_DCT,   idct,  idct,  size, bpp, opt); \
    init_itx_func(idx, ADST_DCT,  idct,  iadst, size, bpp, opt); \
    init_itx_func(idx, DCT_ADST,  iadst, idct,  size, bpp, opt); \
    init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt); \

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
        if (!bitexact) {
            init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext);
#if BPC == 10
            init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext);
#endif
        }
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
        init_lpf_funcs(BPC, sse2);
        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
#if BPC == 10
        if (!bitexact) {
            init_itx_func(TX_4X4, ADST_DCT,  idct,  iadst, 4, 10, sse2);
            init_itx_func(TX_4X4, DCT_ADST,  iadst, idct,  4, 10, sse2);
            init_itx_func(TX_4X4, ADST_ADST, iadst, iadst, 4, 10, sse2);
        }
#else
        init_itx_funcs(TX_4X4, 4, 12, sse2);
#endif
        init_itx_funcs(TX_8X8, 8, BPC, sse2);
        init_itx_funcs(TX_16X16, 16, BPC, sse2);
        init_itx_func_one(TX_32X32, idct, idct, 32, BPC, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_lpf_funcs(BPC, ssse3);
#if BPC == 10
        if (!bitexact) {
            init_itx_funcs(TX_4X4, 4, BPC, ssse3);
        }
#endif
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_lpf_funcs(BPC, avx);
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
#if HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0,  put, BPC, avx2);
        init_subpel3_32_64(1,  avg, BPC, avx2);
        init_subpel2(2, 0, 16, put, BPC, avx2);
        init_subpel2(2, 1, 16, avg, BPC, avx2);
#endif
    }

#endif /* HAVE_X86ASM */

    ff_vp9dsp_init_16bpp_x86(dsp);
}
Exemple #27
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
{
#if HAVE_YASM
    int cpu_flags;

    if (bpp == 10) {
        ff_vp9dsp_init_10bpp_x86(dsp, bitexact);
        return;
    } else if (bpp == 12) {
        ff_vp9dsp_init_12bpp_x86(dsp, bitexact);
        return;
    }

    cpu_flags = av_get_cpu_flags();

#define init_lpf(opt) do { \
    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)

#define init_ipred(sz, opt, t, e) \
    dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt

#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext
#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext
#define init_dir_tm_ipred(sz, opt) do { \
    init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \
    init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \
    init_ipred(sz, opt, hd, HOR_DOWN); \
    init_ipred(sz, opt, vl, VERT_LEFT); \
    init_ipred(sz, opt, hu, HOR_UP); \
    init_ipred(sz, opt, tm, TM_VP8); \
    init_ipred(sz, opt, vr, VERT_RIGHT); \
} while (0)
#define init_dir_tm_h_ipred(sz, opt) do { \
    init_dir_tm_ipred(sz, opt); \
    init_ipred(sz, opt, h,  HOR); \
} while (0)
#define init_dc_ipred(sz, opt) do { \
    init_ipred(sz, opt, dc,      DC); \
    init_ipred(sz, opt, dc_left, LEFT_DC); \
    init_ipred(sz, opt, dc_top,  TOP_DC); \
} while (0)
#define init_all_ipred(sz, opt) do { \
    init_dc_ipred(sz, opt); \
    init_dir_tm_h_ipred(sz, opt); \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel_func(4, 0,  4, put, , mmx);
        init_fpel_func(3, 0,  8, put, , mmx);
        if (!bitexact) {
            dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
            dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
            dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
            dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
        }
        init_ipred(8, mmx, v, VERT);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, 8, mmxext);
        init_subpel2(4, 1, 4, avg, 8, mmxext);
        init_fpel_func(4, 1,  4, avg, _8, mmxext);
        init_fpel_func(3, 1,  8, avg, _8, mmxext);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
        init_dc_ipred(4, mmxext);
        init_dc_ipred(8, mmxext);
        init_dir_tm_ipred(4, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel_func(2, 0, 16, put, , sse);
        init_fpel_func(1, 0, 32, put, , sse);
        init_fpel_func(0, 0, 64, put, , sse);
        init_ipred(16, sse, v, VERT);
        init_ipred(32, sse, v, VERT);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, 8, sse2);
        init_subpel3_8to64(1, avg, 8, sse2);
        init_fpel_func(2, 1, 16, avg,  _8, sse2);
        init_fpel_func(1, 1, 32, avg,  _8, sse2);
        init_fpel_func(0, 1, 64, avg,  _8, sse2);
        init_lpf(sse2);
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
        init_dc_ipred(16, sse2);
        init_dc_ipred(32, sse2);
        init_dir_tm_h_ipred(8, sse2);
        init_dir_tm_h_ipred(16, sse2);
        init_dir_tm_h_ipred(32, sse2);
        init_ipred(4, sse2, h, HOR);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, 8, ssse3);
        init_subpel3(1, avg, 8, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        init_lpf(ssse3);
        init_all_ipred(4, ssse3);
        init_all_ipred(8, ssse3);
        init_all_ipred(16, ssse3);
        init_all_ipred(32, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        init_lpf(avx);
        init_dir_tm_h_ipred(8, avx);
        init_dir_tm_h_ipred(16, avx);
        init_dir_tm_h_ipred(32, avx);
    }
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        init_fpel_func(1, 0, 32, put, , avx);
        init_fpel_func(0, 0, 64, put, , avx);
        init_ipred(32, avx, v, VERT);
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        init_fpel_func(1, 1, 32, avg, _8, avx2);
        init_fpel_func(0, 1, 64, avg, _8, avx2);
        if (ARCH_X86_64) {
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2;
            init_subpel3_32_64(0, put, 8, avx2);
            init_subpel3_32_64(1, avg, 8, avx2);
#endif
        }
        init_dc_ipred(32, avx2);
        init_ipred(32, avx2, h,  HOR);
        init_ipred(32, avx2, tm, TM_VP8);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
Exemple #28
0
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                                 const int chroma_format_idc)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

    if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            c->h264_idct_dc_add   =
            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
            c->h264_idct8_dc_add  =
            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;

            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
            if (chroma_format_idc == 1)
                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
            if (mm_flags & AV_CPU_FLAG_CMOV)
                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;

            if (EXTERNAL_MMXEXT(mm_flags)) {
                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmxext;
                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
                c->h264_idct_add16   = ff_h264_idct_add16_8_mmxext;
                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmxext;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext;

                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext;
                if (chroma_format_idc == 1) {
                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmxext;
                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
                }
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmxext;
                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmxext;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;

                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;

                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
                }
                if (EXTERNAL_SSSE3(mm_flags)) {
                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
#if ARCH_X86_32
                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmxext;
                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmxext;
                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmxext;
                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmxext;
#endif /* ARCH_X86_32 */
                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif /* HAVE_ALIGNED_STACK */

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    c->h264_idct_dc_add  =
                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
#endif /* HAVE_ALIGNED_STACK */

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */
                }
            }
        }
    }
#endif
}
Exemple #29
0
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
                                  0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 8, "SSE", ff_conv_fltp_to_flt_2ch_sse);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
                                      0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                      6, 16, 8, "SSE2", ff_conv_s16p_to_s16_6ch_sse2);
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                      6, 16, 4, "SSE2", ff_conv_fltp_to_s16_6ch_sse2);
        } else {
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                      6, 1, 4, "SSE2SLOW", ff_conv_s16p_to_s16_6ch_sse2slow);
        }
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
                                  0, 16, 8, "SSE2", ff_conv_s32_to_flt_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
                                  0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
                                  0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSE2", ff_conv_flt_to_s16p_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSSE3", ff_conv_s16_to_fltp_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
    }
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
                                  0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
                                  0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "AVX", ff_conv_s16p_to_flt_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "AVX", ff_conv_fltp_to_s16_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "AVX", ff_conv_flt_to_s16p_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  2, 16, 4, "AVX", ff_conv_flt_to_fltp_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
    }
}
Exemple #30
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_##type##sz##_##opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt

#define init_subpel2(idx, idxh, idxv, dir, type, opt) \
    init_subpel1(0, idx, idxh, idxv, 64, dir, type, opt); \
    init_subpel1(1, idx, idxh, idxv, 32, dir, type, opt); \
    init_subpel1(2, idx, idxh, idxv, 16, dir, type, opt); \
    init_subpel1(3, idx, idxh, idxv,  8, dir, type, opt); \
    init_subpel1(4, idx, idxh, idxv,  4, dir, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel2(idx, 1, 1, hv, type, opt); \
    init_subpel2(idx, 0, 1, v, type, opt); \
    init_subpel2(idx, 1, 0, h, type, opt)

#define init_lpf(opt) do { \
    if (ARCH_X86_64) { \
        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
    } \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        if (ARCH_X86_64) {
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
            dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
            dsp->itxfm_add[TX_32X32][ADST_ADST] =
            dsp->itxfm_add[TX_32X32][ADST_DCT] =
            dsp->itxfm_add[TX_32X32][DCT_ADST] =
            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        }
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        if (ARCH_X86_64) {
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
            dsp->itxfm_add[TX_32X32][ADST_ADST] =
            dsp->itxfm_add[TX_32X32][ADST_DCT] =
            dsp->itxfm_add[TX_32X32][DCT_ADST] =
            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        }
        init_lpf(avx);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}