示例#1
0
av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int cpu_flags      = av_get_cpu_flags();

#if (HAVE_MMX_EXTERNAL == 1)
    if (EXTERNAL_MMX(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
    }
#endif

#if (HAVE_AMD3DNOW_EXTERNAL == 1)
    if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
    }
#endif

#if (HAVE_MMXEXT_EXTERNAL == 1)
    if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
    }
#endif

#if (HAVE_SSE2_EXTERNAL == 1)
    if (EXTERNAL_SSE2(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
    }
#endif

#if (HAVE_SSSE3_EXTERNAL == 1)
    if (EXTERNAL_SSSE3(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
    }
#endif

#if (HAVE_AVX_EXTERNAL == 1)
    if (EXTERNAL_AVX(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        // AVX implies !cache64.
        // TODO: Port cache(32|64) detection from x264.
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
    }
#endif
}
示例#2
0
void ff_hevcpred_init_x86(HEVCPredContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {


            if (EXTERNAL_MMXEXT(mm_flags)) {


                if (EXTERNAL_SSE2(mm_flags)) {

                }
                if (EXTERNAL_SSSE3(mm_flags)) {

                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->pred_planar[0]= pred_planar_0_8_sse;
                    c->pred_planar[1]= pred_planar_1_8_sse;
                    c->pred_planar[2]= pred_planar_2_8_sse;
                    c->pred_planar[3]= pred_planar_3_8_sse;

                   // c->pred_angular[0]= pred_angular_0_8_sse; //removed because too little data = bad performance
                    c->pred_angular[1]= pred_angular_1_8_sse;
                    c->pred_angular[2]= pred_angular_2_8_sse;
                    c->pred_angular[3]= pred_angular_3_8_sse;
                }
                if (EXTERNAL_AVX(mm_flags)) {

                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {

                if (EXTERNAL_SSE2(mm_flags)) {

                }
                if (EXTERNAL_SSE4(mm_flags)) {
                }
                if (EXTERNAL_AVX(mm_flags)) {
                }
            }
        }
    }
}
示例#3
0
void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_int16 = ff_add_int16_mmx;
        c->diff_int16 = ff_diff_int16_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc->comp[0].depth_minus1<15) {
        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16 = ff_add_int16_sse2;
        c->diff_int16 = ff_diff_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
    }
}
示例#4
0
void ff_diracdsp_init_x86(DiracDSPContext* c)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64
        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
        c->dirac_hpel_filter = dirac_hpel_filter_mmx;
        c->add_rect_clamped = ff_add_rect_clamped_mmx;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif
        PIXFUNC(put, 0, mmx);
        PIXFUNC(avg, 0, mmx);
    }

    if (EXTERNAL_MMXEXT(mm_flags)) {
        PIXFUNC(avg, 0, mmxext);
    }

    if (EXTERNAL_SSE2(mm_flags)) {
        c->dirac_hpel_filter = dirac_hpel_filter_sse2;
        c->add_rect_clamped = ff_add_rect_clamped_sse2;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;

        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;

        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
    }
}
示例#5
0
文件: vp3dsp_init.c 项目: Et0h/mpc-hc
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{
    int cpuflags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpuflags)) {
        c->idct_put  = ff_vp3_idct_put_mmx;
        c->idct_add  = ff_vp3_idct_add_mmx;
        c->idct_perm = FF_PARTTRANS_IDCT_PERM;
    }
#endif

    if (EXTERNAL_MMXEXT(cpuflags)) {
        c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;

        if (!(flags & CODEC_FLAG_BITEXACT)) {
            c->v_loop_filter = ff_vp3_v_loop_filter_mmx2;
            c->h_loop_filter = ff_vp3_h_loop_filter_mmx2;
        }
    }

    if (EXTERNAL_SSE2(cpuflags)) {
        c->idct_put  = ff_vp3_idct_put_sse2;
        c->idct_add  = ff_vp3_idct_add_sse2;
        c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
    }
}
示例#6
0
av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (high_bit_depth ||
        !(avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_XVID))
        return;

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        c->idct_put  = xvid_idct_mmx_put;
        c->idct_add  = xvid_idct_mmx_add;
        c->idct      = ff_xvid_idct_mmx;
        c->perm_type = FF_IDCT_PERM_NONE;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->idct_put  = xvid_idct_mmxext_put;
        c->idct_add  = xvid_idct_mmxext_add;
        c->idct      = ff_xvid_idct_mmxext;
        c->perm_type = FF_IDCT_PERM_NONE;
    }
#endif

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->idct_put  = ff_xvid_idct_put_sse2;
        c->idct_add  = ff_xvid_idct_add_sse2;
        c->idct      = ff_xvid_idct_sse2;
        c->perm_type = FF_IDCT_PERM_SSE2;
    }
#endif /* HAVE_YASM */
}
示例#7
0
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }

    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
}
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();
    int bit_depth = (!yadif->csp) ? 8
                                  : yadif->csp->comp[0].depth_minus1 + 1;

    if (bit_depth >= 15) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
        if (EXTERNAL_SSE4(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
    } else if ( bit_depth >= 9 && bit_depth <= 14) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
    } else {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_ssse3;
    }
#endif /* HAVE_YASM */
}
示例#9
0
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMXEXT(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
    if (EXTERNAL_SSE2(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_ssse3;
}
示例#10
0
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags))
        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
    if (EXTERNAL_MMXEXT(mm_flags)) {
        c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
        c->rv34_idct_add         = ff_rv34_idct_add_mmx2;
    }
    if (EXTERNAL_SSE4(mm_flags))
        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
}
示例#11
0
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(cpu_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif
#define ASSIGN_LF(EXT) \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(cpu_flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;

        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
示例#12
0
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags))
        gf->filter_line = gradfun_filter_line_mmxext;
    if (EXTERNAL_SSSE3(cpu_flags))
        gf->filter_line = gradfun_filter_line_ssse3;

    if (EXTERNAL_SSE2(cpu_flags))
        gf->blur_line = gradfun_blur_line_sse2;
#endif /* HAVE_X86ASM */
}
示例#13
0
av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags))
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;

    if (EXTERNAL_SSE2(cpu_flags))
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;

    if (EXTERNAL_SSSE3(cpu_flags) &&
        !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
}
示例#14
0
av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
#if ARCH_X86_32
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
#endif
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
#if ARCH_X86_32
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1,  8, mmxext);
#endif
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
    }
#endif /* HAVE_YASM */
}
示例#15
0
av_cold void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{
    int flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(flags))
        dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
#endif
    if (EXTERNAL_MMXEXT(flags))
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmxext;
    if (EXTERNAL_SSE2(flags))
        dsp->add_bytes_l2         = ff_add_bytes_l2_sse2;
    if (EXTERNAL_SSSE3(flags))
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
}
示例#16
0
av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
{
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (flags & AV_CODEC_FLAG_BITEXACT) {
            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
        }
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        if (flags & AV_CODEC_FLAG_BITEXACT) {
            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
        }
    }
}
示例#17
0
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
{
    int av_unused mm_flags = av_get_cpu_flags();

    switch(c->format){
    case AV_SAMPLE_FMT_S16P:
        if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
                                        : ff_resample_common_int16_mmxext;
        }
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
                                        : ff_resample_common_int16_sse2;
        }
        if (EXTERNAL_XOP(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
                                        : ff_resample_common_int16_xop;
        }
        break;
    case AV_SAMPLE_FMT_FLTP:
        if (EXTERNAL_SSE(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
                                        : ff_resample_common_float_sse;
        }
        if (EXTERNAL_AVX_FAST(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                        : ff_resample_common_float_avx;
        }
        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                        : ff_resample_common_float_fma3;
        }
        if (EXTERNAL_FMA4(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
                                        : ff_resample_common_float_fma4;
        }
        break;
    case AV_SAMPLE_FMT_DBLP:
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
                                        : ff_resample_common_double_sse2;
        }
        break;
    }
}
示例#18
0
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmx;
    }

    // The best way to use SSE2 would be to do 2 SADs in parallel,
    // but we'd have to modify the pixelutils API to return SIMD functions.

    // It's probably not faster to shuffle data around
    // to get two lines of 8 pixels into a single 16byte register,
    // so just use the MMX 8x8 version even when SSE2 is available.
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmxext;
        sad[3] = ff_pixelutils_sad_16x16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (aligned) {
        case 0: sad[3] = ff_pixelutils_sad_16x16_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1   aligned, src2 unaligned
        case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1   aligned, src2   aligned
        }
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (aligned) {
        case 0: sad[4] = ff_pixelutils_sad_32x32_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1   aligned, src2 unaligned
        case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1   aligned, src2   aligned
        }
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (aligned) {
        case 0: sad[4] = ff_pixelutils_sad_32x32_avx2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1   aligned, src2 unaligned
        case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1   aligned, src2   aligned
        }
    }
}
示例#19
0
av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_mmx : idet_filter_line_mmx;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_mmx : idet_filter_line_mmxext;
    }
#endif // ARCH_x86_32

    if (EXTERNAL_SSE2(cpu_flags)) {
        idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_sse2 : idet_filter_line_sse2;
    }
#endif // HAVE_YASM
}
示例#20
0
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(mm_flags)) {
        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
    }
    if (EXTERNAL_SSE(mm_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        }
    }
    if (EXTERNAL_SSSE3(mm_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
        }
    }
}
示例#21
0
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmxext;
        sad[3] = ff_pixelutils_sad_16x16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (aligned) {
        case 0: sad[3] = ff_pixelutils_sad_16x16_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1   aligned, src2 unaligned
        case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1   aligned, src2   aligned
        }
    }
}
示例#22
0
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (cpu_flags & AV_CPU_FLAG_CMOV)
        c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif

    if (INLINE_MMX(cpu_flags))
        c->add_bytes = ff_add_bytes_mmx;

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        /* slower than cmov version on AMD */
        if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
            c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3;
        if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe
            c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4;
    }
}
示例#23
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_##type##sz##_##opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt

#define init_subpel2(idx, idxh, idxv, dir, type, opt) \
    init_subpel1(0, idx, idxh, idxv, 64, dir, type, opt); \
    init_subpel1(1, idx, idxh, idxv, 32, dir, type, opt); \
    init_subpel1(2, idx, idxh, idxv, 16, dir, type, opt); \
    init_subpel1(3, idx, idxh, idxv,  8, dir, type, opt); \
    init_subpel1(4, idx, idxh, idxv,  4, dir, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel2(idx, 1, 1, hv, type, opt); \
    init_subpel2(idx, 0, 1, v, type, opt); \
    init_subpel2(idx, 1, 0, h, type, opt)

#define init_lpf(opt) do { \
    if (ARCH_X86_64) { \
        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
    } \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        if (ARCH_X86_64) {
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
            dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
            dsp->itxfm_add[TX_32X32][ADST_ADST] =
            dsp->itxfm_add[TX_32X32][ADST_DCT] =
            dsp->itxfm_add[TX_32X32][DCT_ADST] =
            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        }
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        if (ARCH_X86_64) {
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
            dsp->itxfm_add[TX_32X32][ADST_ADST] =
            dsp->itxfm_add[TX_32X32][ADST_DCT] =
            dsp->itxfm_add[TX_32X32][DCT_ADST] =
            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        }
        init_lpf(avx);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
示例#24
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt)                            \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_ ## type ## sz ## _ ## opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][idxh][idxv]  = type ## _8tap_smooth_  ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type ## _8tap_regular_ ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][idxh][idxv]   = type ## _8tap_sharp_   ## sz ## dir ## _ ## opt

#define init_subpel2(idx1, idx2, sz, type, opt) \
    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)

#define init_subpel3_32_64(idx, type, opt) \
    init_subpel2(0, idx, 64, type, opt); \
    init_subpel2(1, idx, 32, type, opt)

#define init_subpel3_8to64(idx, type, opt) \
    init_subpel3_32_64(idx, type, opt); \
    init_subpel2(2, idx, 16, type, opt); \
    init_subpel2(3, idx,  8, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel3_8to64(idx, type, opt); \
    init_subpel2(4, idx,  4, type, opt)

#define init_lpf(opt) do { \
    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, mmxext);
        init_subpel2(4, 1, 4, avg, mmxext);
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, sse2);
        init_subpel3_8to64(1, avg, sse2);
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_fpel(1, 0, 32, put, avx);
        init_fpel(0, 0, 64, put, avx);
        init_lpf(avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_fpel(1, 1, 32, avg, avx2);
        init_fpel(0, 1, 64, avg, avx2);

#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0, put, avx2);
        init_subpel3_32_64(1, avg, avx2);
#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_X86ASM */
}
av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
#define init_lpf_16_func(idx, dir, bpp, opt) \
    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt

#define init_lpf_funcs(bpp, opt) \
    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
    init_lpf_16_func(0, h, bpp, opt); \
    init_lpf_16_func(1, v, bpp, opt); \
    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)

#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) \
    dsp->itxfm_add[idxa][idxb] = \
        cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt);
#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) \
    init_itx_func(idx, DCT_DCT,   typea, typeb, size, bpp, opt); \
    init_itx_func(idx, ADST_DCT,  typea, typeb, size, bpp, opt); \
    init_itx_func(idx, DCT_ADST,  typea, typeb, size, bpp, opt); \
    init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
#define init_itx_funcs(idx, size, bpp, opt) \
    init_itx_func(idx, DCT_DCT,   idct,  idct,  size, bpp, opt); \
    init_itx_func(idx, ADST_DCT,  idct,  iadst, size, bpp, opt); \
    init_itx_func(idx, DCT_ADST,  iadst, idct,  size, bpp, opt); \
    init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt); \

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
        if (!bitexact) {
            init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext);
#if BPC == 10
            init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext);
#endif
        }
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
        init_lpf_funcs(BPC, sse2);
        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
#if BPC == 10
        if (!bitexact) {
            init_itx_func(TX_4X4, ADST_DCT,  idct,  iadst, 4, 10, sse2);
            init_itx_func(TX_4X4, DCT_ADST,  iadst, idct,  4, 10, sse2);
            init_itx_func(TX_4X4, ADST_ADST, iadst, iadst, 4, 10, sse2);
        }
#else
        init_itx_funcs(TX_4X4, 4, 12, sse2);
#endif
        init_itx_funcs(TX_8X8, 8, BPC, sse2);
        init_itx_funcs(TX_16X16, 16, BPC, sse2);
        init_itx_func_one(TX_32X32, idct, idct, 32, BPC, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_lpf_funcs(BPC, ssse3);
#if BPC == 10
        if (!bitexact) {
            init_itx_funcs(TX_4X4, 4, BPC, ssse3);
        }
#endif
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_lpf_funcs(BPC, avx);
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
#if HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0,  put, BPC, avx2);
        init_subpel3_32_64(1,  avg, BPC, avx2);
        init_subpel2(2, 0, 16, put, BPC, avx2);
        init_subpel2(2, 1, 16, avg, BPC, avx2);
#endif
    }

#endif /* HAVE_X86ASM */

    ff_vp9dsp_init_16bpp_x86(dsp);
}
示例#26
0
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

#define SET_LUMA_FUNCS(tabname, funcname, depth, cf)      \
    c->tabname[0] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[1] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[2] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _32_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _48_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _64_ ## depth ## _ ## cf;

#define SET_CHROMA_FUNCS(tabname, funcname, depth, cf)    \
    c->tabname[1] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _32_ ## depth ## _ ## cf;

#define SET_QPEL_FUNCS(v, h, depth, cf, name) SET_LUMA_FUNCS  (put_hevc_qpel[v][h], name, depth, cf)
#define SET_EPEL_FUNCS(v, h, depth, cf, name) SET_CHROMA_FUNCS(put_hevc_epel[v][h], name, depth, cf)

    if (bit_depth == 8) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_8_sse2;
            SET_QPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     8, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 8, sse2);
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            SET_QPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_qpel_v);
            SET_EPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_epel_v);

        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_8_avx;
            c->idct[1] = ff_hevc_idct_8x8_8_avx;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;

            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
            SET_QPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     10, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 10, sse2);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_10_avx;
            c->idct[1] = ff_hevc_idct_8x8_10_avx;
        }
    }

#if ARCH_X86_64
    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_8_sse2;
            c->idct[3] = ff_hevc_idct_32x32_8_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }

        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     8, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 8, sse4);
        }

        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_8_avx;
            c->idct[3] = ff_hevc_idct_32x32_8_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_10_sse2;
            c->idct[3] = ff_hevc_idct_32x32_10_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     10, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 10, sse4);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            SET_QPEL_FUNCS(0, 1, 10, avx, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 10, avx, ff_hevc_qpel_v);
            SET_QPEL_FUNCS(1, 1, 10, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v);
            SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_10_avx;
            c->idct[3] = ff_hevc_idct_32x32_10_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
        }
    }
#endif /* ARCH_X86_64 */
}
示例#27
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
{
#if HAVE_YASM
    int cpu_flags;

    if (bpp == 10) {
        ff_vp9dsp_init_10bpp_x86(dsp, bitexact);
        return;
    } else if (bpp == 12) {
        ff_vp9dsp_init_12bpp_x86(dsp, bitexact);
        return;
    }

    cpu_flags = av_get_cpu_flags();

#define init_lpf(opt) do { \
    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)

#define init_ipred(sz, opt, t, e) \
    dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt

#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext
#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext
#define init_dir_tm_ipred(sz, opt) do { \
    init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \
    init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \
    init_ipred(sz, opt, hd, HOR_DOWN); \
    init_ipred(sz, opt, vl, VERT_LEFT); \
    init_ipred(sz, opt, hu, HOR_UP); \
    init_ipred(sz, opt, tm, TM_VP8); \
    init_ipred(sz, opt, vr, VERT_RIGHT); \
} while (0)
#define init_dir_tm_h_ipred(sz, opt) do { \
    init_dir_tm_ipred(sz, opt); \
    init_ipred(sz, opt, h,  HOR); \
} while (0)
#define init_dc_ipred(sz, opt) do { \
    init_ipred(sz, opt, dc,      DC); \
    init_ipred(sz, opt, dc_left, LEFT_DC); \
    init_ipred(sz, opt, dc_top,  TOP_DC); \
} while (0)
#define init_all_ipred(sz, opt) do { \
    init_dc_ipred(sz, opt); \
    init_dir_tm_h_ipred(sz, opt); \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel_func(4, 0,  4, put, , mmx);
        init_fpel_func(3, 0,  8, put, , mmx);
        if (!bitexact) {
            dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
            dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
            dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
            dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
        }
        init_ipred(8, mmx, v, VERT);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, 8, mmxext);
        init_subpel2(4, 1, 4, avg, 8, mmxext);
        init_fpel_func(4, 1,  4, avg, _8, mmxext);
        init_fpel_func(3, 1,  8, avg, _8, mmxext);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
        init_dc_ipred(4, mmxext);
        init_dc_ipred(8, mmxext);
        init_dir_tm_ipred(4, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel_func(2, 0, 16, put, , sse);
        init_fpel_func(1, 0, 32, put, , sse);
        init_fpel_func(0, 0, 64, put, , sse);
        init_ipred(16, sse, v, VERT);
        init_ipred(32, sse, v, VERT);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, 8, sse2);
        init_subpel3_8to64(1, avg, 8, sse2);
        init_fpel_func(2, 1, 16, avg,  _8, sse2);
        init_fpel_func(1, 1, 32, avg,  _8, sse2);
        init_fpel_func(0, 1, 64, avg,  _8, sse2);
        init_lpf(sse2);
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
        init_dc_ipred(16, sse2);
        init_dc_ipred(32, sse2);
        init_dir_tm_h_ipred(8, sse2);
        init_dir_tm_h_ipred(16, sse2);
        init_dir_tm_h_ipred(32, sse2);
        init_ipred(4, sse2, h, HOR);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, 8, ssse3);
        init_subpel3(1, avg, 8, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        init_lpf(ssse3);
        init_all_ipred(4, ssse3);
        init_all_ipred(8, ssse3);
        init_all_ipred(16, ssse3);
        init_all_ipred(32, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        init_lpf(avx);
        init_dir_tm_h_ipred(8, avx);
        init_dir_tm_h_ipred(16, avx);
        init_dir_tm_h_ipred(32, avx);
    }
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        init_fpel_func(1, 0, 32, put, , avx);
        init_fpel_func(0, 0, 64, put, , avx);
        init_ipred(32, avx, v, VERT);
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        init_fpel_func(1, 1, 32, avg, _8, avx2);
        init_fpel_func(0, 1, 64, avg, _8, avx2);
        if (ARCH_X86_64) {
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2;
            init_subpel3_32_64(0, put, 8, avx2);
            init_subpel3_32_64(1, avg, 8, avx2);
#endif
        }
        init_dc_ipred(32, avx2);
        init_ipred(32, avx2, h,  HOR);
        init_ipred(32, avx2, tm, TM_VP8);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
示例#28
0
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                                 const int chroma_format_idc)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

    if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            c->h264_idct_dc_add   =
            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
            c->h264_idct8_dc_add  =
            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;

            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
            if (chroma_format_idc == 1)
                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
            if (mm_flags & AV_CPU_FLAG_CMOV)
                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;

            if (EXTERNAL_MMXEXT(mm_flags)) {
                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmxext;
                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
                c->h264_idct_add16   = ff_h264_idct_add16_8_mmxext;
                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmxext;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext;

                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext;
                if (chroma_format_idc == 1) {
                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmxext;
                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
                }
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmxext;
                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmxext;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;

                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;

                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
                }
                if (EXTERNAL_SSSE3(mm_flags)) {
                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
#if ARCH_X86_32
                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmxext;
                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmxext;
                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmxext;
                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmxext;
#endif /* ARCH_X86_32 */
                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif /* HAVE_ALIGNED_STACK */

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    c->h264_idct_dc_add  =
                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
#endif /* HAVE_ALIGNED_STACK */

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */
                }
            }
        }
    }
#endif
}
示例#29
0
av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                                   const int bit_depth,
                                   const int chroma_format_idc)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
#if HAVE_MMX_EXTERNAL
        if (EXTERNAL_MMX(mm_flags)) {
            h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_8_mmx;
            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_mmx;
            if (chroma_format_idc == 1) {
                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_mmx;
            }
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_mmx;
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_mmx;
                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_mmx;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    if (mm_flags & AV_CPU_FLAG_CMOV)
                        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
                }
            }
        }
#endif
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(mm_flags)) {
            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
            h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_8_mmxext;
            if (chroma_format_idc == 1)
                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_8_mmxext;
            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_8_mmxext;
            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_8_mmxext;
            h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_8_mmxext;
            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
            if (codec_id == AV_CODEC_ID_VP8 || codec_id == AV_CODEC_ID_H264) {
                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
            }
            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
            }
            if (codec_id != AV_CODEC_ID_RV40) {
                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
            }
            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                if (chroma_format_idc == 1) {
                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
                }
            }
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_8_mmxext;
                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_8_mmxext;
                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmxext;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmxext;
                } else {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmxext;
                }
            }
        }
#endif
#if HAVE_SSE_EXTERNAL
        if (EXTERNAL_SSE(mm_flags)) {
            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
        }
#endif
#if HAVE_SSE2_EXTERNAL
        if (EXTERNAL_SSE2(mm_flags)) {
            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
                }
            }
        }
#endif
#if HAVE_SSSE3_EXTERNAL
        if (EXTERNAL_SSSE3(mm_flags)) {
            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
            if (chroma_format_idc == 1)
                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
                }
            }
        }
#endif
    } else if (bit_depth == 10) {
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(mm_flags)) {
            h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
            h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;

            if (chroma_format_idc == 1)
                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;

            h->pred8x8l[DC_128_PRED        ] = ff_pred8x8l_128_dc_10_mmxext;

            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_mmxext;
            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_mmxext;
            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_mmxext;
            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_mmxext;
            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_mmxext;
            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_mmxext;
        }
#endif
#if HAVE_SSE2_EXTERNAL
        if (EXTERNAL_SSE2(mm_flags)) {
            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;

            if (chroma_format_idc == 1) {
                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
            }

            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_sse2;
            h->pred8x8l[DC_128_PRED         ] = ff_pred8x8l_128_dc_10_sse2;
            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_sse2;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_sse2;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_sse2;

            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_sse2;
            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_sse2;
            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_sse2;
            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_sse2;
            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_sse2;
            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_sse2;
        }
#endif
#if HAVE_SSSE3_EXTERNAL
        if (EXTERNAL_SSSE3(mm_flags)) {
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;

            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_ssse3;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_ssse3;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_ssse3;
        }
#endif
#if HAVE_AVX_EXTERNAL
        if (EXTERNAL_AVX(mm_flags)) {
            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_avx;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;

            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_avx;
            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_avx;
            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_avx;
            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_avx;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_avx;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_avx;
        }
#endif
    }
}
示例#30
0
av_cold void INIT_FUNC(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
#define init_lpf_16_func(idx, dir, bpp, opt) \
    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt

#define init_lpf_funcs(bpp, opt) \
    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
    init_lpf_16_func(0, h, bpp, opt); \
    init_lpf_16_func(1, v, bpp, opt); \
    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
        init_lpf_funcs(BPC, sse2);
        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_lpf_funcs(BPC, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_lpf_funcs(BPC, avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
#if HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0,  put, BPC, avx2);
        init_subpel3_32_64(1,  avg, BPC, avx2);
        init_subpel2(2, 0, 16, put, BPC, avx2);
        init_subpel2(2, 1, 16, avg, BPC, avx2);
#endif
    }

#endif /* HAVE_YASM */

    ff_vp9dsp_init_16bpp_x86(dsp);
}