/**
 * Install x86 SIMD routines into an HEVC DSP context.
 *
 * Only bit depths 8 and 10 are handled; for any other value the context is
 * left untouched. The luma loop filters and the EPEL/QPEL tables are only
 * installed when ARCH_X86_64 is set.
 *
 * @param c         context whose function pointers are overridden
 * @param bit_depth selects which family of routines (8 or 10) is installed
 */
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    switch (bit_depth) {
    case 8:
        if (EXTERNAL_SSE2(mm_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            /* chroma (EPEL) interpolation: copy, horizontal, vertical, both */
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    8, sse4);

            /* luma (QPEL) interpolation: copy, horizontal, vertical, both */
            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
        break;

    case 10:
        if (EXTERNAL_SSE2(mm_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
        break;

    default:
        /* no optimized routines for other bit depths */
        break;
    }
}
/**
 * Install x86 SIMD intra-prediction routines into an HEVC prediction context.
 *
 * Only the 8-bit path installs anything, and only when MMX, MMXEXT and SSE4
 * are all reported. The 10-bit path of the original consisted solely of empty
 * placeholder conditionals and therefore assigns nothing.
 *
 * NOTE(review): dropping the empty placeholder branches assumes the
 * EXTERNAL_* macros are side-effect-free flag tests — confirm.
 *
 * @param c         context whose function pointers are overridden
 * @param bit_depth selects which family of routines is installed
 */
void ff_hevcpred_init_x86(HEVCPredContext *c, const int bit_depth)
{
    int cpu_caps = av_get_cpu_flags();

    if (bit_depth != 8)
        return;

    if (!EXTERNAL_MMX(cpu_caps) || !EXTERNAL_MMXEXT(cpu_caps))
        return;

    if (EXTERNAL_SSE4(cpu_caps)) {
        c->pred_planar[0] = pred_planar_0_8_sse;
        c->pred_planar[1] = pred_planar_1_8_sse;
        c->pred_planar[2] = pred_planar_2_8_sse;
        c->pred_planar[3] = pred_planar_3_8_sse;

        /*
         * pred_angular[0] is deliberately left alone: the original author
         * disabled pred_angular_0_8_sse with the note
         * "removed because too little data = bad performance".
         */
        c->pred_angular[1] = pred_angular_1_8_sse;
        c->pred_angular[2] = pred_angular_2_8_sse;
        c->pred_angular[3] = pred_angular_3_8_sse;
    }
}
/**
 * Install x86 SIMD routines into a lossless-video DSP context.
 *
 * Checks are made from the oldest to the newest instruction set, so a later
 * assignment to the same field overrides an earlier one.
 *
 * @param c     context whose function pointers are overridden
 * @param avctx codec context; only avctx->pix_fmt is read, to look up the
 *              pixel format descriptor
 */
void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_int16  = ff_add_int16_mmx;
        c->diff_int16 = ff_diff_int16_mmx;
    }

    /*
     * av_pix_fmt_desc_get() returns NULL for an unknown pixel format, so the
     * descriptor must be checked before it is dereferenced. Without a
     * descriptor the depth restriction cannot be verified and the MMXEXT
     * median predictors are not installed.
     */
    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc &&
        pix_desc->comp[0].depth_minus1 < 15) {
        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16  = ff_add_int16_sse2;
        c->diff_int16 = ff_diff_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
    }
}
/**
 * Run the per-instruction-set DSP initializers for x86.
 *
 * Each initializer is called only if the matching capability macro accepts
 * the detected CPU flags; they run from MMX up to SSE4 in that order. The
 * encoder initializer is called last whenever CONFIG_ENCODERS is non-zero.
 *
 * @param c              DSP context handed to every initializer
 * @param avctx          codec context handed to every initializer
 * @param high_bit_depth forwarded unchanged to every initializer
 */
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    const int flags = av_get_cpu_flags();

    if (X86_MMX(flags)) {
        dsputil_init_mmx(c, avctx, flags, high_bit_depth);
    }
    if (X86_MMXEXT(flags)) {
        dsputil_init_mmxext(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE(flags)) {
        dsputil_init_sse(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE2(flags)) {
        dsputil_init_sse2(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSSE3(flags)) {
        dsputil_init_ssse3(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSE4(flags)) {
        dsputil_init_sse4(c, avctx, flags, high_bit_depth);
    }

    if (CONFIG_ENCODERS) {
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
    }
}
/**
 * Run the per-instruction-set DSP initializers for x86.
 *
 * When built with HAVE_7REGS and HAVE_INLINE_ASM, the CMOV median predictor
 * is installed first if the CPU reports CMOV. The initializers then run from
 * MMX up to SSE4, each gated by its capability macro. The encoder
 * initializer is called last whenever CONFIG_ENCODERS is non-zero.
 *
 * @param c     DSP context handed to every initializer
 * @param avctx codec context handed to every initializer
 */
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
{
    const int flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (flags & AV_CPU_FLAG_CMOV) {
        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
    }
#endif

    if (X86_MMX(flags)) {
        dsputil_init_mmx(c, avctx, flags);
    }
    if (X86_MMXEXT(flags)) {
        dsputil_init_mmxext(c, avctx, flags);
    }
    if (X86_SSE(flags)) {
        dsputil_init_sse(c, avctx, flags);
    }
    if (X86_SSE2(flags)) {
        dsputil_init_sse2(c, avctx, flags);
    }
    if (EXTERNAL_SSSE3(flags)) {
        dsputil_init_ssse3(c, avctx, flags);
    }
    if (EXTERNAL_SSE4(flags)) {
        dsputil_init_sse4(c, avctx, flags);
    }

    if (CONFIG_ENCODERS) {
        ff_dsputilenc_init_mmx(c, avctx);
    }
}
/**
 * Install x86 SIMD routines into an RV34 DSP context.
 *
 * The SSE4 check comes after the MMX one, so when both match the SSE4
 * rv34_idct_dc_add replaces the MMX version.
 *
 * @param c   context whose function pointers are overridden
 * @param dsp not used by this function
 */
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(flags)) {
        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
    }

    if (EXTERNAL_MMXEXT(flags)) {
        c->rv34_idct_add         = ff_rv34_idct_add_mmx2;
        c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
    }

    if (EXTERNAL_SSE4(flags)) {
        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
    }
}
/**
 * Install x86 SIMD routines into a VC-1 DSP context.
 *
 * The inline-assembly initializers run first (when compiled in). The
 * HAVE_YASM section then installs routines from MMX up to SSE4 in that
 * order, so a later assignment to the same field overrides an earlier one.
 *
 * @param dsp context whose function pointers are overridden
 */
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(flags)) {
        ff_vc1dsp_init_mmx(dsp);
    }
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(flags)) {
        ff_vc1dsp_init_mmxext(dsp);
    }
#endif

/* Assign all six loop-filter pointers for the instruction set EXT.
 * The 4- and 8-sample filters use ff_-prefixed symbols, the 16-sample ones
 * use unprefixed symbols. Expands to several statements, so it must only be
 * used inside a braced block. */
#define ASSIGN_LF(EXT) \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }

    if (EXTERNAL_AMD3DNOW(flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }

    if (EXTERNAL_MMXEXT(flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;

        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }

    if (EXTERNAL_SSE2(flags)) {
        /* only the 8- and 16-sample loop filters are replaced here */
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;

        dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2;
    }

    if (EXTERNAL_SSSE3(flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }

    if (EXTERNAL_SSE4(flags)) {
        /* only the horizontal 8- and 16-sample filters are replaced here */
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
/**
 * Run the per-instruction-set DSP initializers for x86 and, with inline
 * assembly available, pick an MMX IDCT.
 *
 * The MMX IDCT is only selected when avctx->lowres is 0 and high_bit_depth
 * is 0. The initializers then run from MMX up to SSE4, each gated by its
 * capability macro, and the encoder initializer is called last whenever
 * CONFIG_ENCODERS is non-zero.
 *
 * @param c              DSP context to fill in
 * @param avctx          codec context; idct_algo and lowres are read here
 * @param high_bit_depth non-zero disables the MMX IDCT selection; also
 *                       forwarded to every initializer
 */
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (HAVE_MMX && (flags & AV_CPU_FLAG_CMOV)) {
        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
    }
#endif

    if (X86_MMX(flags)) {
#if HAVE_INLINE_ASM
        const int idct_algo = avctx->idct_algo;

        if (!avctx->lowres && !high_bit_depth) {
            switch (idct_algo) {
            case FF_IDCT_AUTO:
            case FF_IDCT_SIMPLEMMX:
                c->idct_put              = ff_simple_idct_put_mmx;
                c->idct_add              = ff_simple_idct_add_mmx;
                c->idct                  = ff_simple_idct_mmx;
                c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
                break;
            case FF_IDCT_XVIDMMX:
                /* the permutation type is left unchanged for this IDCT */
                c->idct_put = ff_idct_xvid_mmx_put;
                c->idct_add = ff_idct_xvid_mmx_add;
                c->idct     = ff_idct_xvid_mmx;
                break;
            default:
                break;
            }
        }
#endif /* HAVE_INLINE_ASM */

        dsputil_init_mmx(c, avctx, flags, high_bit_depth);
    }

    if (X86_MMXEXT(flags)) {
        dsputil_init_mmxext(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE(flags)) {
        dsputil_init_sse(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE2(flags)) {
        dsputil_init_sse2(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSSE3(flags)) {
        dsputil_init_ssse3(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSE4(flags)) {
        dsputil_init_sse4(c, avctx, flags, high_bit_depth);
    }

    if (CONFIG_ENCODERS) {
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
    }
}
/**
 * Select an x86 SIMD frame-blending routine for the framerate filter.
 *
 * AVX2 (fast) takes precedence for both depths. Otherwise a bit depth of 8
 * uses the SSSE3 routine and every other depth uses the 16-bit SSE4 routine,
 * when available. If nothing matches, s->blend is left unchanged.
 *
 * @param s filter context; s->bitdepth is read and s->blend may be written
 */
void ff_framerate_init_x86(FrameRateContext *s)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_AVX2_FAST(flags)) {
        if (s->bitdepth == 8) {
            s->blend = ff_blend_frames_avx2;
        } else {
            s->blend = ff_blend_frames16_avx2;
        }
    } else if (s->bitdepth == 8) {
        if (EXTERNAL_SSSE3(flags)) {
            s->blend = ff_blend_frames_ssse3;
        }
    } else {
        if (EXTERNAL_SSE4(flags)) {
            s->blend = ff_blend_frames16_sse4;
        }
    }
}
/**
 * Copy an image plane with the SSE4 routine when that is possible.
 *
 * The routine is given the byte width aligned to a multiple of 64 by
 * FFALIGN, so it is only used when that aligned width fits within both line
 * sizes.
 *
 * @param dst          destination plane
 * @param dst_linesize destination line size
 * @param src          source plane
 * @param src_linesize source line size
 * @param bytewidth    bytes per row to copy, before alignment
 * @param height       number of rows
 * @return 0 if the SSE4 routine was called, AVERROR(ENOSYS) if SSE4 is not
 *         available or the aligned width exceeds either line size
 */
int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize,
                                    const uint8_t *src, ptrdiff_t src_linesize,
                                    ptrdiff_t bytewidth, int height)
{
    int cpu_flags = av_get_cpu_flags();
    ptrdiff_t padded_width = FFALIGN(bytewidth, 64);

    if (!EXTERNAL_SSE4(cpu_flags) ||
        padded_width > dst_linesize ||
        padded_width > src_linesize) {
        return AVERROR(ENOSYS);
    }

    ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize,
                                     padded_width, height);
    return 0;
}
/**
 * Install x86 SIMD routines into a Dirac DSP context.
 *
 * Checks run from MMX up to SSE4, so a later assignment to the same field
 * overrides an earlier one. Several MMX routines are only installed on
 * non-x86-64 builds, and the 10-bit put_signed_rect_clamped routine only on
 * x86-64 builds.
 *
 * @param c context whose function pointers are overridden
 */
void ff_diracdsp_init_x86(DiracDSPContext* c)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64
        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
        c->dirac_hpel_filter = dirac_hpel_filter_mmx;
        c->add_rect_clamped = ff_add_rect_clamped_mmx;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif
        /* NOTE(review): PIXFUNC is defined outside this view; presumably it
         * fills entries of the put/avg pixel tables — confirm. */
        PIXFUNC(put, 0, mmx);
        PIXFUNC(avg, 0, mmx);
    }

    if (EXTERNAL_MMXEXT(mm_flags)) {
        PIXFUNC(avg, 0, mmxext);
    }

    if (EXTERNAL_SSE2(mm_flags)) {
        /* overlapped block motion compensation, 16- and 32-wide */
        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;

        c->dirac_hpel_filter          = dirac_hpel_filter_sse2;
        c->add_rect_clamped           = ff_add_rect_clamped_sse2;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;

        /* 16- and 32-wide put/avg pixel copies */
        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
    }

    if (EXTERNAL_SSE4(mm_flags)) {
        c->dequant_subband[1] = ff_dequant_subband_32_sse4;
#if ARCH_X86_64
        c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
#endif
    }
}
/**
 * Install x86 SIMD routines into a DCA DSP context.
 *
 * decode_hf may be assigned by the SSE (x86-32 builds only), SSE2 and SSE4
 * checks in that order; the last matching one wins.
 *
 * @param s context whose function pointers are overridden
 */
av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(flags)) {
        s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
        s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
#if ARCH_X86_32
        s->decode_hf  = ff_decode_hf_sse;
#endif
    }

    if (EXTERNAL_SSE2(flags))
        s->decode_hf = ff_decode_hf_sse2;

    if (EXTERNAL_SSE4(flags))
        s->decode_hf = ff_decode_hf_sse4;
}
/**
 * Install x86 SIMD routines into a PS DSP context.
 *
 * Checks run in the order SSE, SSE3, SSE4, so a later assignment to the same
 * field (add_squares, hybrid_analysis, hybrid_synthesis_deint) overrides the
 * SSE one.
 *
 * @param s context whose function pointers are overridden
 */
av_cold void ff_psdsp_init_x86(PSDSPContext *s)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(flags)) {
        s->add_squares            = ff_ps_add_squares_sse;
        s->mul_pair_single        = ff_ps_mul_pair_single_sse;
        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse;
        s->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_sse;
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse;
    }

    if (EXTERNAL_SSE3(flags)) {
        s->add_squares           = ff_ps_add_squares_sse3;
        s->hybrid_analysis       = ff_ps_hybrid_analysis_sse3;
        s->stereo_interpolate[0] = ff_ps_stereo_interpolate_sse3;
        s->stereo_interpolate[1] = ff_ps_stereo_interpolate_ipdopd_sse3;
    }

    if (EXTERNAL_SSE4(flags))
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse4;
}
/**
 * Install an x86 SIMD IDCT into a ProRes DSP context.
 *
 * The whole body is compiled only on x86-64 builds. Each matching check sets
 * the transpose permutation type together with idct_put; checks run in the
 * order SSE2, SSE4, AVX, so the last matching one wins.
 *
 * @param dsp context whose idct_put and idct_permutation_type are overridden
 */
av_cold void ff_proresdsp_init_x86(ProresDSPContext *dsp)
{
#if ARCH_X86_64
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(flags)) {
        dsp->idct_put              = ff_prores_idct_put_10_sse2;
        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }

    if (EXTERNAL_SSE4(flags)) {
        dsp->idct_put              = ff_prores_idct_put_10_sse4;
        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }

    if (EXTERNAL_AVX(flags)) {
        dsp->idct_put              = ff_prores_idct_put_10_avx;
        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }
#endif /* ARCH_X86_64 */
}
/**
 * Select an x86 SIMD filter_line routine for the yadif filter.
 *
 * The bit depth is taken from the first component of yadif->csp, or assumed
 * to be 8 when csp is NULL. Three routine families exist: "16bit" for depths
 * of 15 and above, "10bit" for depths 9..14, and the plain one for everything
 * else. Within a family, later checks override earlier ones. The MMXEXT
 * versions are only considered on x86-32 builds. The whole body is compiled
 * only when HAVE_YASM is set.
 *
 * @param yadif filter context; csp is read and filter_line may be written
 */
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();
    int bit_depth = yadif->csp ? yadif->csp->comp[0].depth_minus1 + 1 : 8;

    if (bit_depth >= 15) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
        }
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
        }
    } else if (bit_depth >= 9) {
        /* depths 9..14: the upper bound is implied by the branch above */
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
        }
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
        }
    } else {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_mmxext;
        }
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            yadif->filter_line = ff_yadif_filter_line_ssse3;
        }
    }
#endif /* HAVE_YASM */
}
/**
 * Install x86 SIMD routines into an HEVC DSP context.
 *
 * Only bit depths 8 and 10 are handled. The function has two passes over the
 * bit depth: the first is compiled on every x86 build, the second only when
 * ARCH_X86_64 is set. Within each pass the instruction-set checks run from
 * the oldest to the newest, so a later assignment to the same table entry
 * overrides an earlier one.
 *
 * @param c         context whose function pointers are overridden
 * @param bit_depth selects which family of routines (8 or 10) is installed
 */
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

/* Fill entries 0..7 of c->tabname with the functions named
 * funcname_<W>_<depth>_<cf> for W = 4, 8, 12, 16, 24, 32, 48, 64.
 * Expands to several statements (with a trailing ';'), so it must only be
 * used inside a braced block. */
#define SET_LUMA_FUNCS(tabname, funcname, depth, cf)      \
    c->tabname[0] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[1] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[2] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _32_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _48_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _64_ ## depth ## _ ## cf;

/* Fill entries 1 and 3..7 of c->tabname with the functions named
 * funcname_<W>_<depth>_<cf> for W = 4, 8, 12, 16, 24, 32.
 * Entries 0 and 2 are not touched. Same usage restriction as above. */
#define SET_CHROMA_FUNCS(tabname, funcname, depth, cf)    \
    c->tabname[1] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _32_ ## depth ## _ ## cf;

/* Shorthands that target the put_hevc_qpel[v][h] / put_hevc_epel[v][h]
 * sub-tables. */
#define SET_QPEL_FUNCS(v, h, depth, cf, name) SET_LUMA_FUNCS  (put_hevc_qpel[v][h], name, depth, cf)
#define SET_EPEL_FUNCS(v, h, depth, cf, name) SET_CHROMA_FUNCS(put_hevc_epel[v][h], name, depth, cf)

    /* First pass: routines available on every x86 build. */
    if (bit_depth == 8) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

            /* idct_dc[1] replaces the MMXEXT version set above */
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_8_sse2;

            SET_QPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     8, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 8, sse2);
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            SET_QPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_qpel_v);
            SET_EPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_epel_v);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_8_avx;
            c->idct[1] = ff_hevc_idct_8x8_8_avx;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;

            /* idct_dc[1] replaces the MMXEXT version set above */
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_10_sse2;

            SET_QPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     10, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 10, sse2);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_10_avx;
            c->idct[1] = ff_hevc_idct_8x8_10_avx;
        }
    }

    /* Second pass: routines that are only installed on x86-64 builds. */
#if ARCH_X86_64
    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_8_sse2;
            c->idct[3] = ff_hevc_idct_32x32_8_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }

        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     8, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 8, sse4);
        }

        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            /* note: these names carry no ff_ prefix, unlike the h/v ones */
            SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_8_avx;
            c->idct[3] = ff_hevc_idct_32x32_8_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            /* replaces the SSE2 DC IDCTs set in the first pass */
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_10_sse2;
            c->idct[3] = ff_hevc_idct_32x32_10_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     10, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 10, sse4);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            /* unlike the 8-bit path, the h and v tables are also set here */
            SET_QPEL_FUNCS(0, 1, 10, avx, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 10, avx, ff_hevc_qpel_v);
            SET_QPEL_FUNCS(1, 1, 10, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v);
            SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_10_avx;
            c->idct[3] = ff_hevc_idct_32x32_10_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            /* replaces the SSE2 DC IDCTs set in the first pass */
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
        }
    }
#endif /* ARCH_X86_64 */
}
/**
 * Install x86 SIMD routines into an H.264 DSP context.
 *
 * The whole body is compiled only when HAVE_YASM is set. Only bit depths 8
 * and 10 are handled. All SSE2/SSSE3/SSE4/AVX blocks are nested inside the
 * MMX and MMXEXT checks, so nothing beyond the MMX set is installed unless
 * MMXEXT is also reported. Within a nesting level the checks run from the
 * oldest to the newest instruction set, so a later assignment to the same
 * field overrides an earlier one.
 *
 * @param c                 context whose function pointers are overridden
 * @param bit_depth         selects which family of routines (8 or 10) is
 *                          installed
 * @param chroma_format_idc several chroma-related routines are only
 *                          installed when this equals 1
 */
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                                 const int chroma_format_idc)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

    /* independent of bit depth */
    if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            /* the plain MMX build has no dedicated DC-only routine, so the
             * full IDCT is used for the *_dc_add pointers as well */
            c->h264_idct_dc_add   =
            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
            c->h264_idct8_dc_add  =
            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;

            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
            if (chroma_format_idc == 1)
                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
            /* this routine is additionally gated on the CMOV flag */
            if (mm_flags & AV_CPU_FLAG_CMOV)
                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;

            if (EXTERNAL_MMXEXT(mm_flags)) {
                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmxext;
                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
                c->h264_idct_add16   = ff_h264_idct_add16_8_mmxext;
                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmxext;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext;

                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext;
                /* horizontal chroma filters only for chroma_format_idc 1 */
                if (chroma_format_idc == 1) {
                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmxext;
                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
                }
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmxext;
                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmxext;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;

                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;

                    /* only entries 0 and 1 are replaced; entry 2 keeps the
                     * MMXEXT routine */
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;

                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
                }
                if (EXTERNAL_SSSE3(mm_flags)) {
                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
#if ARCH_X86_32
                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmxext;
                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmxext;
                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmxext;
                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmxext;
#endif /* ARCH_X86_32 */
                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
                    /* these routines are only installed on builds with
                     * HAVE_ALIGNED_STACK */
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif /* HAVE_ALIGNED_STACK */

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    /* note: here the full IDCT also replaces the MMXEXT
                     * DC-only routine assigned above */
                    c->h264_idct_dc_add  =
                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
#endif /* HAVE_ALIGNED_STACK */

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */
                }
            }
        }
    }
#endif
}
/**
 * Select an x86 SIMD blend routine for the given filter parameters.
 *
 * A SIMD routine is only ever installed when param->opacity equals 1. The
 * instruction-set checks run from the oldest to the newest, so a later match
 * for the same mode overrides an earlier one. The 16-bit routines are only
 * compiled on x86-64 builds. Modes without a case leave param->blend
 * unchanged.
 *
 * @param param    filter parameters; mode and opacity are read and blend may
 *                 be written
 * @param is_16bit zero selects the 8-bit routines, non-zero the 16-bit ones
 */
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
    int cpu_flags = av_get_cpu_flags();

    /* every assignment below requires full opacity */
    if (param->opacity != 1)
        return;

    if (!is_16bit) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            switch (param->mode) {
            case BLEND_ADDITION:     param->blend = ff_blend_addition_sse2;     break;
            case BLEND_GRAINMERGE:   param->blend = ff_blend_grainmerge_sse2;   break;
            case BLEND_AND:          param->blend = ff_blend_and_sse2;          break;
            case BLEND_AVERAGE:      param->blend = ff_blend_average_sse2;      break;
            case BLEND_DARKEN:       param->blend = ff_blend_darken_sse2;       break;
            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_sse2; break;
            case BLEND_DIVIDE:       param->blend = ff_blend_divide_sse2;       break;
            case BLEND_HARDMIX:      param->blend = ff_blend_hardmix_sse2;      break;
            case BLEND_LIGHTEN:      param->blend = ff_blend_lighten_sse2;      break;
            case BLEND_MULTIPLY:     param->blend = ff_blend_multiply_sse2;     break;
            case BLEND_OR:           param->blend = ff_blend_or_sse2;           break;
            case BLEND_PHOENIX:      param->blend = ff_blend_phoenix_sse2;      break;
            case BLEND_SCREEN:       param->blend = ff_blend_screen_sse2;       break;
            case BLEND_SUBTRACT:     param->blend = ff_blend_subtract_sse2;     break;
            case BLEND_XOR:          param->blend = ff_blend_xor_sse2;          break;
            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_sse2;   break;
            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_sse2;    break;
            case BLEND_NEGATION:     param->blend = ff_blend_negation_sse2;     break;
            default:                                                            break;
            }
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            switch (param->mode) {
            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_ssse3;  break;
            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_ssse3;   break;
            case BLEND_NEGATION:     param->blend = ff_blend_negation_ssse3;    break;
            default:                                                            break;
            }
        }
        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
            /* no AVX2 case exists for BLEND_DIVIDE */
            switch (param->mode) {
            case BLEND_ADDITION:     param->blend = ff_blend_addition_avx2;     break;
            case BLEND_GRAINMERGE:   param->blend = ff_blend_grainmerge_avx2;   break;
            case BLEND_AND:          param->blend = ff_blend_and_avx2;          break;
            case BLEND_AVERAGE:      param->blend = ff_blend_average_avx2;      break;
            case BLEND_DARKEN:       param->blend = ff_blend_darken_avx2;       break;
            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_avx2; break;
            case BLEND_HARDMIX:      param->blend = ff_blend_hardmix_avx2;      break;
            case BLEND_LIGHTEN:      param->blend = ff_blend_lighten_avx2;      break;
            case BLEND_MULTIPLY:     param->blend = ff_blend_multiply_avx2;     break;
            case BLEND_OR:           param->blend = ff_blend_or_avx2;           break;
            case BLEND_PHOENIX:      param->blend = ff_blend_phoenix_avx2;      break;
            case BLEND_SCREEN:       param->blend = ff_blend_screen_avx2;       break;
            case BLEND_SUBTRACT:     param->blend = ff_blend_subtract_avx2;     break;
            case BLEND_XOR:          param->blend = ff_blend_xor_avx2;          break;
            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_avx2;   break;
            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_avx2;    break;
            case BLEND_NEGATION:     param->blend = ff_blend_negation_avx2;     break;
            default:                                                            break;
            }
        }
    } else { /* is_16_bit */
#if ARCH_X86_64
        if (EXTERNAL_SSE2(cpu_flags)) {
            switch (param->mode) {
            case BLEND_ADDITION:     param->blend = ff_blend_addition_16_sse2;     break;
            case BLEND_AND:          param->blend = ff_blend_and_16_sse2;          break;
            case BLEND_AVERAGE:      param->blend = ff_blend_average_16_sse2;      break;
            case BLEND_OR:           param->blend = ff_blend_or_16_sse2;           break;
            case BLEND_SUBTRACT:     param->blend = ff_blend_subtract_16_sse2;     break;
            case BLEND_XOR:          param->blend = ff_blend_xor_16_sse2;          break;
            default:                                                               break;
            }
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            switch (param->mode) {
            case BLEND_GRAINMERGE:   param->blend = ff_blend_grainmerge_16_sse4;   break;
            case BLEND_DARKEN:       param->blend = ff_blend_darken_16_sse4;       break;
            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_16_sse4; break;
            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_16_sse4;   break;
            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_16_sse4;    break;
            case BLEND_NEGATION:     param->blend = ff_blend_negation_16_sse4;     break;
            case BLEND_LIGHTEN:      param->blend = ff_blend_lighten_16_sse4;      break;
            case BLEND_PHOENIX:      param->blend = ff_blend_phoenix_16_sse4;      break;
            default:                                                               break;
            }
        }
        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
            switch (param->mode) {
            case BLEND_ADDITION:     param->blend = ff_blend_addition_16_avx2;     break;
            case BLEND_GRAINMERGE:   param->blend = ff_blend_grainmerge_16_avx2;   break;
            case BLEND_AND:          param->blend = ff_blend_and_16_avx2;          break;
            case BLEND_AVERAGE:      param->blend = ff_blend_average_16_avx2;      break;
            case BLEND_DARKEN:       param->blend = ff_blend_darken_16_avx2;       break;
            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_16_avx2; break;
            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_16_avx2;   break;
            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_16_avx2;    break;
            case BLEND_NEGATION:     param->blend = ff_blend_negation_16_avx2;     break;
            case BLEND_LIGHTEN:      param->blend = ff_blend_lighten_16_avx2;      break;
            case BLEND_OR:           param->blend = ff_blend_or_16_avx2;           break;
            case BLEND_PHOENIX:      param->blend = ff_blend_phoenix_16_avx2;      break;
            case BLEND_SUBTRACT:     param->blend = ff_blend_subtract_16_avx2;     break;
            case BLEND_XOR:          param->blend = ff_blend_xor_16_avx2;          break;
            default:                                                               break;
            }
        }
#endif /* ARCH_X86_64 */
    }
}
/**
 * Register the x86 SIMD sample-format conversion routines on @p ac.
 *
 * Instruction-set groups are visited in a fixed order (MMX, SSE, SSE2,
 * SSSE3, SSE4, AVX-fast, AVX), and every routine of every group whose
 * EXTERNAL_*() check passes is handed to ff_audio_convert_set_func().
 *
 * NOTE(review): the arguments after @p ac look like (output format, input
 * format, channel count, pointer alignment, sample-count alignment,
 * description, function), and a later registration for the same conversion
 * presumably replaces an earlier one -- confirm against the declaration of
 * ff_audio_convert_set_func().
 *
 * @param ac conversion context that receives the function pointers
 */
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{
    int flags = av_get_cpu_flags();

    /* ---- MMX ---- */
    if (EXTERNAL_MMX(flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
                                  0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
    }

    /* ---- SSE ---- */
    if (EXTERNAL_SSE(flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 8, "SSE", ff_conv_fltp_to_flt_2ch_sse);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
    }

    /* ---- SSE2 ---- */
    if (EXTERNAL_SSE2(flags)) {
        /* CPUs flagged SSE2SLOW get a single dedicated routine; all other
         * SSE2 CPUs get the three regular ones. */
        if (flags & AV_CPU_FLAG_SSE2SLOW) {
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                      6, 1, 4, "SSE2SLOW", ff_conv_s16p_to_s16_6ch_sse2slow);
        } else {
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
                                      0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                      6, 16, 8, "SSE2", ff_conv_s16p_to_s16_6ch_sse2);
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                      6, 16, 4, "SSE2", ff_conv_fltp_to_s16_6ch_sse2);
        }

        /* Registered for every SSE2 CPU, slow or not. */
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
                                  0, 16, 8, "SSE2", ff_conv_s32_to_flt_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
                                  0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
                                  0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSE2", ff_conv_flt_to_s16p_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
    }

    /* ---- SSSE3 ---- */
    if (EXTERNAL_SSSE3(flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSSE3", ff_conv_s16_to_fltp_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
    }

    /* ---- SSE4 ---- */
    if (EXTERNAL_SSE4(flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
    }

    /* ---- AVX, routines gated by the separate EXTERNAL_AVX_FAST() check;
     *      these are the only registrations passing 32 as the fifth
     *      argument ---- */
    if (EXTERNAL_AVX_FAST(flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
                                  0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
                                  0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
    }

    /* ---- AVX ---- */
    if (EXTERNAL_AVX(flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "AVX", ff_conv_s16p_to_flt_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "AVX", ff_conv_fltp_to_s16_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "AVX", ff_conv_flt_to_s16p_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  2, 16, 4, "AVX", ff_conv_flt_to_fltp_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
    }
}
/**
 * Point the HEVC DSP context at x86 SIMD implementations.
 *
 * Only bit depths 8 and 10 are handled; for any other value the context is
 * left untouched. Within a bit depth the instruction-set checks run from
 * oldest to newest, so when two of them write the same slot (for example
 * transform_dc_add[3]) the assignment made last is the one that remains.
 *
 * @param c         DSP context whose function pointers are overridden
 * @param bit_depth sample bit depth of the stream being decoded
 */
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    switch (bit_depth) {
    case 8:
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->transform_dc_add[0] = ff_hevc_idct4_dc_add_8_mmxext;
            c->transform_dc_add[1] = ff_hevc_idct8_dc_add_8_mmxext;
        }

        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2;
        }

        /* Luma loop filters are installed on x86-64 builds only. */
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }

        /* EPEL/QPEL tables are filled on x86-64 builds only. */
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }

        /* Replaces the SSE2 routine stored in slot 3 above. */
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_avx2;
        }
        break;

    case 10:
        /* Only slot 0 has an MMXEXT routine at this bit depth. */
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->transform_dc_add[0] = ff_hevc_idct4_dc_add_10_mmxext;
        }

        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;

            c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2;
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_sse2;
        }

        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }

        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }

        /* AVX replaces the SSE2 routines in slots 1-3 ... */
        if (EXTERNAL_AVX(cpu_flags)) {
            c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_avx;
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_avx;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_avx;
        }

        /* ... and AVX2 in turn replaces slots 2 and 3. */
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_avx2;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_avx2;
        }
        break;

    default:
        /* No overrides for other bit depths. */
        break;
    }
}
/**
 * Install x86 assembly routines into a VP8 DSP context.
 *
 * The whole body is compiled only when HAVE_YASM is set; otherwise the
 * function does nothing. Instruction-set checks run from oldest to newest,
 * so a pointer written by an earlier check may be overwritten by a later
 * one. The MMX and MMXEXT loop filters (and some MMX IDCT routines) are
 * installed on 32-bit x86 builds only.
 *
 * @param c DSP context whose function pointers are overridden
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(flags)) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
#if ARCH_X86_32
        c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_add      = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht   = ff_vp8_luma_dc_wht_mmx;

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;

        c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
        c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
#endif
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap.
     *
     * NOTE(review): no 4-tap or 6-tap routines are assigned anywhere in
     * this function, so the remark above may belong to a different init
     * routine -- confirm. */
    if (EXTERNAL_MMXEXT(flags)) {
#if ARCH_X86_32
        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;

        c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
        c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
#endif
    }

    if (EXTERNAL_SSE(flags)) {
        c->vp8_idct_add    = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
    }

    /* The vertical SSE2 loop filters are installed when either the SSE2 or
     * the SSE2SLOW flag is reported ... */
    if (HAVE_SSE2_EXTERNAL &&
        (flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW))) {
        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;

        c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* ... whereas the horizontal ones (and idct_dc_add4y) depend on the
     * EXTERNAL_SSE2() check. */
    if (EXTERNAL_SSE2(flags)) {
        c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;

        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;

        c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    if (EXTERNAL_SSSE3(flags)) {
        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;

        c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
        c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    /* SSE4 supplies only a DC IDCT and three horizontal loop filters. */
    if (EXTERNAL_SSE4(flags)) {
        c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y     = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv     = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif /* HAVE_YASM */
}
/**
 * Install x86 SIMD implementations into an HEVC DSP context.
 *
 * Nothing is installed unless both the MMX and the MMXEXT checks pass, and
 * only bit depths 8 and 10 are handled; in every other case the context is
 * left untouched. No loop-filter pointers are written by this function.
 *
 * The *_sse4 transform routines are gated on the SSE4 check for both bit
 * depths, so a CPU that reports SSSE3 but not SSE4 never receives them and
 * keeps whatever implementation the context already held.
 * NOTE(review): this assumes the *_sse4 suffix means the routines execute
 * SSE4 instructions -- confirm against their definitions.
 *
 * @param c         DSP context whose function pointers are overridden
 * @param bit_depth sample bit depth of the stream being decoded
 */
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    /* Every override below requires MMX and MMXEXT in addition to the
     * instruction set it is named after. */
    if (!EXTERNAL_MMX(mm_flags) || !EXTERNAL_MMXEXT(mm_flags))
        return;

    if (bit_depth == 8) {
        if (EXTERNAL_SSSE3(mm_flags)) {
            c->put_unweighted_pred = ff_hevc_put_unweighted_pred_8_sse;

            /* Full-pel copy plus the purely horizontal and purely vertical
             * quarter-pel positions. */
            c->put_hevc_qpel[0][0] = ff_hevc_put_hevc_qpel_pixels_8_sse;
            c->put_hevc_qpel[0][1] = ff_hevc_put_hevc_qpel_h_1_8_sse;
            c->put_hevc_qpel[0][2] = ff_hevc_put_hevc_qpel_h_2_8_sse;
            c->put_hevc_qpel[0][3] = ff_hevc_put_hevc_qpel_h_3_8_sse;
            c->put_hevc_qpel[1][0] = ff_hevc_put_hevc_qpel_v_1_8_sse;
            c->put_hevc_qpel[2][0] = ff_hevc_put_hevc_qpel_v_2_8_sse;
            c->put_hevc_qpel[3][0] = ff_hevc_put_hevc_qpel_v_3_8_sse;
        }

        if (EXTERNAL_SSE4(mm_flags)) {
            /* Gated on SSE4, in line with the 10-bit branch below. */
            c->transform_4x4_luma_add = ff_hevc_transform_4x4_luma_add_8_sse4;
            c->transform_add[0]       = ff_hevc_transform_4x4_add_8_sse4;
            c->transform_add[1]       = ff_hevc_transform_8x8_add_8_sse4;
            c->transform_add[2]       = ff_hevc_transform_16x16_add_8_sse4;
            c->transform_add[3]       = ff_hevc_transform_32x32_add_8_sse4;

            /* Weighted prediction and EPEL are additionally restricted by
             * compiler version (or an Apple toolchain). */
#if GCC_VERSION > MIN_GCC_VERSION_MC || __APPLE__
            c->put_weighted_pred_avg = ff_hevc_put_weighted_pred_avg_8_sse;
            c->weighted_pred         = ff_hevc_weighted_pred_8_sse;
            c->weighted_pred_avg     = ff_hevc_weighted_pred_avg_8_sse;

            c->put_hevc_epel[0][0] = ff_hevc_put_hevc_epel_pixels_8_sse;
            c->put_hevc_epel[0][1] = ff_hevc_put_hevc_epel_h_8_sse;
            c->put_hevc_epel[1][0] = ff_hevc_put_hevc_epel_v_8_sse;
            c->put_hevc_epel[1][1] = ff_hevc_put_hevc_epel_hv_8_sse;
#endif

            c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_0_8_sse;
            c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_1_8_sse;
            c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_2_8_sse;
            c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_3_8_sse;

            c->sao_band_filter[0] = ff_hevc_sao_band_filter_0_8_sse;
            c->sao_band_filter[1] = ff_hevc_sao_band_filter_1_8_sse;
            c->sao_band_filter[2] = ff_hevc_sao_band_filter_2_8_sse;
            c->sao_band_filter[3] = ff_hevc_sao_band_filter_3_8_sse;

            /* Combined horizontal + vertical quarter-pel positions. */
            c->put_hevc_qpel[1][1] = ff_hevc_put_hevc_qpel_h_1_v_1_sse;
            c->put_hevc_qpel[1][2] = ff_hevc_put_hevc_qpel_h_2_v_1_sse;
            c->put_hevc_qpel[1][3] = ff_hevc_put_hevc_qpel_h_3_v_1_sse;
            c->put_hevc_qpel[2][1] = ff_hevc_put_hevc_qpel_h_1_v_2_sse;
            c->put_hevc_qpel[2][2] = ff_hevc_put_hevc_qpel_h_2_v_2_sse;
            c->put_hevc_qpel[2][3] = ff_hevc_put_hevc_qpel_h_3_v_2_sse;
            c->put_hevc_qpel[3][1] = ff_hevc_put_hevc_qpel_h_1_v_3_sse;
            c->put_hevc_qpel[3][2] = ff_hevc_put_hevc_qpel_h_2_v_3_sse;
            c->put_hevc_qpel[3][3] = ff_hevc_put_hevc_qpel_h_3_v_3_sse;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE4(mm_flags)) {
            c->transform_4x4_luma_add = ff_hevc_transform_4x4_luma_add_10_sse4;
            c->transform_add[0]       = ff_hevc_transform_4x4_add_10_sse4;
            c->transform_add[1]       = ff_hevc_transform_8x8_add_10_sse4;
            c->transform_add[2]       = ff_hevc_transform_16x16_add_10_sse4;
            c->transform_add[3]       = ff_hevc_transform_32x32_add_10_sse4;

            c->put_hevc_epel[0][0] = ff_hevc_put_hevc_epel_pixels_10_sse;
            c->put_hevc_epel[0][1] = ff_hevc_put_hevc_epel_h_10_sse;
            c->put_hevc_epel[1][0] = ff_hevc_put_hevc_epel_v_10_sse;
            c->put_hevc_epel[1][1] = ff_hevc_put_hevc_epel_hv_10_sse;

            /* Only these five QPEL entries are overridden at 10 bits. */
            c->put_hevc_qpel[0][0] = ff_hevc_put_hevc_qpel_pixels_10_sse;
            c->put_hevc_qpel[0][1] = ff_hevc_put_hevc_qpel_h_1_10_sse;
            c->put_hevc_qpel[1][0] = ff_hevc_put_hevc_qpel_v_1_10_sse;
            c->put_hevc_qpel[2][0] = ff_hevc_put_hevc_qpel_v_2_10_sse;
            c->put_hevc_qpel[3][0] = ff_hevc_put_hevc_qpel_v_3_10_sse;
        }
    }
}
/**
 * Point the HEVC DSP context at x86 SIMD implementations.
 *
 * Bit depths 8, 10 and 12 are handled; any other value leaves the context
 * untouched. Each instruction-set group is compiled only when the matching
 * HAVE_*_EXTERNAL macro equals 1, and additionally has to pass its
 * EXTERNAL_*() check. Groups run from oldest to newest, so when two
 * of them write the same slot the assignment made last is the one that
 * remains. Luma loop filters and the EPEL/QPEL tables are installed on
 * x86-64 builds only.
 *
 * @param c         DSP context whose function pointers are overridden
 * @param bit_depth sample bit depth of the stream being decoded
 */
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int flags = av_get_cpu_flags();

    switch (bit_depth) {
    case 8:
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(flags)) {
            c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;

            c->transform_add[0] = ff_hevc_transform_add4_8_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
            }

            /* Slot 1 replaces the MMXEXT routine stored above. */
            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;

            c->transform_add[1] = ff_hevc_transform_add8_8_sse2;
            c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
            c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
            }

            c->transform_add[1] = ff_hevc_transform_add8_8_avx;
            c->transform_add[2] = ff_hevc_transform_add16_8_avx;
            c->transform_add[3] = ff_hevc_transform_add32_8_avx;
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;

            c->transform_add[3] = ff_hevc_transform_add32_8_avx2;
        }
#endif
        break;

    case 10:
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(flags)) {
            c->transform_add[0] = ff_hevc_transform_add4_10_mmxext;

            c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
            }

            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;

            c->transform_add[1] = ff_hevc_transform_add8_10_sse2;
            c->transform_add[2] = ff_hevc_transform_add16_10_sse2;
            c->transform_add[3] = ff_hevc_transform_add32_10_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        /* At this bit depth AVX supplies loop filters only. */
        if (EXTERNAL_AVX(flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
            }
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;

            c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
            c->transform_add[3] = ff_hevc_transform_add32_10_avx2;
        }
#endif
        break;

    case 12:
        /* No transform_add routines are installed at this bit depth. */
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(flags)) {
            c->idct_dc[0] = ff_hevc_idct4x4_dc_12_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
            }

            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
            }
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
        }
#endif
        break;

    default:
        /* No overrides for other bit depths. */
        break;
    }
}