av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (depth > 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_16_sse2;
            else if (s->lowpass == VLPF_CMP)
                s->lowpass_line = ff_lowpass_line_complex_12_sse2;
        }
        if (EXTERNAL_AVX(cpu_flags))
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_16_avx;
    } else {
        if (EXTERNAL_SSE2(cpu_flags)) {
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_sse2;
            else if (s->lowpass == VLPF_CMP)
                s->lowpass_line = ff_lowpass_line_complex_sse2;
        }
        if (EXTERNAL_AVX(cpu_flags))
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_avx;
    }
}
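/*
 * A minimal sketch of the dispatch pattern shared by the init functions
 * collected here: each EXTERNAL_*() check tests one instruction-set bit in
 * the flags returned by av_get_cpu_flags(), and the checks run from oldest
 * to newest extension so that later assignments overwrite earlier ones.
 * SKETCH_EXTERNAL_SSE2 below is an illustrative approximation of the real
 * macros in libavutil/x86/cpu.h, not their actual definition.
 */
#define SKETCH_EXTERNAL_SSE2(flags) \
    (HAVE_SSE2_EXTERNAL && ((flags) & AV_CPU_FLAG_SSE2))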
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(mm_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(mm_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
    }
}
void ff_hevcpred_init_x86(HEVCPredContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
                if (EXTERNAL_SSE2(mm_flags)) {
                    /* placeholder: no SSE2-specific predictors */
                }
                if (EXTERNAL_SSSE3(mm_flags)) {
                    /* placeholder: no SSSE3-specific predictors */
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->pred_planar[0] = pred_planar_0_8_sse;
                    c->pred_planar[1] = pred_planar_1_8_sse;
                    c->pred_planar[2] = pred_planar_2_8_sse;
                    c->pred_planar[3] = pred_planar_3_8_sse;

                    // c->pred_angular[0] = pred_angular_0_8_sse; // removed because too little data = bad performance
                    c->pred_angular[1] = pred_angular_1_8_sse;
                    c->pred_angular[2] = pred_angular_2_8_sse;
                    c->pred_angular[3] = pred_angular_3_8_sse;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    /* placeholder: no AVX-specific predictors */
                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
                if (EXTERNAL_SSE2(mm_flags)) {
                    /* placeholder: no 10-bit SSE2 predictors */
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    /* placeholder: no 10-bit SSE4 predictors */
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    /* placeholder: no 10-bit AVX predictors */
                }
            }
        }
    }
}
av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int cpu_flags      = av_get_cpu_flags();

#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMX(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
    }
#endif

#if HAVE_AMD3DNOW_EXTERNAL
    if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
    }
#endif

#if HAVE_MMXEXT_EXTERNAL
    if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
    }
#endif

#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
    }
#endif

#if HAVE_SSSE3_EXTERNAL
    if (EXTERNAL_SSSE3(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
    }
#endif

#if HAVE_AVX_EXTERNAL
    if (EXTERNAL_AVX(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        // AVX implies !cache64.
        // TODO: Port cache(32|64) detection from x264.
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
    }
#endif
}
av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->float_interleave = float_interleave_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16            = ff_float_to_int16_3dnow;
            c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
        }
    }
    if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
        c->float_to_int16             = ff_float_to_int16_sse;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse;
        c->float_interleave           = float_interleave_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
        c->float_to_int16             = ff_float_to_int16_sse2;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse2;
    }
#endif /* HAVE_YASM */
}
av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
    }
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_avx;
    }
    if (EXTERNAL_FMA4(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_fma4;
    }
    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_fma3;
    }
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
    }
}
av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags)) {
        c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
        c->add_pixels_clamped = ff_add_pixels_clamped_mmx;

        if (!high_bit_depth && avctx->lowres == 0 &&
            (avctx->idct_algo == FF_IDCT_AUTO       ||
             avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
            c->idct_put  = ff_simple_idct_put_mmx;
            c->idct_add  = ff_simple_idct_add_mmx;
            c->idct      = ff_simple_idct_mmx;
            c->perm_type = FF_IDCT_PERM_SIMPLE;
        }
    }
#endif

#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMX(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
    }
#endif

#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
    }
#endif
}
void ff_diracdsp_init_x86(DiracDSPContext *c)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64
        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
        c->dirac_hpel_filter = dirac_hpel_filter_mmx;
        c->add_rect_clamped  = ff_add_rect_clamped_mmx;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif
        PIXFUNC(put, 0, mmx);
        PIXFUNC(avg, 0, mmx);
    }

    if (EXTERNAL_MMXEXT(mm_flags)) {
        PIXFUNC(avg, 0, mmxext);
    }

    if (EXTERNAL_SSE2(mm_flags)) {
        c->dirac_hpel_filter = dirac_hpel_filter_sse2;
        c->add_rect_clamped  = ff_add_rect_clamped_sse2;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;

        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;

        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
    }
}
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    int mm_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_AMD3DNOWEXT(mm_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_3dnowext;
    }
    if (INLINE_SSE(mm_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_sse;
    }
#endif
    if (EXTERNAL_SSE(mm_flags)) {
        fdsp->vector_fmul         = ff_vector_fmul_sse;
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_sse;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
        fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
    }
    if (EXTERNAL_AVX(mm_flags)) {
        fdsp->vector_fmul         = ff_vector_fmul_avx;
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_avx;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_avx;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
    }
}
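/*
 * A minimal usage sketch for the float DSP table filled in above, assuming
 * the avpriv_float_dsp_init() entry point of this era (newer trees allocate
 * the context with avpriv_float_dsp_alloc() instead). Buffer names and sizes
 * are illustrative; the SIMD versions require suitably aligned buffers and a
 * length that is a multiple of the vector width.
 */
#include "libavutil/float_dsp.h"
#include "libavutil/mem.h"

static void example_vector_fmul(void)
{
    DECLARE_ALIGNED(32, float, a)[16];
    DECLARE_ALIGNED(32, float, b)[16];
    DECLARE_ALIGNED(32, float, dst)[16];
    AVFloatDSPContext fdsp;
    int i;

    avpriv_float_dsp_init(&fdsp, 0);
    for (i = 0; i < 16; i++) {
        a[i] = i;
        b[i] = 2.0f;
    }
    /* Calls whichever version ff_float_dsp_init_x86() selected. */
    fdsp.vector_fmul(dst, a, b, 16);
}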
av_cold void ff_g722dsp_init_x86(G722DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        dsp->apply_qmf = ff_g722_apply_qmf_sse2;
}
av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (high_bit_depth ||
        !(avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_XVID))
        return;

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        c->idct_put  = xvid_idct_mmx_put;
        c->idct_add  = xvid_idct_mmx_add;
        c->idct      = ff_xvid_idct_mmx;
        c->perm_type = FF_IDCT_PERM_NONE;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->idct_put  = xvid_idct_mmxext_put;
        c->idct_add  = xvid_idct_mmxext_add;
        c->idct      = ff_xvid_idct_mmxext;
        c->perm_type = FF_IDCT_PERM_NONE;
    }
#endif

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->idct_put  = ff_xvid_idct_put_sse2;
        c->idct_add  = ff_xvid_idct_add_sse2;
        c->idct      = ff_xvid_idct_sse2;
        c->perm_type = FF_IDCT_PERM_SSE2;
    }
#endif /* HAVE_YASM */
}
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{
    int cpuflags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpuflags)) {
        c->idct_put  = ff_vp3_idct_put_mmx;
        c->idct_add  = ff_vp3_idct_add_mmx;
        c->idct_perm = FF_PARTTRANS_IDCT_PERM;
    }
#endif

    if (EXTERNAL_MMXEXT(cpuflags)) {
        c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;

        if (!(flags & CODEC_FLAG_BITEXACT)) {
            c->v_loop_filter = ff_vp3_v_loop_filter_mmx2;
            c->h_loop_filter = ff_vp3_h_loop_filter_mmx2;
        }
    }

    if (EXTERNAL_SSE2(cpuflags)) {
        c->idct_put  = ff_vp3_idct_put_sse2;
        c->idct_add  = ff_vp3_idct_add_sse2;
        c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
    }
}
void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_int16  = ff_add_int16_mmx;
        c->diff_int16 = ff_diff_int16_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc->comp[0].depth_minus1 < 15) {
        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16  = ff_add_int16_sse2;
        c->diff_int16 = ff_diff_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
    }
}
av_cold void ff_dnxhdenc_init_x86(DNXHDEncContext *ctx)
{
    if (EXTERNAL_SSE2(av_get_cpu_flags())) {
        if (ctx->cid_table->bit_depth == 8)
            ctx->get_pixels_8x4_sym = ff_get_pixels_8x4_sym_sse2;
    }
}
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
        switch (param->mode) {
        case BLEND_ADDITION:      param->blend = ff_blend_addition_sse2;      break;
        case BLEND_ADDITION128:   param->blend = ff_blend_addition128_sse2;   break;
        case BLEND_AND:           param->blend = ff_blend_and_sse2;           break;
        case BLEND_AVERAGE:       param->blend = ff_blend_average_sse2;       break;
        case BLEND_DARKEN:        param->blend = ff_blend_darken_sse2;        break;
        case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
        case BLEND_HARDMIX:       param->blend = ff_blend_hardmix_sse2;       break;
        case BLEND_LIGHTEN:       param->blend = ff_blend_lighten_sse2;       break;
        case BLEND_OR:            param->blend = ff_blend_or_sse2;            break;
        case BLEND_PHOENIX:       param->blend = ff_blend_phoenix_sse2;       break;
        case BLEND_SUBTRACT:      param->blend = ff_blend_subtract_sse2;      break;
        case BLEND_XOR:           param->blend = ff_blend_xor_sse2;           break;
        }
    }

    if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
        switch (param->mode) {
        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
        case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
        }
    }
}
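/*
 * A plain-C sketch of what one of the modes dispatched above computes:
 * BLEND_ADDITION is a per-pixel saturated add of the two layers. The
 * function below is hypothetical (its signature is illustrative, not the
 * FilterParams->blend prototype from vf_blend); it only documents the
 * per-pixel operation the SSE2 version implements.
 */
#include <stdint.h>
#include <stddef.h>

static void sketch_blend_addition(const uint8_t *top, ptrdiff_t top_linesize,
                                  const uint8_t *bottom, ptrdiff_t bottom_linesize,
                                  uint8_t *dst, ptrdiff_t dst_linesize,
                                  int width, int height)
{
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int sum = top[x] + bottom[x];
            dst[x] = sum > 255 ? 255 : sum; /* saturate to 8 bits */
        }
        top    += top_linesize;
        bottom += bottom_linesize;
        dst    += dst_linesize;
    }
}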
av_cold void ff_nlmeans_init_x86(NLMeansFunctions *func)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        func->buildIntegralImage = buildIntegralImage_SSE;
    }
}
av_cold void ff_maskedmerge_init_x86(MaskedMergeContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && s->depth == 8) {
        s->maskedmerge = ff_maskedmerge8_sse2;
    }
}
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
{
    int av_unused mm_flags = av_get_cpu_flags();

    switch (c->format) {
    case AV_SAMPLE_FMT_S16P:
        if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
                                        : ff_resample_common_int16_mmxext;
        }
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
                                        : ff_resample_common_int16_sse2;
        }
        if (EXTERNAL_XOP(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
                                        : ff_resample_common_int16_xop;
        }
        break;
    case AV_SAMPLE_FMT_FLTP:
        if (EXTERNAL_SSE(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
                                        : ff_resample_common_float_sse;
        }
        if (EXTERNAL_AVX_FAST(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                        : ff_resample_common_float_avx;
        }
        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                        : ff_resample_common_float_fma3;
        }
        if (EXTERNAL_FMA4(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
                                        : ff_resample_common_float_fma4;
        }
        break;
    case AV_SAMPLE_FMT_DBLP:
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
                                        : ff_resample_common_double_sse2;
        }
        break;
    }
}
av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        c->bswap_buf = ff_bswap32_buf_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        c->bswap_buf = ff_bswap32_buf_ssse3;
}
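/*
 * A plain-C reference of the operation selected above: byte-swap every
 * 32-bit word in a buffer with av_bswap32(), as the scalar fallback in
 * bswapdsp does. Shown only to illustrate what the SSE2/SSSE3 versions
 * compute; it is not the dispatched implementation itself.
 */
#include <stdint.h>
#include "libavutil/bswap.h"

static void bswap32_buf_c(uint32_t *dst, const uint32_t *src, int w)
{
    int i;

    for (i = 0; i < w; i++)
        dst[i] = av_bswap32(src[i]);
}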
av_cold void ff_interlace_init_x86(InterlaceContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        s->lowpass_line = ff_lowpass_line_sse2;
    if (EXTERNAL_AVX(cpu_flags))
        s->lowpass_line = ff_lowpass_line_avx;
}
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16      = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32      = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24      = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents     = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }
    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
            c->extract_exponents  = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
}
av_cold void ff_dct_init_x86(DCTContext *s)
{
    int has_vectors = av_get_cpu_flags();

    if (EXTERNAL_SSE(has_vectors))
        s->dct32 = ff_dct32_float_sse;
    if (EXTERNAL_SSE2(has_vectors))
        s->dct32 = ff_dct32_float_sse2;
    if (EXTERNAL_AVX(has_vectors))
        s->dct32 = ff_dct32_float_avx;
}
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmx;
    }

    // The best way to use SSE2 would be to do 2 SADs in parallel,
    // but we'd have to modify the pixelutils API to return SIMD functions.
    // It's probably not faster to shuffle data around
    // to get two lines of 8 pixels into a single 16-byte register,
    // so just use the MMX 8x8 version even when SSE2 is available.
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmxext;
        sad[3] = ff_pixelutils_sad_16x16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (aligned) {
        case 0: sad[3] = ff_pixelutils_sad_16x16_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned,   src2 unaligned
        case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned,   src2 aligned
        }
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (aligned) {
        case 0: sad[4] = ff_pixelutils_sad_32x32_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned,   src2 unaligned
        case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned,   src2 aligned
        }
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (aligned) {
        case 0: sad[4] = ff_pixelutils_sad_32x32_avx2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned,   src2 unaligned
        case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned,   src2 aligned
        }
    }
}
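/*
 * A minimal usage sketch for the SAD table filled in above, going through
 * the public libavutil/pixelutils.h API. Block sizes are given as log2
 * (3 means 8x8); the buffers and strides here are hypothetical. With
 * aligned == 0 no alignment is promised for either source, matching the
 * "unaligned" cases above.
 */
#include <stdint.h>
#include <stddef.h>
#include "libavutil/pixelutils.h"

static int example_sad_8x8(const uint8_t *a, ptrdiff_t stride_a,
                           const uint8_t *b, ptrdiff_t stride_b)
{
    av_pixelutils_sad_fn sad = av_pixelutils_get_sad_fn(3, 3, 0, NULL);
    if (!sad)
        return -1;
    return sad(a, stride_a, b, stride_b);
}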
av_cold void ff_dct_init_x86(DCTContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
        s->dct32 = ff_dct32_float_sse;
    if (EXTERNAL_SSE2(cpu_flags))
        s->dct32 = ff_dct32_float_sse2;
    if (EXTERNAL_AVX_FAST(cpu_flags))
        s->dct32 = ff_dct32_float_avx;
}
av_cold void ff_init_lls_x86(LLSModel2 *m)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        m->update_lls = ff_update_lls_sse2;
        if (m->indep_count >= 4)
            m->evaluate_lls = ff_evaluate_lls_sse2;
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        m->update_lls = ff_update_lls_avx;
    }
}
av_cold void ff_vp6dsp_init_x86(VP56DSPContext *c, enum AVCodecID codec)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
    }
#endif

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
    }
}
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();
    int bit_depth = (!yadif->csp) ? 8
                                  : yadif->csp->comp[0].depth_minus1 + 1;

    if (bit_depth >= 15) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
        if (EXTERNAL_SSE4(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
    } else if (bit_depth >= 9 && bit_depth <= 14) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
    } else {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_ssse3;
    }
#endif /* HAVE_YASM */
}
av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
        c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
    }
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        c->restore_rgb_planes   = ff_restore_rgb_planes_avx2;
        c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2;
    }
}
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMXEXT(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
    if (EXTERNAL_SSE2(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_ssse3;
}
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(cpu_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif

#define ASSIGN_LF(EXT) \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT;   \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(cpu_flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;

        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}