av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->float_interleave = float_interleave_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16            = ff_float_to_int16_3dnow;
            c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
        }
    }
    if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
        c->float_to_int16             = ff_float_to_int16_sse;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse;
        c->float_interleave           = float_interleave_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
        c->float_to_int16             = ff_float_to_int16_sse2;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse2;
    }
#endif /* HAVE_YASM */
}
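/*
 * Aside (not part of the file above): the EXTERNAL_* macros combine a
 * compile-time check (was the external/yasm assembly built at all?) with
 * the matching runtime CPU flag, so a kernel is only installed when both
 * hold.  The real definitions live in libavutil/x86/cpu.h; the expansion
 * below is an illustrative assumption, not a verbatim copy:
 *
 *     #define SKETCH_EXTERNAL_SSE2(flags) \
 *         (HAVE_SSE2_EXTERNAL && ((flags) & AV_CPU_FLAG_SSE2))
 */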
av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int cpu_flags      = av_get_cpu_flags();

#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMX(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
    }
#endif

#if HAVE_AMD3DNOW_EXTERNAL
    if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
    }
#endif

#if HAVE_MMXEXT_EXTERNAL
    if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
    }
#endif

#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
    }
#endif

#if HAVE_SSSE3_EXTERNAL
    if (EXTERNAL_SSSE3(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
    }
#endif

#if HAVE_AVX_EXTERNAL
    if (EXTERNAL_AVX(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        // AVX implies !cache64.
        // TODO: Port cache(32|64) detection from x264.
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
    }
#endif
}
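/*
 * Usage note (illustrative): the table index selects the chroma block
 * width (8 >> i), so [0] serves 8-wide, [1] 4-wide and [2] 2-wide blocks.
 * A call, assuming the h264_chroma_mc_func prototype from h264chroma.h of
 * the same era (all parameter names here are assumptions), looks like:
 *
 *     c->put_h264_chroma_pixels_tab[0](dst, src, stride, h, mx, my);
 *
 * where mx/my are the 1/8-pel chroma motion vector fractions.
 */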
av_cold void ff_fft_init_x86(FFTContext *s)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        /* 3DNow! for K6-2/3 */
        s->imdct_calc = ff_imdct_calc_3dnow;
        s->imdct_half = ff_imdct_half_3dnow;
        s->fft_calc   = ff_fft_calc_3dnow;
    }
    if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
        /* 3DNowEx for K7 */
        s->imdct_calc = ff_imdct_calc_3dnowext;
        s->imdct_half = ff_imdct_half_3dnowext;
        s->fft_calc   = ff_fft_calc_3dnowext;
    }
#endif
    if (EXTERNAL_SSE(cpu_flags)) {
        /* SSE for P3/P4/K8 */
        s->imdct_calc      = ff_imdct_calc_sse;
        s->imdct_half      = ff_imdct_half_sse;
        s->fft_permute     = ff_fft_permute_sse;
        s->fft_calc        = ff_fft_calc_sse;
        s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
    }
    if (EXTERNAL_AVX(cpu_flags) && s->nbits >= 5) {
        /* AVX for SB */
        s->imdct_half      = ff_imdct_half_avx;
        s->fft_calc        = ff_fft_calc_avx;
        s->fft_permutation = FF_FFT_PERM_AVX;
    }
}
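/*
 * Caller-side sketch: the AVX branch above only fires for s->nbits >= 5,
 * i.e. transforms of at least 1 << 5 = 32 points.  The sequence below uses
 * the libavcodec FFT API from fft.h of the same era; treat it as
 * illustrative caller code, not an excerpt from FFmpeg.
 */
static void fft_usage_sketch(FFTComplex *z /* 32 aligned complex values */)
{
    FFTContext fft;
    ff_fft_init(&fft, 5, 0);  /* 32-point forward FFT; init installs the
                                 fastest kernels via ff_fft_init_x86() */
    fft.fft_permute(&fft, z); /* reorder input as the chosen kernel expects */
    fft.fft_calc(&fft, z);    /* in-place transform */
    ff_fft_end(&fft);
}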
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16      = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32      = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24      = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents     = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }
    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
            c->extract_exponents  = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
}
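/*
 * Scalar reference for float_to_fixed24, a sketch matching my reading of
 * the C fallback in ac3dsp.c: each float sample is scaled to 24-bit fixed
 * point.  The SIMD versions selected above replace exactly this loop.
 * Requires <math.h> for lrintf().
 */
static void float_to_fixed24_ref(int32_t *dst, const float *src,
                                 unsigned int len)
{
    unsigned int i;
    for (i = 0; i < len; i++)
        dst[i] = lrintf(src[i] * (1 << 24)); /* scale and round to nearest */
}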
av_cold void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_AMD3DNOW(cpu_flags))
        dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow;
#endif /* ARCH_X86_32 */
    if (EXTERNAL_SSE(cpu_flags))
        dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse;
}
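/*
 * Scalar sketch of what the SIMD versions above accelerate: Vorbis stores
 * coupled channel pairs as magnitude/angle, and inverse coupling
 * reconstructs the two channels in place.  This follows my reading of the
 * C fallback in vorbisdsp.c; treat it as illustrative, not authoritative.
 */
static void vorbis_inverse_coupling_ref(float *mag, float *ang,
                                        intptr_t blocksize)
{
    intptr_t i;
    for (i = 0; i < blocksize; i++) {
        if (mag[i] > 0.0f) {
            if (ang[i] > 0.0f) {
                ang[i] = mag[i] - ang[i];
            } else {
                float temp = ang[i];
                ang[i]     = mag[i];
                mag[i]    += temp;
            }
        } else {
            if (ang[i] > 0.0f) {
                ang[i] += mag[i];
            } else {
                float temp = ang[i];
                ang[i]     = mag[i];
                mag[i]    -= temp;
            }
        }
    }
}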
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(cpu_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif

#define ASSIGN_LF(EXT)                                            \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT;   \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(cpu_flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->vc1_v_loop_filter8             = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8             = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16            = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16            = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
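/*
 * For reference, ASSIGN_LF(mmxext) expands (by ## token pasting) to:
 *
 *     dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_mmxext;
 *     dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_mmxext;
 *     dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_mmxext;
 *     dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_mmxext;
 *     dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_mmxext;
 *     dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_mmxext;
 *
 * The 4/8-pixel variants are external assembly (hence the ff_ prefix); the
 * 16-pixel variants are presumably static wrappers defined earlier in the
 * same file, which is why they lack the prefix.
 */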
av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
{
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (flags & AV_CODEC_FLAG_BITEXACT) {
            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        if (flags & AV_CODEC_FLAG_BITEXACT) {
            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
        }
    }
}
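/*
 * Scalar sketch of the "no-rounding, exact" half-pel interpolation these
 * kernels provide for VP3/Theora: averages truncate ((a + b) >> 1) rather
 * than round ((a + b + 1) >> 1), which bit-exact decoding requires.  In
 * the hpeldsp tables, row [1] holds the 8-pixel-wide variants and columns
 * [1]/[2] the horizontal (x2) / vertical (y2) half-pel cases.  Parameter
 * names below are assumptions; the function is illustrative only.
 */
static void put_no_rnd_pixels8_x2_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride, int h)
{
    int x, y;
    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (src[x] + src[x + 1]) >> 1; /* truncating average */
        dst += stride;
        src += stride;
    }
}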
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16      = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32      = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(mm_flags)) {
        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(mm_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
    }
    if (EXTERNAL_SSE(mm_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24      = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents     = ff_ac3_extract_exponents_sse2;
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        }
    }
    if (EXTERNAL_SSSE3(mm_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
        }
    }
}