Example #1
0
av_cold void ff_fft_init_x86(FFTContext *s)
{
  int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
  if (EXTERNAL_AMD3DNOW(cpu_flags)) {
    /* 3DNow! for K6-2/3 */
    s->imdct_calc = ff_imdct_calc_3dnow;
    s->imdct_half = ff_imdct_half_3dnow;
    s->fft_calc   = ff_fft_calc_3dnow;
  }
  if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
    /* 3DNowEx for K7 */
    s->imdct_calc = ff_imdct_calc_3dnowext;
    s->imdct_half = ff_imdct_half_3dnowext;
    s->fft_calc   = ff_fft_calc_3dnowext;
  }
#endif
  if (EXTERNAL_SSE(cpu_flags)) {
    /* SSE for P3/P4/K8 */
    s->imdct_calc  = ff_imdct_calc_sse;
    s->imdct_half  = ff_imdct_half_sse;
    s->fft_permute = ff_fft_permute_sse;
    s->fft_calc    = ff_fft_calc_sse;
    s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
  }
  if (EXTERNAL_AVX(cpu_flags) && s->nbits >= 5) {
    /* AVX for SB */
    s->imdct_half      = ff_imdct_half_avx;
    s->fft_calc        = ff_fft_calc_avx;
    s->fft_permutation = FF_FFT_PERM_AVX;
  }
}
Example #2
0
av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();
    if (EXTERNAL_SSE(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_sse;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
    }

    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_avx;
    }

    if (EXTERNAL_FMA4(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_fma4;
    }

    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_fma3;
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
    }
}
Example #3
0
av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->float_interleave = float_interleave_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16            = ff_float_to_int16_3dnow;
            c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
        }
    }
    if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
        c->float_to_int16             = ff_float_to_int16_sse;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse;
        c->float_interleave           = float_interleave_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
        c->float_to_int16             = ff_float_to_int16_sse2;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse2;
    }
#endif /* HAVE_YASM */
}
Example #4
0
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    int mm_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_AMD3DNOWEXT(mm_flags)) {
        fdsp->vector_fmul_window  = vector_fmul_window_3dnowext;
    }
    if (INLINE_SSE(mm_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_sse;
    }
#endif
    if (EXTERNAL_SSE(mm_flags)) {
        fdsp->vector_fmul = ff_vector_fmul_sse;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_sse;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
        fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
    }
    if (EXTERNAL_AVX(mm_flags)) {
        fdsp->vector_fmul = ff_vector_fmul_avx;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_avx;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
    }
}
Example #5
0
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }

    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
}
Example #6
0
av_cold void ff_dct_init_x86(DCTContext *s)
{
    int has_vectors = av_get_cpu_flags();
    if (EXTERNAL_SSE(has_vectors))
        s->dct32 = ff_dct32_float_sse;
    if (EXTERNAL_SSE2(has_vectors))
        s->dct32 = ff_dct32_float_sse2;
    if (EXTERNAL_AVX(has_vectors))
        s->dct32 = ff_dct32_float_avx;
}
av_cold void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_AMD3DNOW(cpu_flags))
        dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow;
#endif /* ARCH_X86_32 */
    if (EXTERNAL_SSE(cpu_flags))
        dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse;
}
Example #8
0
av_cold void ff_dct_init_x86(DCTContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
        s->dct32 = ff_dct32_float_sse;
    if (EXTERNAL_SSE2(cpu_flags))
        s->dct32 = ff_dct32_float_sse2;
    if (EXTERNAL_AVX_FAST(cpu_flags))
        s->dct32 = ff_dct32_float_avx;
}
Example #9
0
void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(mm_flags)) {
        s->sum_square = ff_sbr_sum_square_sse;
        s->sum64x5    = ff_sbr_sum64x5_sse;
        s->hf_g_filt  = ff_sbr_hf_g_filt_sse;
        s->hf_gen     = ff_sbr_hf_gen_sse;
        s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
    }
}
Example #10
0
av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
#if ARCH_X86_32
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
#endif
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
#if ARCH_X86_32
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1,  8, mmxext);
#endif
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
    }
#endif /* HAVE_YASM */
}
Example #11
0
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
{
    int av_unused mm_flags = av_get_cpu_flags();

    switch(c->format){
    case AV_SAMPLE_FMT_S16P:
        if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
                                        : ff_resample_common_int16_mmxext;
        }
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
                                        : ff_resample_common_int16_sse2;
        }
        if (EXTERNAL_XOP(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
                                        : ff_resample_common_int16_xop;
        }
        break;
    case AV_SAMPLE_FMT_FLTP:
        if (EXTERNAL_SSE(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
                                        : ff_resample_common_float_sse;
        }
        if (EXTERNAL_AVX_FAST(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                        : ff_resample_common_float_avx;
        }
        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                        : ff_resample_common_float_fma3;
        }
        if (EXTERNAL_FMA4(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
                                        : ff_resample_common_float_fma4;
        }
        break;
    case AV_SAMPLE_FMT_DBLP:
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
                                        : ff_resample_common_double_sse2;
        }
        break;
    }
}
Example #12
0
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    int cpu_flags = av_get_cpu_flags();

#if (HAVE_AMD3DNOWEXT_EXTERNAL == 1)
    if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
        fdsp->vector_fmul_window = ff_vector_fmul_window_3dnowext;
    }
#endif
#if (HAVE_SSE_EXTERNAL == 1)
    if (EXTERNAL_SSE(cpu_flags)) {
        fdsp->vector_fmul = ff_vector_fmul_sse;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmul_window = ff_vector_fmul_window_sse;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_sse;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
        fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
        fdsp->butterflies_float   = ff_butterflies_float_sse;
    }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
    if (EXTERNAL_SSE2(cpu_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
    }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
    if (EXTERNAL_AVX(cpu_flags)) {
        fdsp->vector_fmul = ff_vector_fmul_avx;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_avx;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
    }
#endif
#if (HAVE_FMA3_EXTERNAL == 1)
    if (EXTERNAL_FMA3(cpu_flags)) {
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_fma3;
    }
#endif
}
Example #13
0
av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(cpu_flags)) {
#if ARCH_X86_32
        s->decode_hf = ff_decode_hf_sse;
#endif
        s->lfe_fir[0]        = ff_dca_lfe_fir0_sse;
        s->lfe_fir[1]        = ff_dca_lfe_fir1_sse;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        s->decode_hf = ff_decode_hf_sse2;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        s->decode_hf = ff_decode_hf_sse4;
    }
}
Example #14
0
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(mm_flags)) {
        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
    }
    if (EXTERNAL_SSE(mm_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        }
    }
    if (EXTERNAL_SSSE3(mm_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
        }
    }
}
Example #15
0
av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_SSE(cpu_flags)) {
        s->synth_filter_float = synth_filter_sse;
    }
#endif
    if (EXTERNAL_SSE2(cpu_flags)) {
        s->synth_filter_float = synth_filter_sse2;
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        s->synth_filter_float = synth_filter_avx;
    }
    if (EXTERNAL_FMA3(cpu_flags)) {
        s->synth_filter_float = synth_filter_fma3;
    }
#endif /* HAVE_YASM */
}
Example #16
0
av_cold void ff_psdsp_init_x86(PSDSPContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(cpu_flags)) {
        s->add_squares            = ff_ps_add_squares_sse;
        s->mul_pair_single        = ff_ps_mul_pair_single_sse;
        s->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_sse;
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse;
        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse;
    }
    if (EXTERNAL_SSE3(cpu_flags)) {
        s->add_squares            = ff_ps_add_squares_sse3;
        s->stereo_interpolate[0]  = ff_ps_stereo_interpolate_sse3;
        s->stereo_interpolate[1]  = ff_ps_stereo_interpolate_ipdopd_sse3;
        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse4;
    }
}
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
                                  AVCodecContext *avctx)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (!high_bit_depth) {
        if (EXTERNAL_MMX(cpu_flags)) {
            c->clear_block  = ff_clear_block_mmx;
            c->clear_blocks = ff_clear_blocks_mmx;
        }

    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
    if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
        return;

        if (EXTERNAL_SSE(cpu_flags)) {
            c->clear_block  = ff_clear_block_sse;
            c->clear_blocks = ff_clear_blocks_sse;
        }
    }
#endif /* HAVE_YASM */
}
Example #18
0
av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(cpu_flags)) {
        s->neg_odd_64 = ff_sbr_neg_odd_64_sse;
        s->sum_square = ff_sbr_sum_square_sse;
        s->sum64x5    = ff_sbr_sum64x5_sse;
        s->hf_g_filt  = ff_sbr_hf_g_filt_sse;
        s->hf_gen     = ff_sbr_hf_gen_sse;
        s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
        s->qmf_deint_bfly   = ff_sbr_qmf_deint_bfly_sse;
        s->qmf_deint_neg    = ff_sbr_qmf_deint_neg_sse;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        s->qmf_deint_bfly   = ff_sbr_qmf_deint_bfly_sse2;
        s->qmf_pre_shuffle  = ff_sbr_qmf_pre_shuffle_sse2;
        s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_sse2;
        s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_sse2;
        s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_sse2;
        s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_sse2;
    }
}
Example #19
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_X86ASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt)                            \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] =                    \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_ ## type ## sz ## _ ## opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][idxh][idxv]  = type ## _8tap_smooth_  ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type ## _8tap_regular_ ## sz ## dir ## _ ## opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][idxh][idxv]   = type ## _8tap_sharp_   ## sz ## dir ## _ ## opt

#define init_subpel2(idx1, idx2, sz, type, opt) \
    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)

#define init_subpel3_32_64(idx, type, opt) \
    init_subpel2(0, idx, 64, type, opt); \
    init_subpel2(1, idx, 32, type, opt)

#define init_subpel3_8to64(idx, type, opt) \
    init_subpel3_32_64(idx, type, opt); \
    init_subpel2(2, idx, 16, type, opt); \
    init_subpel2(3, idx,  8, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel3_8to64(idx, type, opt); \
    init_subpel2(4, idx,  4, type, opt)

#define init_lpf(opt) do { \
    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
    dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
    dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
    dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
    dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
    dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
    dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, mmxext);
        init_subpel2(4, 1, 4, avg, mmxext);
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, sse2);
        init_subpel3_8to64(1, avg, sse2);
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        init_fpel(1, 0, 32, put, avx);
        init_fpel(0, 0, 64, put, avx);
        init_lpf(avx);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_fpel(1, 1, 32, avg, avx2);
        init_fpel(0, 1, 64, avg, avx2);

#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
        init_subpel3_32_64(0, put, avx2);
        init_subpel3_32_64(1, avg, avx2);
#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_X86ASM */
}
av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                                   const int bit_depth,
                                   const int chroma_format_idc)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
#if HAVE_MMX_EXTERNAL
        if (EXTERNAL_MMX(mm_flags)) {
            h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_8_mmx;
            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_mmx;
            if (chroma_format_idc == 1) {
                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_mmx;
            }
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_mmx;
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_mmx;
                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_mmx;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    if (mm_flags & AV_CPU_FLAG_CMOV)
                        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
                }
            }
        }
#endif
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(mm_flags)) {
            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
            h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_8_mmxext;
            if (chroma_format_idc == 1)
                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_8_mmxext;
            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_8_mmxext;
            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_8_mmxext;
            h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_8_mmxext;
            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
            if (codec_id == AV_CODEC_ID_VP8 || codec_id == AV_CODEC_ID_H264) {
                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
            }
            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
            }
            if (codec_id != AV_CODEC_ID_RV40) {
                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
            }
            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                if (chroma_format_idc == 1) {
                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
                }
            }
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_8_mmxext;
                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_8_mmxext;
                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmxext;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmxext;
                } else {
                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmxext;
                }
            }
        }
#endif
#if HAVE_SSE_EXTERNAL
        if (EXTERNAL_SSE(mm_flags)) {
            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
        }
#endif
#if HAVE_SSE2_EXTERNAL
        if (EXTERNAL_SSE2(mm_flags)) {
            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
                }
            }
        }
#endif
#if HAVE_SSSE3_EXTERNAL
        if (EXTERNAL_SSSE3(mm_flags)) {
            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
            if (chroma_format_idc == 1)
                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
            if (codec_id == AV_CODEC_ID_VP8) {
                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
            } else {
                if (chroma_format_idc == 1)
                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
                if (codec_id == AV_CODEC_ID_SVQ3) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
                } else if (codec_id == AV_CODEC_ID_RV40) {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
                } else {
                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
                }
            }
        }
#endif
    } else if (bit_depth == 10) {
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(mm_flags)) {
            h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
            h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;

            if (chroma_format_idc == 1)
                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;

            h->pred8x8l[DC_128_PRED        ] = ff_pred8x8l_128_dc_10_mmxext;

            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_mmxext;
            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_mmxext;
            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_mmxext;
            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_mmxext;
            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_mmxext;
            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_mmxext;
        }
#endif
#if HAVE_SSE2_EXTERNAL
        if (EXTERNAL_SSE2(mm_flags)) {
            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;

            if (chroma_format_idc == 1) {
                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
            }

            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_sse2;
            h->pred8x8l[DC_128_PRED         ] = ff_pred8x8l_128_dc_10_sse2;
            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_sse2;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_sse2;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_sse2;

            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_sse2;
            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_sse2;
            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_sse2;
            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_sse2;
            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_sse2;
            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_sse2;
        }
#endif
#if HAVE_SSSE3_EXTERNAL
        if (EXTERNAL_SSSE3(mm_flags)) {
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;

            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_ssse3;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_ssse3;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_ssse3;
        }
#endif
#if HAVE_AVX_EXTERNAL
        if (EXTERNAL_AVX(mm_flags)) {
            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_avx;
            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;

            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_avx;
            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_avx;
            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_avx;
            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_avx;
            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_avx;
            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_avx;
        }
#endif
    }
}
Example #21
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_##type##sz##_##opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt

#define init_subpel2(idx, idxh, idxv, dir, type, opt) \
    init_subpel1(0, idx, idxh, idxv, 64, dir, type, opt); \
    init_subpel1(1, idx, idxh, idxv, 32, dir, type, opt); \
    init_subpel1(2, idx, idxh, idxv, 16, dir, type, opt); \
    init_subpel1(3, idx, idxh, idxv,  8, dir, type, opt); \
    init_subpel1(4, idx, idxh, idxv,  4, dir, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel2(idx, 1, 1, hv, type, opt); \
    init_subpel2(idx, 0, 1, v, type, opt); \
    init_subpel2(idx, 1, 0, h, type, opt)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
        init_fpel(4, 1,  4, avg, sse);
        init_fpel(3, 1,  8, avg, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        if (ARCH_X86_64)
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
Example #22
0
av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
#if ARCH_X86_32
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
#endif
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
#if ARCH_X86_32
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmxext;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
#endif
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        c->vp8_idct_add                         = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
    }

    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;

        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;

        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif /* HAVE_YASM */
}
Example #23
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_##type##sz##_##opt


#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt

#define init_subpel2(idx, idxh, idxv, dir, type, opt) \
    init_subpel1(0, idx, idxh, idxv, 64, dir, type, opt); \
    init_subpel1(1, idx, idxh, idxv, 32, dir, type, opt); \
    init_subpel1(2, idx, idxh, idxv, 16, dir, type, opt); \
    init_subpel1(3, idx, idxh, idxv,  8, dir, type, opt); \
    init_subpel1(4, idx, idxh, idxv,  4, dir, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel2(idx, 1, 1, hv, type, opt); \
    init_subpel2(idx, 0, 1, v, type, opt); \
    init_subpel2(idx, 1, 0, h, type, opt)

#define init_lpf(opt) do { \
    if (ARCH_X86_64) { \
        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
    } \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        if (ARCH_X86_64) {
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
            dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
            dsp->itxfm_add[TX_32X32][ADST_ADST] =
            dsp->itxfm_add[TX_32X32][ADST_DCT] =
            dsp->itxfm_add[TX_32X32][DCT_ADST] =
            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        }
        init_lpf(ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        if (ARCH_X86_64) {
            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
            dsp->itxfm_add[TX_32X32][ADST_ADST] =
            dsp->itxfm_add[TX_32X32][ADST_DCT] =
            dsp->itxfm_add[TX_32X32][DCT_ADST] =
            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        }
        init_lpf(avx);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
Example #24
0
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
                                  0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 8, "SSE", ff_conv_fltp_to_flt_2ch_sse);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
                                      0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                      6, 16, 8, "SSE2", ff_conv_s16p_to_s16_6ch_sse2);
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                      6, 16, 4, "SSE2", ff_conv_fltp_to_s16_6ch_sse2);
        } else {
            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                      6, 1, 4, "SSE2SLOW", ff_conv_s16p_to_s16_6ch_sse2slow);
        }
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
                                  0, 16, 8, "SSE2", ff_conv_s32_to_flt_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
                                  0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
                                  0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSE2", ff_conv_flt_to_s16p_6ch_sse2);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "SSSE3", ff_conv_s16_to_fltp_6ch_ssse3);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
                                  0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
    }
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
                                  0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
                                  0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
                                  6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
                                  6, 16, 4, "AVX", ff_conv_s16p_to_flt_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "AVX", ff_conv_fltp_to_s16_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                                  6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
                                  6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "AVX", ff_conv_flt_to_s16p_6ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  2, 16, 4, "AVX", ff_conv_flt_to_fltp_2ch_avx);
        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
                                  6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
    }
}
Example #25
0
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#define init_fpel(idx1, idx2, sz, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##_##opt

#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt

#define init_subpel2(idx1, idx2, sz, type, opt) \
    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)

#define init_subpel3_32_64(idx, type, opt) \
    init_subpel2(0, idx, 64, type, opt); \
    init_subpel2(1, idx, 32, type, opt)

#define init_subpel3_8to64(idx, type, opt) \
    init_subpel3_32_64(idx, type, opt); \
    init_subpel2(2, idx, 16, type, opt); \
    init_subpel2(3, idx,  8, type, opt)

#define init_subpel3(idx, type, opt) \
    init_subpel3_8to64(idx, type, opt); \
    init_subpel2(4, idx,  4, type, opt)

#define init_lpf(opt) do { \
    if (ARCH_X86_64) { \
        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
    } \
} while (0)

#define init_ipred(sz, opt, t, e) \
    dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt

#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext
#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext
#define init_dir_tm_ipred(sz, opt) do { \
    init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \
    init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \
    init_ipred(sz, opt, hd, HOR_DOWN); \
    init_ipred(sz, opt, vl, VERT_LEFT); \
    init_ipred(sz, opt, hu, HOR_UP); \
    init_ipred(sz, opt, tm, TM_VP8); \
    init_ipred(sz, opt, vr, VERT_RIGHT); \
} while (0)
#define init_dir_tm_h_ipred(sz, opt) do { \
    init_dir_tm_ipred(sz, opt); \
    init_ipred(sz, opt, h,  HOR); \
} while (0)
#define init_dc_ipred(sz, opt) do { \
    init_ipred(sz, opt, dc,      DC); \
    init_ipred(sz, opt, dc_left, LEFT_DC); \
    init_ipred(sz, opt, dc_top,  TOP_DC); \
} while (0)
#define init_all_ipred(sz, opt) do { \
    init_dc_ipred(sz, opt); \
    init_dir_tm_h_ipred(sz, opt); \
} while (0)

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel(4, 0,  4, put, mmx);
        init_fpel(3, 0,  8, put, mmx);
        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
        init_ipred(8, mmx, v, VERT);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        init_subpel2(4, 0, 4, put, mmxext);
        init_subpel2(4, 1, 4, avg, mmxext);
        init_fpel(4, 1,  4, avg, mmxext);
        init_fpel(3, 1,  8, avg, mmxext);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
        init_dc_ipred(4, mmxext);
        init_dc_ipred(8, mmxext);
        init_dir_tm_ipred(4, mmxext);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        init_fpel(2, 0, 16, put, sse);
        init_fpel(1, 0, 32, put, sse);
        init_fpel(0, 0, 64, put, sse);
        init_ipred(16, sse, v, VERT);
        init_ipred(32, sse, v, VERT);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        init_subpel3_8to64(0, put, sse2);
        init_subpel3_8to64(1, avg, sse2);
        init_fpel(2, 1, 16, avg, sse2);
        init_fpel(1, 1, 32, avg, sse2);
        init_fpel(0, 1, 64, avg, sse2);
        init_lpf(sse2);
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_sse2;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_sse2;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_sse2;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
        init_dc_ipred(16, sse2);
        init_dc_ipred(32, sse2);
        init_dir_tm_h_ipred(8, sse2);
        init_dir_tm_h_ipred(16, sse2);
        init_dir_tm_h_ipred(32, sse2);
        init_ipred(4, sse2, h, HOR);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        init_subpel3(0, put, ssse3);
        init_subpel3(1, avg, ssse3);
        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
        init_lpf(ssse3);
        init_all_ipred(4, ssse3);
        init_all_ipred(8, ssse3);
        init_all_ipred(16, ssse3);
        init_all_ipred(32, ssse3);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
        dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
        dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
        dsp->itxfm_add[TX_32X32][ADST_ADST] =
        dsp->itxfm_add[TX_32X32][ADST_DCT] =
        dsp->itxfm_add[TX_32X32][DCT_ADST] =
        dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
        init_fpel(1, 0, 32, put, avx);
        init_fpel(0, 0, 64, put, avx);
        init_lpf(avx);
        init_dir_tm_h_ipred(8, avx);
        init_dir_tm_h_ipred(16, avx);
        init_dir_tm_h_ipred(32, avx);
        init_ipred(32, avx, v, VERT);
    }

    if (EXTERNAL_AVX2(cpu_flags)) {
        init_fpel(1, 1, 32, avg, avx2);
        init_fpel(0, 1, 64, avg, avx2);
        if (ARCH_X86_64) {
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
            init_subpel3_32_64(0, put, avx2);
            init_subpel3_32_64(1, avg, avx2);
#endif
        }
        init_dc_ipred(32, avx2);
        init_ipred(32, avx2, h,  HOR);
        init_ipred(32, avx2, tm, TM_VP8);
    }

#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3

#endif /* HAVE_YASM */
}
Example #26
0
void swri_resample_dsp_x86_init(ResampleContext *c)
{
    int av_unused mm_flags = av_get_cpu_flags();

    switch(c->format){
    case AV_SAMPLE_FMT_S16P:
#if (ARCH_X86_32 == 1) && (HAVE_MMXEXT_EXTERNAL == 1)
        if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
                                        : ff_resample_common_int16_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
                                        : ff_resample_common_int16_sse2;
        }
#endif
#if (HAVE_XOP_EXTERNAL == 1)
        if (EXTERNAL_XOP(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
                                        : ff_resample_common_int16_xop;
        }
#endif
        break;
    case AV_SAMPLE_FMT_FLTP:
#if (HAVE_SSE_EXTERNAL == 1)
        if (EXTERNAL_SSE(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
                                        : ff_resample_common_float_sse;
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                        : ff_resample_common_float_avx;
        }
#endif
#if (HAVE_FMA3_EXTERNAL == 1)
        if (EXTERNAL_FMA3(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                        : ff_resample_common_float_fma3;
        }
#endif
#if (HAVE_FMA4_EXTERNAL == 1)
        if (EXTERNAL_FMA4(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
                                        : ff_resample_common_float_fma4;
        }
#endif
        break;
    case AV_SAMPLE_FMT_DBLP:
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
                                        : ff_resample_common_double_sse2;
        }
#endif
        break;
    }
}
Example #27
0
av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                 enum AVSampleFormat out_fmt,
                                 enum AVSampleFormat in_fmt,
                                 int channels){
    int mm_flags = av_get_cpu_flags();

    ac->simd_f= NULL;

//FIXME add memcpy case

#define MULTI_CAPS_FUNC(flag, cap) \
    if (EXTERNAL_##flag(mm_flags)) {\
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
            ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
            ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
    }

MULTI_CAPS_FUNC(MMX, mmx)
MULTI_CAPS_FUNC(SSE2, sse2)

    if(EXTERNAL_MMX(mm_flags)) {
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
        }
    }
    if(EXTERNAL_SSE(mm_flags)) {
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_sse;
        }
    }
    if(EXTERNAL_SSE2(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
            ac->simd_f =  ff_int32_to_float_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
            ac->simd_f =  ff_int16_to_float_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int32_a_sse2;
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int16_a_sse2;

        if(channels == 2) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;

            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;

            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
                ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
                ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
                ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
                ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
        }
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse2;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse2;
        }
    }
    if(EXTERNAL_SSSE3(mm_flags)) {
        if(channels == 2) {
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
        }
    }
    if(EXTERNAL_AVX(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
            ac->simd_f =  ff_int32_to_float_a_avx;
        if(channels == 6) {
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
        }
    }
    if(EXTERNAL_AVX2(mm_flags)) {
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
            ac->simd_f =  ff_float_to_int32_a_avx2;
    }
}