Example 1
av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (depth > 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_16_sse2;
            else if (s->lowpass == VLPF_CMP)
                s->lowpass_line = ff_lowpass_line_complex_12_sse2;
        }
        if (EXTERNAL_AVX(cpu_flags))
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_16_avx;
    } else {
        if (EXTERNAL_SSE2(cpu_flags)) {
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_sse2;
            else if (s->lowpass == VLPF_CMP)
                s->lowpass_line = ff_lowpass_line_complex_sse2;
        }
        if (EXTERNAL_AVX(cpu_flags))
            if (s->lowpass == VLPF_LIN)
                s->lowpass_line = ff_lowpass_line_avx;
    }
}
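All of these init routines share one dispatch idiom: av_get_cpu_flags() is read once, and implementations are assigned in ascending order of instruction-set capability, so the last EXTERNAL_*() branch that passes wins and overwrites any pointer set by an earlier branch (EXTERNAL_*() also checks at compile time that the matching external assembly was built in). A minimal sketch of the idiom follows; the context and function names here are hypothetical, not part of FFmpeg.

/* assumes libavutil/attributes.h, libavutil/cpu.h and libavutil/x86/cpu.h */
typedef struct ExampleDSPContext {
    void (*process_line)(uint8_t *dst, const uint8_t *src, int len);
} ExampleDSPContext;

static av_cold void example_dsp_init_x86(ExampleDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    /* Assign in ascending order of capability: a later, faster
     * version simply replaces the pointer set by an earlier one. */
    if (EXTERNAL_SSE2(cpu_flags))
        c->process_line = process_line_sse2; /* hypothetical */
    if (EXTERNAL_AVX2_FAST(cpu_flags))
        c->process_line = process_line_avx2; /* hypothetical */
}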
Example 2
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(mm_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(mm_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
    }
}
Example 3
void ff_hevcpred_init_x86(HEVCPredContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_SSE4(mm_flags)) {
            c->pred_planar[0] = pred_planar_0_8_sse;
            c->pred_planar[1] = pred_planar_1_8_sse;
            c->pred_planar[2] = pred_planar_2_8_sse;
            c->pred_planar[3] = pred_planar_3_8_sse;

            // c->pred_angular[0] = pred_angular_0_8_sse; // removed: too little data per call, poor performance
            c->pred_angular[1] = pred_angular_1_8_sse;
            c->pred_angular[2] = pred_angular_2_8_sse;
            c->pred_angular[3] = pred_angular_3_8_sse;
        }
    }
}
Example 4
av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int cpu_flags      = av_get_cpu_flags();

#if (HAVE_MMX_EXTERNAL == 1)
    if (EXTERNAL_MMX(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
    }
#endif

#if (HAVE_AMD3DNOW_EXTERNAL == 1)
    if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
    }
#endif

#if (HAVE_MMXEXT_EXTERNAL == 1)
    if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
    }
#endif

#if (HAVE_SSE2_EXTERNAL == 1)
    if (EXTERNAL_SSE2(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
    }
#endif

#if (HAVE_SSSE3_EXTERNAL == 1)
    if (EXTERNAL_SSSE3(cpu_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
    }
#endif

#if (HAVE_AVX_EXTERNAL == 1)
    if (EXTERNAL_AVX(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
        // AVX implies !cache64.
        // TODO: Port cache(32|64) detection from x264.
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
    }
#endif
}
Example 5
av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->float_interleave = float_interleave_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16            = ff_float_to_int16_3dnow;
            c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
        }
    }
    if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
        c->float_to_int16             = ff_float_to_int16_sse;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse;
        c->float_interleave           = float_interleave_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
        c->float_to_int16             = ff_float_to_int16_sse2;
        c->float_to_int16_interleave  = float_to_int16_interleave_sse2;
    }
#endif /* HAVE_YASM */
}
Example 6
av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();
    if (EXTERNAL_SSE(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_sse;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
    }

    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_avx;
    }

    if (EXTERNAL_FMA4(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_fma4;
    }

    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT97] = ff_ict_float_fma3;
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
    }
}
Example 7
av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

#if (HAVE_MMX_INLINE == 1)
    if (INLINE_MMX(cpu_flags)) {
        c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
        c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;

        if (!high_bit_depth &&
            avctx->lowres == 0 &&
            (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
                c->idct_put  = ff_simple_idct_put_mmx;
                c->idct_add  = ff_simple_idct_add_mmx;
                c->idct      = ff_simple_idct_mmx;
                c->perm_type = FF_IDCT_PERM_SIMPLE;
        }
    }
#endif
#if (HAVE_MMX_EXTERNAL == 1)
    if (EXTERNAL_MMX(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
    }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
    }
#endif
}
Example 8
void ff_diracdsp_init_x86(DiracDSPContext* c)
{
    int mm_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(mm_flags)) {
        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64
        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
        c->dirac_hpel_filter = dirac_hpel_filter_mmx;
        c->add_rect_clamped = ff_add_rect_clamped_mmx;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif
        PIXFUNC(put, 0, mmx);
        PIXFUNC(avg, 0, mmx);
    }

    if (EXTERNAL_MMXEXT(mm_flags)) {
        PIXFUNC(avg, 0, mmxext);
    }

    if (EXTERNAL_SSE2(mm_flags)) {
        c->dirac_hpel_filter = dirac_hpel_filter_sse2;
        c->add_rect_clamped = ff_add_rect_clamped_sse2;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;

        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;

        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
    }
}
Example 9
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    int mm_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_AMD3DNOWEXT(mm_flags)) {
        fdsp->vector_fmul_window  = vector_fmul_window_3dnowext;
    }
    if (INLINE_SSE(mm_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_sse;
    }
#endif
    if (EXTERNAL_SSE(mm_flags)) {
        fdsp->vector_fmul = ff_vector_fmul_sse;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_sse;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
        fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
    }
    if (EXTERNAL_SSE2(mm_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
    }
    if (EXTERNAL_AVX(mm_flags)) {
        fdsp->vector_fmul = ff_vector_fmul_avx;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
        fdsp->vector_fmul_add    = ff_vector_fmul_add_avx;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
    }
}
Example 10
av_cold void ff_g722dsp_init_x86(G722DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        dsp->apply_qmf = ff_g722_apply_qmf_sse2;
}
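Because each context is filled with C fallbacks before the x86 init runs, the SIMD paths can be ruled out at runtime for testing by forcing the reported CPU flags to zero first. A brief sketch using libavutil's av_force_cpu_flags(); the G722DSPContext variable around it is assumed, not taken from this example.

#include "libavutil/cpu.h"

/* Make av_get_cpu_flags() report no SIMD extensions, so the
 * init above leaves dsp->apply_qmf pointing at the C version. */
av_force_cpu_flags(0);
ff_g722dsp_init_x86(&dsp); /* dsp: a G722DSPContext already holding the C fallback */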
Example 11
av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (high_bit_depth ||
        !(avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_XVID))
        return;

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        c->idct_put  = xvid_idct_mmx_put;
        c->idct_add  = xvid_idct_mmx_add;
        c->idct      = ff_xvid_idct_mmx;
        c->perm_type = FF_IDCT_PERM_NONE;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->idct_put  = xvid_idct_mmxext_put;
        c->idct_add  = xvid_idct_mmxext_add;
        c->idct      = ff_xvid_idct_mmxext;
        c->perm_type = FF_IDCT_PERM_NONE;
    }
#endif

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->idct_put  = ff_xvid_idct_put_sse2;
        c->idct_add  = ff_xvid_idct_add_sse2;
        c->idct      = ff_xvid_idct_sse2;
        c->perm_type = FF_IDCT_PERM_SSE2;
    }
#endif /* HAVE_YASM */
}
Example 12
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{
    int cpuflags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpuflags)) {
        c->idct_put  = ff_vp3_idct_put_mmx;
        c->idct_add  = ff_vp3_idct_add_mmx;
        c->idct_perm = FF_PARTTRANS_IDCT_PERM;
    }
#endif

    if (EXTERNAL_MMXEXT(cpuflags)) {
        c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;

        if (!(flags & CODEC_FLAG_BITEXACT)) {
            c->v_loop_filter = ff_vp3_v_loop_filter_mmx2;
            c->h_loop_filter = ff_vp3_h_loop_filter_mmx2;
        }
    }

    if (EXTERNAL_SSE2(cpuflags)) {
        c->idct_put  = ff_vp3_idct_put_sse2;
        c->idct_add  = ff_vp3_idct_add_sse2;
        c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
    }
}
Example 13
void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_int16 = ff_add_int16_mmx;
        c->diff_int16 = ff_diff_int16_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc->comp[0].depth_minus1<15) {
        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16 = ff_add_int16_sse2;
        c->diff_int16 = ff_diff_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
    }
}
Example 14
av_cold void ff_dnxhdenc_init_x86(DNXHDEncContext *ctx)
{
    if (EXTERNAL_SSE2(av_get_cpu_flags())) {
        if (ctx->cid_table->bit_depth == 8)
            ctx->get_pixels_8x4_sym = ff_get_pixels_8x4_sym_sse2;
    }
}
Example 15
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
        switch (param->mode) {
        case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
        case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break;
        case BLEND_AND:      param->blend = ff_blend_and_sse2;      break;
        case BLEND_AVERAGE:  param->blend = ff_blend_average_sse2;  break;
        case BLEND_DARKEN:   param->blend = ff_blend_darken_sse2;   break;
        case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
        case BLEND_HARDMIX:  param->blend = ff_blend_hardmix_sse2;  break;
        case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_sse2;  break;
        case BLEND_OR:       param->blend = ff_blend_or_sse2;       break;
        case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
        case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
        case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
        }
    }
    if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
        switch (param->mode) {
        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
        case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
        }
    }
}
Example 16
av_cold void ff_nlmeans_init_x86(NLMeansFunctions *func)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        func->buildIntegralImage = buildIntegralImage_SSE;
    }
}
Example 17
av_cold void ff_maskedmerge_init_x86(MaskedMergeContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && s->depth == 8) {
        s->maskedmerge = ff_maskedmerge8_sse2;
    }
}
Example 18
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
{
    int av_unused mm_flags = av_get_cpu_flags();

    switch(c->format){
    case AV_SAMPLE_FMT_S16P:
        if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
                                        : ff_resample_common_int16_mmxext;
        }
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
                                        : ff_resample_common_int16_sse2;
        }
        if (EXTERNAL_XOP(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
                                        : ff_resample_common_int16_xop;
        }
        break;
    case AV_SAMPLE_FMT_FLTP:
        if (EXTERNAL_SSE(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
                                        : ff_resample_common_float_sse;
        }
        if (EXTERNAL_AVX_FAST(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                        : ff_resample_common_float_avx;
        }
        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                        : ff_resample_common_float_fma3;
        }
        if (EXTERNAL_FMA4(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
                                        : ff_resample_common_float_fma4;
        }
        break;
    case AV_SAMPLE_FMT_DBLP:
        if (EXTERNAL_SSE2(mm_flags)) {
            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
                                        : ff_resample_common_double_sse2;
        }
        break;
    }
}
Example 19
av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        c->bswap_buf = ff_bswap32_buf_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        c->bswap_buf = ff_bswap32_buf_ssse3;
}
Example 20
av_cold void ff_interlace_init_x86(InterlaceContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        s->lowpass_line = ff_lowpass_line_sse2;
    if (EXTERNAL_AVX(cpu_flags))
        s->lowpass_line = ff_lowpass_line_avx;
}
Example 21
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }

    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
}
Example 22
av_cold void ff_dct_init_x86(DCTContext *s)
{
    int has_vectors = av_get_cpu_flags();
    if (EXTERNAL_SSE(has_vectors))
        s->dct32 = ff_dct32_float_sse;
    if (EXTERNAL_SSE2(has_vectors))
        s->dct32 = ff_dct32_float_sse2;
    if (EXTERNAL_AVX(has_vectors))
        s->dct32 = ff_dct32_float_avx;
}
Example 23
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmx;
    }

    // The best way to use SSE2 would be to do 2 SADs in parallel,
    // but we'd have to modify the pixelutils API to return SIMD functions.

    // It's probably not faster to shuffle data around
    // to get two lines of 8 pixels into a single 16byte register,
    // so just use the MMX 8x8 version even when SSE2 is available.
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmxext;
        sad[3] = ff_pixelutils_sad_16x16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (aligned) {
        case 0: sad[3] = ff_pixelutils_sad_16x16_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1   aligned, src2 unaligned
        case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1   aligned, src2   aligned
        }

        switch (aligned) {
        case 0: sad[4] = ff_pixelutils_sad_32x32_sse2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1   aligned, src2 unaligned
        case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1   aligned, src2   aligned
        }
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (aligned) {
        case 0: sad[4] = ff_pixelutils_sad_32x32_avx2;   break; // src1 unaligned, src2 unaligned
        case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1   aligned, src2 unaligned
        case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1   aligned, src2   aligned
        }
    }
}
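The aligned argument (0, 1 or 2) selects progressively stricter alignment guarantees, mirroring the three switch cases above. Callers normally reach this table through libavutil's public entry point; a short usage sketch, assuming two 16x16 blocks (src1, stride1, src2, stride2 are the caller's buffers and strides):

#include "libavutil/pixelutils.h"

/* 2^4 x 2^4 = 16x16 blocks; aligned = 2 promises that both
 * source pointers and strides are suitably aligned. */
av_pixelutils_sad_fn sad = av_pixelutils_get_sad_fn(4, 4, 2, NULL);
int cost = sad(src1, stride1, src2, stride2);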
Example 24
av_cold void ff_dct_init_x86(DCTContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
        s->dct32 = ff_dct32_float_sse;
    if (EXTERNAL_SSE2(cpu_flags))
        s->dct32 = ff_dct32_float_sse2;
    if (EXTERNAL_AVX_FAST(cpu_flags))
        s->dct32 = ff_dct32_float_avx;
}
Example 25
av_cold void ff_init_lls_x86(LLSModel2 *m)
{
    int cpu_flags = av_get_cpu_flags();
    if (EXTERNAL_SSE2(cpu_flags)) {
        m->update_lls = ff_update_lls_sse2;
        if (m->indep_count >= 4)
            m->evaluate_lls = ff_evaluate_lls_sse2;
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        m->update_lls = ff_update_lls_avx;
    }
}
Example 26
av_cold void ff_vp6dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
    }
#endif
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
    }
}
Example 27
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();
    int bit_depth = (!yadif->csp) ? 8
                                  : yadif->csp->comp[0].depth_minus1 + 1;

    if (bit_depth >= 15) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
        if (EXTERNAL_SSE4(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
    } else if (bit_depth >= 9 && bit_depth <= 14) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
    } else {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            yadif->filter_line = ff_yadif_filter_line_ssse3;
    }
#endif /* HAVE_YASM */
}
Example 28
av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
        c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
    }
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        c->restore_rgb_planes   = ff_restore_rgb_planes_avx2;
        c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2;
    }
}
Example 29
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMXEXT(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
    if (EXTERNAL_SSE2(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_sse2;
    if (EXTERNAL_SSSE3(cpu_flags))
        yadif->filter_line = ff_yadif_filter_line_ssse3;
}
Example 30
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(cpu_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif
#define ASSIGN_LF(EXT) \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(cpu_flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;

        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
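ASSIGN_LF builds each function name by token pasting (##), gluing the EXT argument onto every stem. For illustration, ASSIGN_LF(mmxext) expands to:

dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_mmxext;
dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_mmxext;
dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_mmxext;
dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_mmxext;
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_mmxext;
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_mmxext;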