예제 #1
0
/**
 * Point the HEVC DSP function table at x86 SIMD routines.
 *
 * Only bit depths 8 and 10 are handled; for any other value the context is
 * left untouched.
 *
 * @param c         context whose function pointers are overridden
 * @param bit_depth sample bit depth of the stream
 */
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    switch (bit_depth) {
    case 8:
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
        }
        /* Luma deblocking and the MC tables are installed on x86-64 only. */
        if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
        break;

    case 10:
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
        }
        if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
        break;

    default:
        /* No SIMD routines are installed for other bit depths. */
        break;
    }
}
예제 #2
0
/**
 * Install x86 SIMD intra-prediction routines into the HEVC prediction context.
 *
 * Only the 8-bit path assigns anything, and only when the MMX, MMXEXT and
 * SSE4 checks all succeed. Every other combination leaves the context as is.
 *
 * @param c         context whose pred_planar / pred_angular slots are set
 * @param bit_depth sample bit depth of the stream
 */
void ff_hevcpred_init_x86(HEVCPredContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    /* Nothing is installed for 10-bit or any other depth. */
    if (bit_depth != 8) {
        return;
    }

    if (EXTERNAL_MMX(cpu_flags) && EXTERNAL_MMXEXT(cpu_flags) &&
        EXTERNAL_SSE4(cpu_flags)) {
        c->pred_planar[0] = pred_planar_0_8_sse;
        c->pred_planar[1] = pred_planar_1_8_sse;
        c->pred_planar[2] = pred_planar_2_8_sse;
        c->pred_planar[3] = pred_planar_3_8_sse;

        /* pred_angular[0] is intentionally not overridden: the original
         * author noted that too little data gives bad performance there. */
        c->pred_angular[1] = pred_angular_1_8_sse;
        c->pred_angular[2] = pred_angular_2_8_sse;
        c->pred_angular[3] = pred_angular_3_8_sse;
    }
}
예제 #3
0
/**
 * Install x86 SIMD routines into the lossless video DSP context.
 *
 * The checks run from the oldest instruction set to the newest, so a later
 * match replaces the pointer stored by an earlier one.
 *
 * @param c     context whose function pointers are overridden
 * @param avctx codec context; only avctx->pix_fmt is read, to look up the
 *              depth of the first component
 */
void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_int16 = ff_add_int16_mmx;
        c->diff_int16 = ff_diff_int16_mmx;
    }

    /* av_pix_fmt_desc_get() yields NULL for an unknown pixel format, so the
     * descriptor must be checked before its depth is read. Without a
     * descriptor the depth-limited median predictors are simply not used. */
    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc &&
        pix_desc->comp[0].depth_minus1 < 15) {
        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16 = ff_add_int16_sse2;
        c->diff_int16 = ff_diff_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
    }
}
예제 #4
0
/**
 * Run the per-instruction-set dsputil initialisers for x86.
 *
 * Each helper is called only when the matching CPU-flag check succeeds, in
 * order from MMX up to SSE4. The encoder-side initialiser is called last,
 * when CONFIG_ENCODERS is set.
 *
 * @param c              DSP context to populate
 * @param avctx          codec context forwarded to every helper
 * @param high_bit_depth forwarded unchanged to every helper
 */
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int flags = av_get_cpu_flags();

    if (X86_MMX(flags)) {
        dsputil_init_mmx(c, avctx, flags, high_bit_depth);
    }
    if (X86_MMXEXT(flags)) {
        dsputil_init_mmxext(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE(flags)) {
        dsputil_init_sse(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE2(flags)) {
        dsputil_init_sse2(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSSE3(flags)) {
        dsputil_init_ssse3(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSE4(flags)) {
        dsputil_init_sse4(c, avctx, flags, high_bit_depth);
    }

    if (CONFIG_ENCODERS) {
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
    }
}
예제 #5
0
/**
 * Run the per-instruction-set dsputil initialisers for x86.
 *
 * The CMOV-based median prediction is installed first (inline-asm builds with
 * seven usable registers only). The per-ISA helpers then run from MMX up to
 * SSE4, and the encoder-side initialiser runs last when CONFIG_ENCODERS is set.
 *
 * @param c     DSP context to populate
 * @param avctx codec context forwarded to every helper
 */
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
{
    int flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (flags & AV_CPU_FLAG_CMOV) {
        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
    }
#endif

    if (X86_MMX(flags)) {
        dsputil_init_mmx(c, avctx, flags);
    }
    if (X86_MMXEXT(flags)) {
        dsputil_init_mmxext(c, avctx, flags);
    }
    if (X86_SSE(flags)) {
        dsputil_init_sse(c, avctx, flags);
    }
    if (X86_SSE2(flags)) {
        dsputil_init_sse2(c, avctx, flags);
    }
    if (EXTERNAL_SSSE3(flags)) {
        dsputil_init_ssse3(c, avctx, flags);
    }
    if (EXTERNAL_SSE4(flags)) {
        dsputil_init_sse4(c, avctx, flags);
    }

    if (CONFIG_ENCODERS) {
        ff_dsputilenc_init_mmx(c, avctx);
    }
}
예제 #6
0
/**
 * Install x86 SIMD routines into the RV34 DSP context.
 *
 * @param c   context whose function pointers are overridden
 * @param dsp not used by this function
 */
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

    /* DC-only IDCT add: the SSE4 version wins over the MMX one. */
    if (EXTERNAL_SSE4(cpu_flags)) {
        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
    } else if (EXTERNAL_MMX(cpu_flags)) {
        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->rv34_idct_add         = ff_rv34_idct_add_mmx2;
        c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
    }
}
예제 #7
0
/**
 * Install x86 SIMD routines into the VC-1 DSP context.
 *
 * The inline-asm initialisers run first (when compiled in). The external
 * (HAVE_YASM) routines are then applied from MMX up to SSE4, each step
 * replacing any slot that an earlier step already filled.
 *
 * @param dsp context whose function pointers are overridden
 */
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(flags)) {
        ff_vc1dsp_init_mmx(dsp);
    }
#endif
#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(flags)) {
        ff_vc1dsp_init_mmxext(dsp);
    }
#endif

/* Sets all six loop-filter slots for one instruction-set suffix. The 4- and
 * 8-wide filters use ff_-prefixed symbols, the 16-wide ones unprefixed
 * symbols (presumably wrappers defined elsewhere in this file). */
#define ASSIGN_LF(EXT)                                                    \
    do {                                                                  \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT;         \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT;         \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT;         \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT;         \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT;           \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT;           \
    } while (0)

#if HAVE_YASM
    if (EXTERNAL_MMX(flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }

    if (EXTERNAL_AMD3DNOW(flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }

    if (EXTERNAL_MMXEXT(flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }

    /* SSE2 replaces only the 8- and 16-wide loop filters. */
    if (EXTERNAL_SSE2(flags)) {
        dsp->vc1_v_loop_filter8             = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8             = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16            = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16            = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2;
    }

    if (EXTERNAL_SSSE3(flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }

    /* SSE4 replaces only the horizontal 8- and 16-wide loop filters. */
    if (EXTERNAL_SSE4(flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
예제 #8
0
파일: dsputil_init.c 프로젝트: 0Soul/FFmpeg
/**
 * Run the per-instruction-set dsputil initialisers for x86.
 *
 * In inline-asm builds this also picks an MMX IDCT, but only when lowres is
 * zero and high_bit_depth is not set. The per-ISA helpers then run from MMX
 * up to SSE4, and the encoder-side initialiser runs last when CONFIG_ENCODERS
 * is set.
 *
 * @param c              DSP context to populate
 * @param avctx          codec context; idct_algo and lowres are read here and
 *                       the pointer is forwarded to every helper
 * @param high_bit_depth non-zero disables the MMX IDCT selection; forwarded
 *                       unchanged to every helper
 */
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (HAVE_MMX && (flags & AV_CPU_FLAG_CMOV)) {
        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
    }
#endif

    if (X86_MMX(flags)) {
#if HAVE_INLINE_ASM
        if (!avctx->lowres && !high_bit_depth) {
            switch (avctx->idct_algo) {
            case FF_IDCT_AUTO:
            case FF_IDCT_SIMPLEMMX:
                c->idct_put              = ff_simple_idct_put_mmx;
                c->idct_add              = ff_simple_idct_add_mmx;
                c->idct                  = ff_simple_idct_mmx;
                c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
                break;
            case FF_IDCT_XVIDMMX:
                /* The permutation type is not changed for the Xvid IDCT. */
                c->idct_put = ff_idct_xvid_mmx_put;
                c->idct_add = ff_idct_xvid_mmx_add;
                c->idct     = ff_idct_xvid_mmx;
                break;
            default:
                break;
            }
        }
#endif /* HAVE_INLINE_ASM */

        dsputil_init_mmx(c, avctx, flags, high_bit_depth);
    }

    if (X86_MMXEXT(flags)) {
        dsputil_init_mmxext(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE(flags)) {
        dsputil_init_sse(c, avctx, flags, high_bit_depth);
    }
    if (X86_SSE2(flags)) {
        dsputil_init_sse2(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSSE3(flags)) {
        dsputil_init_ssse3(c, avctx, flags, high_bit_depth);
    }
    if (EXTERNAL_SSE4(flags)) {
        dsputil_init_sse4(c, avctx, flags, high_bit_depth);
    }

    if (CONFIG_ENCODERS) {
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
    }
}
예제 #9
0
/**
 * Select an x86 SIMD frame-blending routine for the framerate filter.
 *
 * AVX2 is preferred for both sample sizes. Without it, 8-bit content falls
 * back to SSSE3 and everything else to SSE4. If no check succeeds, s->blend
 * is left unchanged.
 *
 * @param s filter context; s->bitdepth is read and s->blend may be written
 */
void ff_framerate_init_x86(FrameRateContext *s)
{
    int flags = av_get_cpu_flags();
    const int is_8bit = s->bitdepth == 8;

    if (EXTERNAL_AVX2_FAST(flags)) {
        s->blend = is_8bit ? ff_blend_frames_avx2 : ff_blend_frames16_avx2;
    } else if (is_8bit) {
        if (EXTERNAL_SSSE3(flags)) {
            s->blend = ff_blend_frames_ssse3;
        }
    } else if (EXTERNAL_SSE4(flags)) {
        s->blend = ff_blend_frames16_sse4;
    }
}
예제 #10
0
/**
 * Copy one image plane using the SSE4 routine, when that is possible.
 *
 * The byte width is passed through FFALIGN(bytewidth, 64) (presumably a
 * round-up to a multiple of 64). The SSE4 copy is used only if that aligned
 * width fits within both line sizes.
 *
 * @param dst          destination plane
 * @param dst_linesize destination line size in bytes
 * @param src          source plane
 * @param src_linesize source line size in bytes
 * @param bytewidth    number of bytes to copy per line
 * @param height       number of lines
 * @return 0 when the SSE4 copy ran, AVERROR(ENOSYS) otherwise
 */
int ff_image_copy_plane_uc_from_x86(uint8_t       *dst, ptrdiff_t dst_linesize,
                                    const uint8_t *src, ptrdiff_t src_linesize,
                                    ptrdiff_t bytewidth, int height)
{
    int flags = av_get_cpu_flags();
    ptrdiff_t aligned_width = FFALIGN(bytewidth, 64);

    if (EXTERNAL_SSE4(flags) &&
        aligned_width <= dst_linesize && aligned_width <= src_linesize) {
        ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize,
                                         aligned_width, height);
        return 0;
    }

    return AVERROR(ENOSYS);
}
예제 #11
0
/**
 * Install x86 SIMD routines into the Dirac DSP context.
 *
 * The checks run from MMX up to SSE4, so a later match replaces any slot
 * that an earlier one filled.
 *
 * @param c context whose function pointers are overridden
 */
void ff_diracdsp_init_x86(DiracDSPContext* c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64
        /* These MMX routines are installed on 32-bit builds only. */
        c->add_dirac_obmc[1]          = ff_add_dirac_obmc16_mmx;
        c->add_dirac_obmc[2]          = ff_add_dirac_obmc32_mmx;
        c->dirac_hpel_filter          = dirac_hpel_filter_mmx;
        c->add_rect_clamped           = ff_add_rect_clamped_mmx;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif
        PIXFUNC(put, 0, mmx);
        PIXFUNC(avg, 0, mmx);
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        PIXFUNC(avg, 0, mmxext);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->dirac_hpel_filter          = dirac_hpel_filter_sse2;
        c->add_rect_clamped           = ff_add_rect_clamped_sse2;
        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;

        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;

        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->dequant_subband[1] = ff_dequant_subband_32_sse4;
#if ARCH_X86_64
        c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
#endif
    }
}
예제 #12
0
/**
 * Install x86 SIMD routines into the DCA DSP context.
 *
 * decode_hf is assigned by up to three checks in ascending order (SSE on
 * 32-bit builds only, then SSE2, then SSE4), so the newest available version
 * is the one that remains.
 *
 * @param s context whose function pointers are overridden
 */
av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
{
    int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(flags)) {
        s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
        s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
#if ARCH_X86_32
        s->decode_hf  = ff_decode_hf_sse;
#endif
    }

    if (EXTERNAL_SSE2(flags)) {
        s->decode_hf = ff_decode_hf_sse2;
    }

    if (EXTERNAL_SSE4(flags)) {
        s->decode_hf = ff_decode_hf_sse4;
    }
}
예제 #13
0
/**
 * Install x86 SIMD routines into the PS DSP context.
 *
 * The checks run in ascending order (SSE, SSE3, SSE4). add_squares,
 * hybrid_analysis and hybrid_synthesis_deint can each be replaced by a later
 * step.
 *
 * @param s context whose function pointers are overridden
 */
av_cold void ff_psdsp_init_x86(PSDSPContext *s)
{
    int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(flags)) {
        s->mul_pair_single        = ff_ps_mul_pair_single_sse;
        s->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_sse;
        s->add_squares            = ff_ps_add_squares_sse;
        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse;
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse;
    }

    if (EXTERNAL_SSE3(flags)) {
        s->stereo_interpolate[0] = ff_ps_stereo_interpolate_sse3;
        s->stereo_interpolate[1] = ff_ps_stereo_interpolate_ipdopd_sse3;
        s->add_squares           = ff_ps_add_squares_sse3;
        s->hybrid_analysis       = ff_ps_hybrid_analysis_sse3;
    }

    if (EXTERNAL_SSE4(flags)) {
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse4;
    }
}
예제 #14
0
/**
 * Install an x86 SIMD IDCT into the ProRes DSP context (x86-64 builds only).
 *
 * Whenever any of the SSE2, SSE4 or AVX checks succeeds, the permutation type
 * becomes FF_TRANSPOSE_IDCT_PERM and idct_put is set to the newest matching
 * version. On other builds the function does nothing.
 *
 * @param dsp context whose idct_put / idct_permutation_type are overridden
 */
av_cold void ff_proresdsp_init_x86(ProresDSPContext *dsp)
{
#if ARCH_X86_64
    int flags = av_get_cpu_flags();

    /* All three implementations use the same coefficient permutation. */
    if (EXTERNAL_SSE2(flags) || EXTERNAL_SSE4(flags) || EXTERNAL_AVX(flags)) {
        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }

    if (EXTERNAL_SSE2(flags)) {
        dsp->idct_put = ff_prores_idct_put_10_sse2;
    }
    if (EXTERNAL_SSE4(flags)) {
        dsp->idct_put = ff_prores_idct_put_10_sse4;
    }
    if (EXTERNAL_AVX(flags)) {
        dsp->idct_put = ff_prores_idct_put_10_avx;
    }
#endif /* ARCH_X86_64 */
}
/**
 * Select an x86 SIMD line filter for yadif (HAVE_YASM builds only).
 *
 * The bit depth is taken from the first component of yadif->csp, or assumed
 * to be 8 when csp is NULL. Three families exist: "16bit" for depths of 15
 * and above, "10bit" for depths 9 to 14, and the plain 8-bit one otherwise.
 * Within a family the newest available instruction set wins.
 *
 * @param yadif filter context; csp is read and filter_line may be written
 */
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
#if HAVE_YASM
    int flags = av_get_cpu_flags();
    int bit_depth = 8;

    if (yadif->csp) {
        bit_depth = yadif->csp->comp[0].depth_minus1 + 1;
    }

    if (bit_depth >= 15) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
        }
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
        }
        if (EXTERNAL_SSSE3(flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
        }
        if (EXTERNAL_SSE4(flags)) {
            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
        }
    } else if (bit_depth >= 9) {
        /* Depths 9..14; the upper bound is implied by the branch above. */
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(flags)) {
            yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
        }
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(flags)) {
            yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
        }
        if (EXTERNAL_SSSE3(flags)) {
            yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
        }
    } else {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(flags)) {
            yadif->filter_line = ff_yadif_filter_line_mmxext;
        }
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(flags)) {
            yadif->filter_line = ff_yadif_filter_line_sse2;
        }
        if (EXTERNAL_SSSE3(flags)) {
            yadif->filter_line = ff_yadif_filter_line_ssse3;
        }
    }
#endif /* HAVE_YASM */
}
예제 #16
0
/**
 * Install x86 SIMD routines into the HEVC DSP context.
 *
 * Only bit depths 8 and 10 are handled; any other value leaves the context
 * unchanged. The instruction-set checks run in sequence, so a later check
 * that writes the same slot replaces what an earlier one stored. A second
 * pass, compiled only on x86-64, adds the larger transforms, luma loop
 * filters, weighted prediction and further MC functions.
 *
 * @param c         context whose function pointers are overridden
 * @param bit_depth sample bit depth of the stream
 */
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

/* Fills entries 0..7 of a table with the functions whose names carry the
 * size tokens 4, 8, 12, 16, 24, 32, 48 and 64 respectively. */
#define SET_LUMA_FUNCS(tabname, funcname, depth, cf)      \
    c->tabname[0] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[1] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[2] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _32_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _48_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _64_ ## depth ## _ ## cf;

/* Fills entries 1 and 3..7 of a table with the functions for size tokens
 * 4, 8, 12, 16, 24 and 32; entries 0 and 2 are not written by this macro. */
#define SET_CHROMA_FUNCS(tabname, funcname, depth, cf)    \
    c->tabname[1] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _32_ ## depth ## _ ## cf;

/* Shorthands that target put_hevc_qpel[v][h] / put_hevc_epel[v][h]. */
#define SET_QPEL_FUNCS(v, h, depth, cf, name) SET_LUMA_FUNCS  (put_hevc_qpel[v][h], name, depth, cf)
#define SET_EPEL_FUNCS(v, h, depth, cf, name) SET_CHROMA_FUNCS(put_hevc_epel[v][h], name, depth, cf)

    /* First pass: routines installed regardless of x86-32 / x86-64. */
    if (bit_depth == 8) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

            /* idct_dc[1] replaces the MMXEXT version set above. */
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_8_sse2;
            SET_QPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     8, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 8, sse2);
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            SET_QPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_qpel_v);
            SET_EPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_epel_v);

        }
        if (EXTERNAL_AVX(cpu_flags)) {
            /* Replaces the SSE2 4x4 / 8x8 IDCTs set above. */
            c->idct[0] = ff_hevc_idct_4x4_8_avx;
            c->idct[1] = ff_hevc_idct_8x8_8_avx;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;

            /* idct_dc[1] replaces the MMXEXT version set above. */
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
            SET_QPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     10, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 10, sse2);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            /* Replaces the SSE2 4x4 / 8x8 IDCTs set above. */
            c->idct[0] = ff_hevc_idct_4x4_10_avx;
            c->idct[1] = ff_hevc_idct_8x8_10_avx;
        }
    }

    /* Second pass: routines that are only installed on x86-64 builds. */
#if ARCH_X86_64
    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_8_sse2;
            c->idct[3] = ff_hevc_idct_32x32_8_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }

        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     8, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 8, sse4);
        }

        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            /* For 8-bit only the combined h+v MC is taken from AVX here.
             * NOTE(review): the hv names have no ff_ prefix; presumably they
             * are wrappers defined elsewhere in this file - confirm. */
            SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_8_avx;
            c->idct[3] = ff_hevc_idct_32x32_8_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            /* Replaces the SSE2 16x16 / 32x32 DC IDCTs from the first pass. */
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_10_sse2;
            c->idct[3] = ff_hevc_idct_32x32_10_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     10, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 10, sse4);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            /* For 10-bit the h, v and h+v MC functions all come from AVX. */
            SET_QPEL_FUNCS(0, 1, 10, avx, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 10, avx, ff_hevc_qpel_v);
            SET_QPEL_FUNCS(1, 1, 10, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v);
            SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_10_avx;
            c->idct[3] = ff_hevc_idct_32x32_10_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            /* Replaces the SSE2 16x16 / 32x32 DC IDCTs from the first pass. */
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
        }
    }
#endif /* ARCH_X86_64 */
}
예제 #17
0
/**
 * Install x86 SIMD routines into the H.264 DSP context (HAVE_YASM builds
 * only; otherwise the function body is empty).
 *
 * Only bit depths 8 and 10 are handled. Everything newer than MMX is nested
 * inside the MMX and MMXEXT checks, so those routines are installed only
 * when the older checks succeed as well. Inside that nesting the checks run
 * in ascending order and a later one replaces slots set by an earlier one.
 *
 * @param c                 context whose function pointers are overridden
 * @param bit_depth         sample bit depth of the stream
 * @param chroma_format_idc several chroma-related routines are installed only
 *                          when this equals 1
 */
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                                 const int chroma_format_idc)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

    /* Independent of bit depth. */
    if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            /* With plain MMX the DC-only slots share the full IDCT routine. */
            c->h264_idct_dc_add   =
            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
            c->h264_idct8_dc_add  =
            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;

            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
            if (chroma_format_idc == 1)
                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
            /* This routine is gated on the CMOV flag as well. */
            if (mm_flags & AV_CPU_FLAG_CMOV)
                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;

            if (EXTERNAL_MMXEXT(mm_flags)) {
                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmxext;
                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
                c->h264_idct_add16   = ff_h264_idct_add16_8_mmxext;
                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmxext;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext;

                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext;
                /* Horizontal chroma deblocking is installed only for
                 * chroma_format_idc == 1. */
                if (chroma_format_idc == 1) {
                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmxext;
                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
                }
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmxext;
                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmxext;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;

                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;

                    /* Only entries 0 and 1 are replaced; entry 2 keeps the
                     * MMXEXT routine set above. */
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;

                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
                }
                if (EXTERNAL_SSSE3(mm_flags)) {
                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
                }
            }
        }
    } else if (bit_depth == 10) {
        /* No plain-MMX routines for 10-bit; everything requires MMXEXT. */
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
#if ARCH_X86_32
                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmxext;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmxext;
                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmxext;
                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmxext;
                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmxext;
#endif /* ARCH_X86_32 */
                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
                if (EXTERNAL_SSE2(mm_flags)) {
                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
                    /* These routines are compiled in only when
                     * HAVE_ALIGNED_STACK is set. */
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif /* HAVE_ALIGNED_STACK */

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
                }
                if (EXTERNAL_AVX(mm_flags)) {
                    /* With AVX the DC-only 4x4 slot shares the full IDCT
                     * routine, replacing the MMXEXT DC routine set above. */
                    c->h264_idct_dc_add  =
                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;

                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
#endif /* HAVE_ALIGNED_STACK */

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */
                }
            }
        }
    }
#endif /* HAVE_YASM */
}
/*
 * One dispatch entry for the switches in ff_blend_init_x86(): when
 * param->mode is BLEND_<MODE>, store ff_blend_<kernel> in param->blend.
 * Defined only for the function below and removed again right after it.
 */
#define BLEND_X86_CASE(MODE, kernel) \
    case BLEND_##MODE: param->blend = ff_blend_##kernel; break

/**
 * Install x86 SIMD blend kernels into param->blend.
 *
 * A kernel is only installed when the matching EXTERNAL_* CPU check passes
 * and param->opacity == 1; modes without an entry leave param->blend
 * untouched. The instruction-set checks run in sequence, so a later match
 * overwrites an earlier one for the same mode.
 *
 * @param param    filter parameters; mode and opacity are read, blend is
 *                 written
 * @param is_16bit non-zero selects the *_16_* kernels, which are only
 *                 compiled in when ARCH_X86_64 is set
 */
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
    int cpu_flags = av_get_cpu_flags();

    if (is_16bit) {
#if ARCH_X86_64
        if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
            switch (param->mode) {
            BLEND_X86_CASE(ADDITION,     addition_16_sse2);
            BLEND_X86_CASE(AND,          and_16_sse2);
            BLEND_X86_CASE(AVERAGE,      average_16_sse2);
            BLEND_X86_CASE(OR,           or_16_sse2);
            BLEND_X86_CASE(SUBTRACT,     subtract_16_sse2);
            BLEND_X86_CASE(XOR,          xor_16_sse2);
            }
        }
        if (EXTERNAL_SSE4(cpu_flags) && param->opacity == 1) {
            switch (param->mode) {
            BLEND_X86_CASE(GRAINMERGE,   grainmerge_16_sse4);
            BLEND_X86_CASE(DARKEN,       darken_16_sse4);
            BLEND_X86_CASE(GRAINEXTRACT, grainextract_16_sse4);
            BLEND_X86_CASE(DIFFERENCE,   difference_16_sse4);
            BLEND_X86_CASE(EXTREMITY,    extremity_16_sse4);
            BLEND_X86_CASE(NEGATION,     negation_16_sse4);
            BLEND_X86_CASE(LIGHTEN,      lighten_16_sse4);
            BLEND_X86_CASE(PHOENIX,      phoenix_16_sse4);
            }
        }
        if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
            switch (param->mode) {
            BLEND_X86_CASE(ADDITION,     addition_16_avx2);
            BLEND_X86_CASE(GRAINMERGE,   grainmerge_16_avx2);
            BLEND_X86_CASE(AND,          and_16_avx2);
            BLEND_X86_CASE(AVERAGE,      average_16_avx2);
            BLEND_X86_CASE(DARKEN,       darken_16_avx2);
            BLEND_X86_CASE(GRAINEXTRACT, grainextract_16_avx2);
            BLEND_X86_CASE(DIFFERENCE,   difference_16_avx2);
            BLEND_X86_CASE(EXTREMITY,    extremity_16_avx2);
            BLEND_X86_CASE(NEGATION,     negation_16_avx2);
            BLEND_X86_CASE(LIGHTEN,      lighten_16_avx2);
            BLEND_X86_CASE(OR,           or_16_avx2);
            BLEND_X86_CASE(PHOENIX,      phoenix_16_avx2);
            BLEND_X86_CASE(SUBTRACT,     subtract_16_avx2);
            BLEND_X86_CASE(XOR,          xor_16_avx2);
            }
        }
#endif /* ARCH_X86_64 */
    } else { /* 8-bit samples */
        if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
            switch (param->mode) {
            BLEND_X86_CASE(ADDITION,     addition_sse2);
            BLEND_X86_CASE(GRAINMERGE,   grainmerge_sse2);
            BLEND_X86_CASE(AND,          and_sse2);
            BLEND_X86_CASE(AVERAGE,      average_sse2);
            BLEND_X86_CASE(DARKEN,       darken_sse2);
            BLEND_X86_CASE(GRAINEXTRACT, grainextract_sse2);
            BLEND_X86_CASE(DIVIDE,       divide_sse2);
            BLEND_X86_CASE(HARDMIX,      hardmix_sse2);
            BLEND_X86_CASE(LIGHTEN,      lighten_sse2);
            BLEND_X86_CASE(MULTIPLY,     multiply_sse2);
            BLEND_X86_CASE(OR,           or_sse2);
            BLEND_X86_CASE(PHOENIX,      phoenix_sse2);
            BLEND_X86_CASE(SCREEN,       screen_sse2);
            BLEND_X86_CASE(SUBTRACT,     subtract_sse2);
            BLEND_X86_CASE(XOR,          xor_sse2);
            BLEND_X86_CASE(DIFFERENCE,   difference_sse2);
            BLEND_X86_CASE(EXTREMITY,    extremity_sse2);
            BLEND_X86_CASE(NEGATION,     negation_sse2);
            }
        }
        if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1) {
            switch (param->mode) {
            BLEND_X86_CASE(DIFFERENCE,   difference_ssse3);
            BLEND_X86_CASE(EXTREMITY,    extremity_ssse3);
            BLEND_X86_CASE(NEGATION,     negation_ssse3);
            }
        }
        /* Note: there is no AVX2 entry for BLEND_DIVIDE. */
        if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
            switch (param->mode) {
            BLEND_X86_CASE(ADDITION,     addition_avx2);
            BLEND_X86_CASE(GRAINMERGE,   grainmerge_avx2);
            BLEND_X86_CASE(AND,          and_avx2);
            BLEND_X86_CASE(AVERAGE,      average_avx2);
            BLEND_X86_CASE(DARKEN,       darken_avx2);
            BLEND_X86_CASE(GRAINEXTRACT, grainextract_avx2);
            BLEND_X86_CASE(HARDMIX,      hardmix_avx2);
            BLEND_X86_CASE(LIGHTEN,      lighten_avx2);
            BLEND_X86_CASE(MULTIPLY,     multiply_avx2);
            BLEND_X86_CASE(OR,           or_avx2);
            BLEND_X86_CASE(PHOENIX,      phoenix_avx2);
            BLEND_X86_CASE(SCREEN,       screen_avx2);
            BLEND_X86_CASE(SUBTRACT,     subtract_avx2);
            BLEND_X86_CASE(XOR,          xor_avx2);
            BLEND_X86_CASE(DIFFERENCE,   difference_avx2);
            BLEND_X86_CASE(EXTREMITY,    extremity_avx2);
            BLEND_X86_CASE(NEGATION,     negation_avx2);
            }
        }
    }
}

#undef BLEND_X86_CASE
예제 #19
0
/*
 * Shorthand used only by ff_audio_convert_init_x86(): expands to a call to
 * ff_audio_convert_set_func() on `ac` with AV_SAMPLE_FMT_<out>,
 * AV_SAMPLE_FMT_<in> and ff_conv_<func>; the three integers and the
 * description string are passed through unchanged.
 */
#define X86_SET_CONV(out, in, n0, n1, n2, descr, func)                      \
    ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_##out, AV_SAMPLE_FMT_##in,  \
                              n0, n1, n2, descr, ff_conv_##func)

/**
 * Register the x86 SIMD sample-format conversion routines on an
 * AudioConvert context.
 *
 * Routines are registered per instruction set, in the fixed sequence
 * MMX, SSE, SSE2, SSSE3, SSE4, AVX (fast), AVX. The sequence is kept
 * exactly as-is because what happens when two routines are registered for
 * the same format pair is decided by ff_audio_convert_set_func(), which is
 * not visible from this function.
 *
 * @param ac conversion context handed to every registration call
 */
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        X86_SET_CONV(S16,  S32,  0,  1,  8, "MMX", s32_to_s16_mmx);
        X86_SET_CONV(FLT,  FLTP, 6,  1,  4, "MMX", fltp_to_flt_6ch_mmx);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        X86_SET_CONV(S16,  FLTP, 6,  1,  2, "SSE", fltp_to_s16_6ch_sse);
        X86_SET_CONV(FLT,  FLTP, 2, 16,  8, "SSE", fltp_to_flt_2ch_sse);
        X86_SET_CONV(FLTP, FLT,  2, 16,  4, "SSE", flt_to_fltp_2ch_sse);
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        /* CPUs flagged SSE2SLOW get a single dedicated routine instead of
         * the three regular SSE2 ones. */
        if (cpu_flags & AV_CPU_FLAG_SSE2SLOW) {
            X86_SET_CONV(S16,  S16P, 6,  1,  4, "SSE2SLOW", s16p_to_s16_6ch_sse2slow);
        } else {
            X86_SET_CONV(S16,  S32,  0, 16, 16, "SSE2", s32_to_s16_sse2);
            X86_SET_CONV(S16,  S16P, 6, 16,  8, "SSE2", s16p_to_s16_6ch_sse2);
            X86_SET_CONV(S16,  FLTP, 6, 16,  4, "SSE2", fltp_to_s16_6ch_sse2);
        }
        X86_SET_CONV(S32,  S16,  0, 16,  8, "SSE2", s16_to_s32_sse2);
        X86_SET_CONV(FLT,  S16,  0, 16,  8, "SSE2", s16_to_flt_sse2);
        X86_SET_CONV(FLT,  S32,  0, 16,  8, "SSE2", s32_to_flt_sse2);
        X86_SET_CONV(S16,  FLT,  0, 16, 16, "SSE2", flt_to_s16_sse2);
        X86_SET_CONV(S32,  FLT,  0, 16, 16, "SSE2", flt_to_s32_sse2);
        X86_SET_CONV(S16,  S16P, 2, 16, 16, "SSE2", s16p_to_s16_2ch_sse2);
        X86_SET_CONV(FLT,  S16P, 2, 16,  8, "SSE2", s16p_to_flt_2ch_sse2);
        X86_SET_CONV(FLT,  S16P, 6, 16,  4, "SSE2", s16p_to_flt_6ch_sse2);
        X86_SET_CONV(S16,  FLTP, 2, 16,  4, "SSE2", fltp_to_s16_2ch_sse2);
        X86_SET_CONV(S16P, S16,  2, 16,  8, "SSE2", s16_to_s16p_2ch_sse2);
        X86_SET_CONV(S16P, S16,  6, 16,  4, "SSE2", s16_to_s16p_6ch_sse2);
        X86_SET_CONV(FLTP, S16,  2, 16,  8, "SSE2", s16_to_fltp_2ch_sse2);
        X86_SET_CONV(FLTP, S16,  6, 16,  4, "SSE2", s16_to_fltp_6ch_sse2);
        X86_SET_CONV(S16P, FLT,  2, 16,  8, "SSE2", flt_to_s16p_2ch_sse2);
        X86_SET_CONV(S16P, FLT,  6, 16,  4, "SSE2", flt_to_s16p_6ch_sse2);
        X86_SET_CONV(FLTP, FLT,  6, 16,  4, "SSE2", flt_to_fltp_6ch_sse2);
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        X86_SET_CONV(FLT,  S16P, 6, 16,  4, "SSSE3", s16p_to_flt_6ch_ssse3);
        X86_SET_CONV(S16,  FLTP, 2, 16,  4, "SSSE3", fltp_to_s16_2ch_ssse3);
        X86_SET_CONV(S16P, S16,  2, 16,  8, "SSSE3", s16_to_s16p_2ch_ssse3);
        X86_SET_CONV(S16P, S16,  6, 16,  4, "SSSE3", s16_to_s16p_6ch_ssse3);
        X86_SET_CONV(FLTP, S16,  6, 16,  4, "SSSE3", s16_to_fltp_6ch_ssse3);
        X86_SET_CONV(S16P, FLT,  6, 16,  4, "SSSE3", flt_to_s16p_6ch_ssse3);
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        X86_SET_CONV(FLT,  S16,  0, 16,  8, "SSE4", s16_to_flt_sse4);
        X86_SET_CONV(FLT,  FLTP, 6, 16,  4, "SSE4", fltp_to_flt_6ch_sse4);
    }

    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        X86_SET_CONV(FLT,  S32,  0, 32, 16, "AVX", s32_to_flt_avx);
        X86_SET_CONV(S32,  FLT,  0, 32, 32, "AVX", flt_to_s32_avx);
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        X86_SET_CONV(S16,  S16P, 2, 16, 16, "AVX", s16p_to_s16_2ch_avx);
        X86_SET_CONV(S16,  S16P, 6, 16,  8, "AVX", s16p_to_s16_6ch_avx);
        X86_SET_CONV(FLT,  S16P, 2, 16,  8, "AVX", s16p_to_flt_2ch_avx);
        X86_SET_CONV(FLT,  S16P, 6, 16,  4, "AVX", s16p_to_flt_6ch_avx);
        X86_SET_CONV(S16,  FLTP, 6, 16,  4, "AVX", fltp_to_s16_6ch_avx);
        X86_SET_CONV(FLT,  FLTP, 6, 16,  4, "AVX", fltp_to_flt_6ch_avx);
        X86_SET_CONV(S16P, S16,  2, 16,  8, "AVX", s16_to_s16p_2ch_avx);
        X86_SET_CONV(S16P, S16,  6, 16,  4, "AVX", s16_to_s16p_6ch_avx);
        X86_SET_CONV(FLTP, S16,  2, 16,  8, "AVX", s16_to_fltp_2ch_avx);
        X86_SET_CONV(FLTP, S16,  6, 16,  4, "AVX", s16_to_fltp_6ch_avx);
        X86_SET_CONV(S16P, FLT,  2, 16,  8, "AVX", flt_to_s16p_2ch_avx);
        X86_SET_CONV(S16P, FLT,  6, 16,  4, "AVX", flt_to_s16p_6ch_avx);
        X86_SET_CONV(FLTP, FLT,  2, 16,  4, "AVX", flt_to_fltp_2ch_avx);
        X86_SET_CONV(FLTP, FLT,  6, 16,  4, "AVX", flt_to_fltp_6ch_avx);
    }
}

#undef X86_SET_CONV
예제 #20
0
/**
 * Fill an HEVC DSP context with x86 SIMD implementations.
 *
 * Only bit depths 8 and 10 are handled; any other value leaves the context
 * untouched. Within one bit depth the CPU-feature checks run from the
 * oldest instruction set to the newest, so a later check overwrites a
 * pointer that an earlier one already set (e.g. transform_dc_add[3]).
 * The luma loop filters and the EPEL/QPEL tables are additionally gated on
 * ARCH_X86_64.
 *
 * @param c         DSP context whose function pointers are filled in
 * @param bit_depth sample bit depth selecting the kernel set
 */
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    switch (bit_depth) {
    case 8:
        if (EXTERNAL_MMXEXT(mm_flags)) {
            c->transform_dc_add[0] = ff_hevc_idct4_dc_add_8_mmxext;
            c->transform_dc_add[1] = ff_hevc_idct8_dc_add_8_mmxext;
        }
        if (EXTERNAL_SSE2(mm_flags)) {
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2;

            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
        if (EXTERNAL_AVX2(mm_flags)) {
            /* Replaces the SSE2 32x32 DC-add installed above. */
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_avx2;
        }
        break;

    case 10:
        if (EXTERNAL_MMXEXT(mm_flags)) {
            c->transform_dc_add[0] = ff_hevc_idct4_dc_add_10_mmxext;
        }
        if (EXTERNAL_SSE2(mm_flags)) {
            c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2;
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_sse2;

            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
        }
        if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
        if (EXTERNAL_AVX(mm_flags)) {
            c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_avx;
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_avx;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_avx;
        }
        if (EXTERNAL_AVX2(mm_flags)) {
            /* Replaces the AVX 16x16 and 32x32 DC-add installed above. */
            c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_avx2;
            c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_avx2;
        }
        break;

    default:
        /* No x86 kernels for other bit depths. */
        break;
    }
}
예제 #21
0
/**
 * Install x86 SIMD implementations into a VP8 DSP context.
 *
 * The whole body is compiled only when HAVE_YASM is set. The CPU-feature
 * checks run in a fixed sequence (MMX, MMXEXT, SSE, SSE2, SSSE3, SSE4) and
 * a later check overwrites pointers set by an earlier one. Most MMX and
 * all MMXEXT assignments are restricted to ARCH_X86_32 builds.
 *
 * @param c DSP context whose function pointers are filled in
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();
    /* The vertical SSE2 loop filters are used on SSE2SLOW CPUs as well. */
    int sse2_or_sse2slow = HAVE_SSE2_EXTERNAL &&
                           (cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW));

    if (EXTERNAL_MMX(cpu_flags)) {
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
#if ARCH_X86_32
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;

        /* vertical loop filters */
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;

        /* horizontal loop filters */
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
#endif
    }

    /* Note kept from the original: 4-tap width=16 functions are missing
     * because w=16 is only used for luma, and luma is always a copy or
     * sixtap. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
#if ARCH_X86_32
        /* vertical loop filters */
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;

        /* horizontal loop filters */
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
#endif
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
        c->vp8_idct_add    = ff_vp8_idct_add_sse;
    }

    /* Vertical SSE2 loop filters: plain SSE2 or SSE2SLOW. */
    if (sse2_or_sse2slow) {
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_sse2;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* Horizontal SSE2 loop filters and the 4y DC add: EXTERNAL_SSE2 only. */
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;

        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        /* vertical loop filters */
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;

        /* horizontal loop filters */
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y     = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv     = ff_vp8_h_loop_filter8uv_mbedge_sse4;

        c->vp8_idct_dc_add          = ff_vp8_idct_dc_add_sse4;
    }
#endif /* HAVE_YASM */
}
예제 #22
0
/**
 * Install x86 SIMD implementations into an HEVC DSP context.
 *
 * Only bit depths 8 and 10 are handled; any other value leaves the context
 * untouched. Every assignment sits inside the EXTERNAL_MMX and
 * EXTERNAL_MMXEXT checks; inside those, each instruction-set check is
 * evaluated on its own.
 *
 * The 8-bit *_sse4 transform kernels are installed under the SSE4 check
 * (not the SSSE3 one), which matches how the 10-bit *_sse4 transform
 * kernels are gated further down.
 *
 * @param c         DSP context whose function pointers are filled in
 * @param bit_depth sample bit depth selecting the kernel set
 */
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
                if (EXTERNAL_SSE2(mm_flags)) {
                    /* The SSE2 chroma loop filters are compiled out here. */
#if 0
                    c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
                    c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
#endif
                }
                if (EXTERNAL_SSSE3(mm_flags)) {
                    c->put_unweighted_pred = ff_hevc_put_unweighted_pred_8_sse;

                    c->put_hevc_qpel[0][0] = ff_hevc_put_hevc_qpel_pixels_8_sse;
                    c->put_hevc_qpel[0][1] = ff_hevc_put_hevc_qpel_h_1_8_sse;
                    c->put_hevc_qpel[0][2] = ff_hevc_put_hevc_qpel_h_2_8_sse;
                    c->put_hevc_qpel[0][3] = ff_hevc_put_hevc_qpel_h_3_8_sse;
                    c->put_hevc_qpel[1][0] = ff_hevc_put_hevc_qpel_v_1_8_sse;
                    c->put_hevc_qpel[2][0] = ff_hevc_put_hevc_qpel_v_2_8_sse;
                    c->put_hevc_qpel[3][0] = ff_hevc_put_hevc_qpel_v_3_8_sse;

                    /* The SSSE3 luma loop filters are compiled out here; the
                     * guard was previously ARCH_X86_64. */
#if 0
                    c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
                    c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
#endif
                }
                if (EXTERNAL_SSE4(mm_flags)) {
                    /* Kernels named *_sse4 must only be selected when the
                     * SSE4 check passes, never on the SSSE3 check alone. */
                    c->transform_4x4_luma_add = ff_hevc_transform_4x4_luma_add_8_sse4;

                    c->transform_add[0] = ff_hevc_transform_4x4_add_8_sse4;
                    c->transform_add[1] = ff_hevc_transform_8x8_add_8_sse4;
                    c->transform_add[2] = ff_hevc_transform_16x16_add_8_sse4;
                    c->transform_add[3] = ff_hevc_transform_32x32_add_8_sse4;

#if GCC_VERSION > MIN_GCC_VERSION_MC || __APPLE__
                    c->put_weighted_pred_avg = ff_hevc_put_weighted_pred_avg_8_sse;
                    c->weighted_pred         = ff_hevc_weighted_pred_8_sse;
                    c->weighted_pred_avg     = ff_hevc_weighted_pred_avg_8_sse;

                    c->put_hevc_epel[0][0] = ff_hevc_put_hevc_epel_pixels_8_sse;
                    c->put_hevc_epel[0][1] = ff_hevc_put_hevc_epel_h_8_sse;
                    c->put_hevc_epel[1][0] = ff_hevc_put_hevc_epel_v_8_sse;
                    c->put_hevc_epel[1][1] = ff_hevc_put_hevc_epel_hv_8_sse;
#endif
                    c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_0_8_sse;
                    c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_1_8_sse;
                    c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_2_8_sse;
                    c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_3_8_sse;

                    c->sao_band_filter[0] = ff_hevc_sao_band_filter_0_8_sse;
                    c->sao_band_filter[1] = ff_hevc_sao_band_filter_1_8_sse;
                    c->sao_band_filter[2] = ff_hevc_sao_band_filter_2_8_sse;
                    c->sao_band_filter[3] = ff_hevc_sao_band_filter_3_8_sse;

                    c->put_hevc_qpel[1][1] = ff_hevc_put_hevc_qpel_h_1_v_1_sse;
                    c->put_hevc_qpel[1][2] = ff_hevc_put_hevc_qpel_h_2_v_1_sse;
                    c->put_hevc_qpel[1][3] = ff_hevc_put_hevc_qpel_h_3_v_1_sse;
                    c->put_hevc_qpel[2][1] = ff_hevc_put_hevc_qpel_h_1_v_2_sse;
                    c->put_hevc_qpel[2][2] = ff_hevc_put_hevc_qpel_h_2_v_2_sse;
                    c->put_hevc_qpel[2][3] = ff_hevc_put_hevc_qpel_h_3_v_2_sse;
                    c->put_hevc_qpel[3][1] = ff_hevc_put_hevc_qpel_h_1_v_3_sse;
                    c->put_hevc_qpel[3][2] = ff_hevc_put_hevc_qpel_h_2_v_3_sse;
                    c->put_hevc_qpel[3][3] = ff_hevc_put_hevc_qpel_h_3_v_3_sse;
                }
            }
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMX(mm_flags)) {
            if (EXTERNAL_MMXEXT(mm_flags)) {
                if (EXTERNAL_SSE4(mm_flags)) {
                    c->transform_4x4_luma_add = ff_hevc_transform_4x4_luma_add_10_sse4;

                    c->transform_add[0] = ff_hevc_transform_4x4_add_10_sse4;
                    c->transform_add[1] = ff_hevc_transform_8x8_add_10_sse4;
                    c->transform_add[2] = ff_hevc_transform_16x16_add_10_sse4;
                    c->transform_add[3] = ff_hevc_transform_32x32_add_10_sse4;

                    c->put_hevc_epel[0][0] = ff_hevc_put_hevc_epel_pixels_10_sse;
                    c->put_hevc_epel[0][1] = ff_hevc_put_hevc_epel_h_10_sse;
                    c->put_hevc_epel[1][0] = ff_hevc_put_hevc_epel_v_10_sse;
                    c->put_hevc_epel[1][1] = ff_hevc_put_hevc_epel_hv_10_sse;

                    c->put_hevc_qpel[0][0] = ff_hevc_put_hevc_qpel_pixels_10_sse;
                    c->put_hevc_qpel[0][1] = ff_hevc_put_hevc_qpel_h_1_10_sse;
                    c->put_hevc_qpel[1][0] = ff_hevc_put_hevc_qpel_v_1_10_sse;
                    c->put_hevc_qpel[2][0] = ff_hevc_put_hevc_qpel_v_2_10_sse;
                    c->put_hevc_qpel[3][0] = ff_hevc_put_hevc_qpel_v_3_10_sse;
                }
            }
        }
    }
}
예제 #23
0
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
            c->transform_add[0]    =  ff_hevc_transform_add4_8_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
            }
            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;

            c->transform_add[1]    = ff_hevc_transform_add8_8_sse2;
            c->transform_add[2]    = ff_hevc_transform_add16_8_sse2;
            c->transform_add[3]    = ff_hevc_transform_add32_8_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
            }
            c->transform_add[1]    = ff_hevc_transform_add8_8_avx;
            c->transform_add[2]    = ff_hevc_transform_add16_8_avx;
            c->transform_add[3]    = ff_hevc_transform_add32_8_avx;
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;

            c->transform_add[3]    = ff_hevc_transform_add32_8_avx2;
        }
#endif
    } else if (bit_depth == 10) {
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->transform_add[0] = ff_hevc_transform_add4_10_mmxext;
            c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
            }

            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;

            c->transform_add[1]    = ff_hevc_transform_add8_10_sse2;
            c->transform_add[2]    = ff_hevc_transform_add16_10_sse2;
            c->transform_add[3]    = ff_hevc_transform_add32_10_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
            }
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(cpu_flags)) {

            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;

            c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
            c->transform_add[3] = ff_hevc_transform_add32_10_avx2;

        }
#endif
    } else if (bit_depth == 12) {
#if (HAVE_MMXEXT_EXTERNAL == 1)
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct4x4_dc_12_mmxext;
            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_mmxext;
        }
#endif
#if (HAVE_SSE2_EXTERNAL == 1)
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
            }

            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2;
            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_sse2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_sse2;
        }
#endif
#if (HAVE_SSSE3_EXTERNAL == 1)
        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
        }
#endif
#if (HAVE_SSE4_EXTERNAL == 1)
        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);

            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
        }
#endif
#if (HAVE_AVX_EXTERNAL == 1)
        if (EXTERNAL_AVX(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
            if (ARCH_X86_64) {
                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
            }
        }
#endif
#if (HAVE_AVX2_EXTERNAL == 1)
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
        }
#endif
    }
}