av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags)) {
        c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
        c->add_pixels_clamped = ff_add_pixels_clamped_mmx;

        if (!high_bit_depth &&
            avctx->lowres == 0 &&
            (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
            c->idct_put  = ff_simple_idct_put_mmx;
            c->idct_add  = ff_simple_idct_add_mmx;
            c->idct      = ff_simple_idct_mmx;
            c->perm_type = FF_IDCT_PERM_SIMPLE;
        }
    }
#endif

#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMX(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
    }
#endif

#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
    }
#endif
}
av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (high_bit_depth ||
        !(avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_XVID))
        return;

    if (INLINE_MMX(cpu_flags)) {
        c->idct_put  = ff_xvid_idct_mmx_put;
        c->idct_add  = ff_xvid_idct_mmx_add;
        c->idct      = ff_xvid_idct_mmx;
        c->perm_type = FF_IDCT_PERM_NONE;
    }

    if (INLINE_MMXEXT(cpu_flags)) {
        c->idct_put  = ff_xvid_idct_mmxext_put;
        c->idct_add  = ff_xvid_idct_mmxext_add;
        c->idct      = ff_xvid_idct_mmxext;
        c->perm_type = FF_IDCT_PERM_NONE;
    }

    if (INLINE_SSE2(cpu_flags)) {
        c->idct_put  = ff_xvid_idct_sse2_put;
        c->idct_add  = ff_xvid_idct_sse2_add;
        c->idct      = ff_xvid_idct_sse2;
        c->perm_type = FF_IDCT_PERM_SSE2;
    }
}
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int mm_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(mm_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif

#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(mm_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif

#define ASSIGN_LF(EXT)                                            \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT;   \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (mm_flags & AV_CPU_FLAG_MMX) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }

    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
        dsp->avg_vc1_mspel_pixels_tab[0]         = avg_vc1_mspel_mc00_mmxext;
    } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }

    if (mm_flags & AV_CPU_FLAG_SSE2) {
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
    }

    if (mm_flags & AV_CPU_FLAG_SSSE3) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }

    if (mm_flags & AV_CPU_FLAG_SSE4) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
{
#if HAVE_MMX_INLINE
    int cpu_flags = av_get_cpu_flags();

    if (c->srcFormat != AV_PIX_FMT_YUV420P &&
        c->srcFormat != AV_PIX_FMT_YUVA420P)
        return NULL;

#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags)) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_RGB24:
            return yuv420_rgb24_mmxext;
        case AV_PIX_FMT_BGR24:
            return yuv420_bgr24_mmxext;
        }
    }
#endif

    if (INLINE_MMX(cpu_flags)) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_RGB32:
            if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
                return yuva420_rgb32_mmx;
#endif
                break;
            } else
                return yuv420_rgb32_mmx;
        case AV_PIX_FMT_BGR32:
            if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
                return yuva420_bgr32_mmx;
#endif
                break;
            } else
                return yuv420_bgr32_mmx;
        case AV_PIX_FMT_RGB24:
            return yuv420_rgb24_mmx;
        case AV_PIX_FMT_BGR24:
            return yuv420_bgr24_mmx;
        case AV_PIX_FMT_RGB565:
            return yuv420_rgb16_mmx;
        case AV_PIX_FMT_RGB555:
            return yuv420_rgb15_mmx;
        }
    }
#endif /* HAVE_MMX_INLINE */

    return NULL;
}
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags))
        ff_vc1dsp_init_mmx(dsp);
#endif

#if HAVE_6REGS && HAVE_MMX_EXTERNAL && HAVE_INLINE_ASM
    if (INLINE_MMXEXT(cpu_flags))
        ff_vc1dsp_init_mmxext(dsp);
#endif

#define ASSIGN_LF(EXT)                                            \
        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT;   \
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

#if HAVE_YASM
    if (EXTERNAL_MMX(cpu_flags)) {
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_LF(mmxext);
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
        dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_LF(ssse3);
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
    }
#endif /* HAVE_YASM */
}
av_cold void rgb2rgb_init_x86(void)
{
#if HAVE_INLINE_ASM
    int cpu_flags = av_get_cpu_flags();

    if (INLINE_MMX(cpu_flags))
        rgb2rgb_init_mmx();
    if (INLINE_AMD3DNOW(cpu_flags))
        rgb2rgb_init_3dnow();
    if (INLINE_MMXEXT(cpu_flags))
        rgb2rgb_init_mmxext();
    if (INLINE_SSE2(cpu_flags))
        rgb2rgb_init_sse2();
#endif /* HAVE_INLINE_ASM */
}
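/*
 * The init functions in this section share one dispatch idiom, and
 * rgb2rgb_init_x86() above is its plainest instance: every INLINE_xxx() or
 * EXTERNAL_xxx() check that passes overwrites the pointer set by the previous
 * one, so ordering the checks from weakest to strongest extension leaves the
 * best available implementation installed. Below is a minimal, self-contained
 * sketch of that idiom using hypothetical toy names (ToyDSPContext, FLAG_MMX,
 * add_bytes_*); it is not part of the FFmpeg API, only an illustration.
 */
#include <stdio.h>

#define FLAG_MMX  (1 << 0)   /* toy flag bits standing in for AV_CPU_FLAG_* */
#define FLAG_SSE2 (1 << 1)

typedef struct ToyDSPContext {
    void (*add_bytes)(unsigned char *dst, const unsigned char *src, int n);
} ToyDSPContext;

static void add_bytes_c(unsigned char *dst, const unsigned char *src, int n)
{
    for (int i = 0; i < n; i++)
        dst[i] += src[i];
}

/* stand-ins for SIMD versions; a real build would supply asm implementations */
static void add_bytes_mmx(unsigned char *dst, const unsigned char *src, int n)
{
    add_bytes_c(dst, src, n);
}

static void add_bytes_sse2(unsigned char *dst, const unsigned char *src, int n)
{
    add_bytes_c(dst, src, n);
}

static void toy_dsp_init(ToyDSPContext *c, int cpu_flags)
{
    c->add_bytes = add_bytes_c;        /* portable fallback first          */
    if (cpu_flags & FLAG_MMX)
        c->add_bytes = add_bytes_mmx;  /* weaker extension ...             */
    if (cpu_flags & FLAG_SSE2)
        c->add_bytes = add_bytes_sse2; /* ... overridden by a stronger one */
}

int main(void)
{
    ToyDSPContext c;
    unsigned char dst[4] = { 1, 2, 3, 4 }, src[4] = { 10, 10, 10, 10 };

    toy_dsp_init(&c, FLAG_MMX | FLAG_SSE2);
    c.add_bytes(dst, src, 4);          /* dispatches to the sse2 stand-in  */
    printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);
    return 0;
}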
av_cold void ff_mpv_common_init_x86(MpegEncContext *s)
{
#if HAVE_MMX_INLINE
    int cpu_flags = av_get_cpu_flags();

    if (INLINE_MMX(cpu_flags)) {
        s->dct_unquantize_h263_intra  = dct_unquantize_h263_intra_mmx;
        s->dct_unquantize_h263_inter  = dct_unquantize_h263_inter_mmx;
        s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
        s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
        if (!(s->avctx->flags & AV_CODEC_FLAG_BITEXACT))
            s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
        s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
    }
#endif /* HAVE_MMX_INLINE */
}
av_cold void ff_noise_init_x86(NoiseContext *n)
{
#if HAVE_INLINE_ASM
    int cpu_flags = av_get_cpu_flags();

    if (INLINE_MMX(cpu_flags)) {
        n->line_noise = line_noise_mmx;
#if HAVE_6REGS
        n->line_noise_avg = line_noise_avg_mmx;
#endif
    }
    if (INLINE_MMXEXT(cpu_flags)) {
        n->line_noise = line_noise_mmxext;
    }
#endif
}
av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();
    const int dct_algo = avctx->dct_algo;

    if (!high_bit_depth) {
        if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
            if (INLINE_MMX(cpu_flags))
                c->fdct = ff_fdct_mmx;
            if (INLINE_MMXEXT(cpu_flags))
                c->fdct = ff_fdct_mmxext;
            if (INLINE_SSE2(cpu_flags))
                c->fdct = ff_fdct_sse2;
        }
    }
}
av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (INLINE_MMX(cpu_flags)) {
        c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
        c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;

        if (!high_bit_depth &&
            (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
            c->idct_put  = ff_simple_idct_put_mmx;
            c->idct_add  = ff_simple_idct_add_mmx;
            c->idct      = ff_simple_idct_mmx;
            c->perm_type = FF_IDCT_PERM_SIMPLE;
        }
    }
}
av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
{
    const int dct_algo = s->avctx->dct_algo;

    if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
#if HAVE_MMX_INLINE
        int mm_flags = av_get_cpu_flags();

        if (INLINE_MMX(mm_flags))
            s->dct_quantize = dct_quantize_MMX;
#endif
#if HAVE_MMXEXT_INLINE
        if (INLINE_MMXEXT(mm_flags))
            s->dct_quantize = dct_quantize_MMXEXT;
#endif
#if HAVE_SSE2_INLINE
        if (INLINE_SSE2(mm_flags))
            s->dct_quantize = dct_quantize_SSE2;
#endif
#if HAVE_SSSE3_INLINE
        if (INLINE_SSSE3(mm_flags))
            s->dct_quantize = dct_quantize_SSSE3;
#endif
    }
}
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (cpu_flags & AV_CPU_FLAG_CMOV)
        c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif

    if (INLINE_MMX(cpu_flags))
        c->add_bytes = ff_add_bytes_mmx;

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        /* slower than cmov version on AMD */
        if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
            c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3;
        if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe
            c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4;
    }
}
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags))
        sws_init_swscale_mmx(c);
#endif
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags))
        sws_init_swscale_mmxext(c);
#endif

#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do {                  \
    if (c->srcBpc == 8) {                                                          \
        hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_  ## filtersize ## _ ## opt2 : \
                                     ff_hscale8to19_  ## filtersize ## _ ## opt1;  \
    } else if (c->srcBpc == 9) {                                                   \
        hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_  ## filtersize ## _ ## opt2 : \
                                     ff_hscale9to19_  ## filtersize ## _ ## opt1;  \
    } else if (c->srcBpc == 10) {                                                  \
        hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale10to19_ ## filtersize ## _ ## opt1;  \
    } else /* c->srcBpc == 16 */ {                                                 \
        hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale16to19_ ## filtersize ## _ ## opt1;  \
    }                                                                              \
} while (0)

#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)  \
    switch (filtersize) {                                        \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }

#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)             \
    switch (c->dstBpc) {                                                           \
    case 16: do_16_case;                                                   break;  \
    case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break;  \
    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break;  \
    default: if (condition_8bit)      vscalefn = ff_yuv2planeX_8_  ## opt; break;  \
    }

#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)                                     \
    switch (c->dstBpc) {                                                                      \
    case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
    case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2; break; \
    default:                                     vscalefn = ff_yuv2plane1_8_  ## opt1; break; \
    }

#define case_rgb(x, X, opt)                          \
    case AV_PIX_FMT_ ## X:                           \
        c->lumToYV12 = ff_ ## x ## ToY_ ## opt;      \
        if (!c->chrSrcHSubSample)                    \
            c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
        break

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext,
                           cpu_flags & AV_CPU_FLAG_MMXEXT);

        switch (c->srcFormat) {
        case AV_PIX_FMT_Y400A:
            c->lumToYV12 = ff_yuyvToY_mmx;
            if (c->alpPixBuf)
                c->alpToYV12 = ff_uyvyToY_mmx;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_mmx;
            c->chrToYV12 = ff_yuyvToUV_mmx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_mmx;
            c->chrToYV12 = ff_uyvyToUV_mmx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_mmx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_mmx;
            break;
        case_rgb(rgb24, RGB24, mmx);
        case_rgb(bgr24, BGR24, mmx);
        case_rgb(bgra,  BGRA,  mmx);
        case_rgb(rgba,  RGBA,  mmx);
        case_rgb(abgr,  ABGR,  mmx);
        case_rgb(argb,  ARGB,  mmx);
        default:
            break;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
    }
#endif /* ARCH_X86_32 */

#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)                \
    switch (filtersize) {                                                      \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break;               \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break;               \
    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
             break;                                                            \
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_Y400A:
            c->lumToYV12 = ff_yuyvToY_sse2;
            if (c->alpPixBuf)
                c->alpToYV12 = ff_uyvyToY_sse2;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_sse2;
            c->chrToYV12 = ff_yuyvToUV_sse2;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_sse2;
            c->chrToYV12 = ff_uyvyToUV_sse2;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_sse2;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_sse2;
            break;
        case_rgb(rgb24, RGB24, sse2);
        case_rgb(bgr24, BGR24, sse2);
        case_rgb(bgra,  BGRA,  sse2);
        case_rgb(rgba,  RGBA,  sse2);
        case_rgb(abgr,  ABGR,  sse2);
        case_rgb(argb,  ARGB,  sse2);
        default:
            break;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
        switch (c->srcFormat) {
        case_rgb(rgb24, RGB24, ssse3);
        case_rgb(bgr24, BGR24, ssse3);
        default:
            break;
        }
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        /* Xto15 don't need special sse4 functions */
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        if (c->dstBpc == 16 && !isBE(c->dstFormat))
            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YUYV422:
            c->chrToYV12 = ff_yuyvToUV_avx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->chrToYV12 = ff_uyvyToUV_avx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_avx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_avx;
            break;
        case_rgb(rgb24, RGB24, avx);
        case_rgb(bgr24, BGR24, avx);
        case_rgb(bgra,  BGRA,  avx);
        case_rgb(rgba,  RGBA,  avx);
        case_rgb(abgr,  ABGR,  avx);
        case_rgb(argb,  ARGB,  avx);
        default:
            break;
        }
    }
}
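/*
 * ff_sws_init_swscale_x86() above leans heavily on token pasting: macros such
 * as case_rgb() and ASSIGN_SCALE_FUNC2() glue a format name and an
 * instruction-set suffix into a function identifier at compile time, while the
 * EXTERNAL_xxx() checks select the suffix group at run time. The following is a
 * minimal sketch of that naming trick with hypothetical toy functions
 * (rgb24_to_y_c, rgb24_to_y_sse2, PICK_TO_Y); it is not the swscale API, only
 * an illustration of the ## pattern.
 */
#include <stdio.h>

static void rgb24_to_y_c(void)    { puts("rgb24_to_y_c"); }
static void rgb24_to_y_sse2(void) { puts("rgb24_to_y_sse2"); }

/* expands to the identifier <fmt>_to_y_<opt>, mirroring ff_ ## x ## ToY_ ## opt */
#define PICK_TO_Y(fmt, opt) fmt ## _to_y_ ## opt

int main(void)
{
    void (*to_y)(void);

    to_y = PICK_TO_Y(rgb24, sse2); /* resolves to rgb24_to_y_sse2 at compile time */
    to_y();
    to_y = PICK_TO_Y(rgb24, c);    /* resolves to rgb24_to_y_c */
    to_y();
    return 0;
}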