/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

    /* INIT2..INIT7 paste the partition size and CPU suffix onto the function
     * name, e.g. INIT2( sad, _mmxext ) expands to
     *     pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_mmxext;
     *     pixf->sad[PIXEL_16x8]  = x264_pixel_sad_16x8_mmxext; */
#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8]  = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8]   = x264_pixel_ads1##cpu;

    /* C versions first, so any entry not overridden below stays valid */
    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT7( satd_x3, );
    INIT7( satd_x4, );
    INIT4( sa8d, );
    INIT_ADS( );

    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;

    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT7( satd_x3, _mmxext );
        INIT7( satd_x4, _mmxext );
        INIT_ADS( _mmxext );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmxext;
#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_32 )
        {
            INIT5( sad, _cache32_mmxext );
            INIT4( sad_x3, _cache32_mmxext );
            INIT4( sad_x4, _cache32_mmxext );
        }
        else if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT5( sad, _cache64_mmxext );
            INIT4( sad_x3, _cache64_mmxext );
            INIT4( sad_x4, _cache64_mmxext );
        }
#else
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    // skipped on CPUs where SSE2 SAD is slower than the MMX version
    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT_ADS( _sse2 );
        pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;
#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }
    // these are faster on both Intel and AMD
    if( cpu&X264_CPU_SSE2 )
    {
        INIT5( ssd, _sse2 );
        INIT5( satd, _sse2 );
        INIT5( satd_x3, _sse2 );
        INIT5( satd_x4, _sse2 );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_64) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT7( satd, _ssse3 );
        INIT7( satd_x3, _ssse3 );
        INIT7( satd_x4, _ssse3 );
        INIT_ADS( _ssse3 );
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_ssse3;
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_ssse3;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
        if( cpu&X264_CPU_PHADD_IS_FAST )
        {
            INIT5( satd, _ssse3_phadd );
            INIT5( satd_x3, _ssse3_phadd );
            INIT5( satd_x4, _ssse3_phadd );
        }
    }
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

    // alias the remaining partition sizes onto the ads kernels chosen above
    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}
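
/* Usage sketch (illustrative only, not part of this file): once the table is
 * filled, callers go through the function pointers and never name a specific
 * implementation. example_sad_16x16() and its buffers are hypothetical;
 * x264_cpu_detect() is assumed to be the flag-detection helper from
 * common/cpu.h.
 */
#if 0
static int example_sad_16x16( void )
{
    x264_pixel_function_t pixf;
    uint8_t cur[16*16] = {0};  /* hypothetical current block, stride 16 */
    uint8_t ref[16*16] = {0};  /* hypothetical reference block, stride 16 */

    x264_pixel_init( x264_cpu_detect(), &pixf );
    /* the same call dispatches to whichever C, MMX, SSE2 or SSSE3 version
     * x264_pixel_init selected for this CPU */
    return pixf.sad[PIXEL_16x16]( cur, 16, ref, 16 );
}
#endif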