void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
{
#if HIGH_BIT_DEPTH
    // Apparent placeholder: no high-bit-depth assembly primitives yet; the
    // self-assignment keeps both parameters referenced.
    if (cpuMask & X265_CPU_SSE2) p.sa8d[0] = p.sa8d[0];
#else
    if (cpuMask & X265_CPU_SSE2)
    {
        INIT8_NAME(sse_pp, ssd, _mmx);
        INIT8(sad, _mmx2);
        INIT8(sad_x3, _mmx2);
        INIT8(sad_x4, _mmx2);
        INIT8(satd, _mmx2);

        p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
        p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
        p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
        p.satd[LUMA_16x12] = x265_pixel_satd_16x12_sse2;
        p.satd[LUMA_16x32] = x265_pixel_satd_16x32_sse2;
        p.satd[LUMA_16x64] = x265_pixel_satd_16x64_sse2;
        p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse2;
        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse2;
        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse2;

        p.sa8d[BLOCK_4x4] = x265_pixel_satd_4x4_mmx2;
        p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;

        PIXEL_AVG(sse2);
        PIXEL_AVG_W4(mmx2);

        p.sad[LUMA_8x32] = x265_pixel_sad_8x32_sse2;
        p.sad[LUMA_16x4] = x265_pixel_sad_16x4_sse2;
        p.sad[LUMA_16x12] = x265_pixel_sad_16x12_sse2;
        p.sad[LUMA_16x32] = x265_pixel_sad_16x32_sse2;
        p.sad[LUMA_16x64] = x265_pixel_sad_16x64_sse2;
        p.sad[LUMA_32x8] = x265_pixel_sad_32x8_sse2;
        p.sad[LUMA_32x16] = x265_pixel_sad_32x16_sse2;
        p.sad[LUMA_32x24] = x265_pixel_sad_32x24_sse2;
        p.sad[LUMA_32x32] = x265_pixel_sad_32x32_sse2;
        p.sad[LUMA_32x64] = x265_pixel_sad_32x64_sse2;
        p.sad[LUMA_64x16] = x265_pixel_sad_64x16_sse2;
        p.sad[LUMA_64x32] = x265_pixel_sad_64x32_sse2;
        p.sad[LUMA_64x48] = x265_pixel_sad_64x48_sse2;
        p.sad[LUMA_64x64] = x265_pixel_sad_64x64_sse2;
        p.sad[LUMA_48x64] = x265_pixel_sad_48x64_sse2;
        p.sad[LUMA_24x32] = x265_pixel_sad_24x32_sse2;
        p.sad[LUMA_12x16] = x265_pixel_sad_12x16_sse2;

        ASSGN_SSE(sse2);
        INIT2(sad, _sse2);
        INIT2(sad_x3, _sse2);
        INIT2(sad_x4, _sse2);
        INIT6(satd, _sse2);
        HEVC_SATD(sse2);
        CHROMA_BLOCKCOPY(_sse2);
        LUMA_BLOCKCOPY(_sse2);
        CHROMA_SS_FILTERS(_sse2);
        LUMA_SS_FILTERS(_sse2);

        // These function pointer initializations are temporary and will be
        // replaced with macro definitions later. They are used to avoid
        // linker errors until all partitions are coded, and to keep the
        // committed patches smaller and easier to review.
        p.chroma_copy_sp[CHROMA_4x2] = x265_blockcopy_sp_4x2_sse2;
        p.chroma_copy_sp[CHROMA_4x4] = x265_blockcopy_sp_4x4_sse2;
        p.chroma_copy_sp[CHROMA_4x8] = x265_blockcopy_sp_4x8_sse2;
        p.chroma_copy_sp[CHROMA_4x16] = x265_blockcopy_sp_4x16_sse2;
        p.chroma_copy_sp[CHROMA_8x2] = x265_blockcopy_sp_8x2_sse2;
        p.chroma_copy_sp[CHROMA_8x4] = x265_blockcopy_sp_8x4_sse2;
        p.chroma_copy_sp[CHROMA_8x6] = x265_blockcopy_sp_8x6_sse2;
        p.chroma_copy_sp[CHROMA_8x8] = x265_blockcopy_sp_8x8_sse2;
        p.chroma_copy_sp[CHROMA_8x16] = x265_blockcopy_sp_8x16_sse2;
        p.chroma_copy_sp[CHROMA_12x16] = x265_blockcopy_sp_12x16_sse2;
        p.chroma_copy_sp[CHROMA_16x4] = x265_blockcopy_sp_16x4_sse2;
        p.chroma_copy_sp[CHROMA_16x8] = x265_blockcopy_sp_16x8_sse2;
        p.chroma_copy_sp[CHROMA_16x12] = x265_blockcopy_sp_16x12_sse2;
        p.chroma_copy_sp[CHROMA_16x16] = x265_blockcopy_sp_16x16_sse2;
        p.chroma_copy_sp[CHROMA_16x32] = x265_blockcopy_sp_16x32_sse2;
        p.luma_copy_sp[LUMA_16x64] = x265_blockcopy_sp_16x64_sse2;
        p.chroma_copy_sp[CHROMA_24x32] = x265_blockcopy_sp_24x32_sse2;
        p.chroma_copy_sp[CHROMA_32x8] = x265_blockcopy_sp_32x8_sse2;
        p.chroma_copy_sp[CHROMA_32x16] = x265_blockcopy_sp_32x16_sse2;
        p.chroma_copy_sp[CHROMA_32x24] = x265_blockcopy_sp_32x24_sse2;
        p.chroma_copy_sp[CHROMA_32x32] = x265_blockcopy_sp_32x32_sse2;
        p.luma_copy_sp[LUMA_32x64] = x265_blockcopy_sp_32x64_sse2;
        p.luma_copy_sp[LUMA_48x64] = x265_blockcopy_sp_48x64_sse2;
        p.luma_copy_sp[LUMA_64x16] = x265_blockcopy_sp_64x16_sse2;
        p.luma_copy_sp[LUMA_64x32] = x265_blockcopy_sp_64x32_sse2;
        p.luma_copy_sp[LUMA_64x48] = x265_blockcopy_sp_64x48_sse2;
        p.luma_copy_sp[LUMA_64x64] = x265_blockcopy_sp_64x64_sse2;

        p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
        p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
        p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
        p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;

        p.frame_init_lowres_core = x265_frame_init_lowres_core_sse2;
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_sse2;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse2;
        SA8D_INTER_FROM_BLOCK(sse2);

        p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
        p.ipfilter_ss[FILTER_V_S_S_8] = x265_interp_8tap_v_ss_sse2;
        p.calcrecon[BLOCK_4x4] = x265_calcRecons4_sse2;
        p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
        p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
        p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
    }
    if (cpuMask & X265_CPU_SSSE3)
    {
        p.frame_init_lowres_core = x265_frame_init_lowres_core_ssse3;
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_ssse3;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_ssse3;
        SA8D_INTER_FROM_BLOCK(ssse3);
        p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
        ASSGN_SSE(ssse3);
        PIXEL_AVG(ssse3);
        PIXEL_AVG_W4(ssse3);
        p.scale1D_128to64 = x265_scale1D_128to64_ssse3;

        p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
        p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
        p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
        p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_ssse3;
        p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_ssse3;
        p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_ssse3;
        p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_ssse3;
        p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_ssse3;
        p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ssse3;
        p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ssse3;
        p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ssse3;
        p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ssse3;
        p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ssse3;
        p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ssse3;
        p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_ssse3;
        p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_ssse3;
        p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_ssse3;
        p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ssse3;
        p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ssse3;
        p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ssse3;
        p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ssse3;
        p.sad_x4[LUMA_32x8] = x265_pixel_sad_x4_32x8_ssse3;
        p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_ssse3;
        p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_ssse3;
        p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_ssse3;
        p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_ssse3;
        p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_ssse3;
        p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_ssse3;
        p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_ssse3;
        p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_ssse3;
        p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_ssse3;
        p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_ssse3;
        p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_ssse3;
        p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_ssse3;
        p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ssse3;
        p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ssse3;

        p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
        p.luma_p2s = x265_luma_p2s_ssse3;
        p.chroma_p2s = x265_chroma_p2s_ssse3;
        CHROMA_SP_FILTERS(_ssse3);
        LUMA_SP_FILTERS(_ssse3);
    }
    if (cpuMask & X265_CPU_SSE4)
    {
        p.satd[LUMA_4x16] = x265_pixel_satd_4x16_sse4;
        p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse4;
        p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse4;
        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse4;
        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse4;
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_sse4;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse4;
        SA8D_INTER_FROM_BLOCK(sse4);
        CHROMA_FILTERS(_sse4);
        LUMA_FILTERS(_sse4);
        HEVC_SATD(sse4);

        p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
        p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
        p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
        p.chroma_vsp[CHROMA_2x4] = x265_interp_4tap_vert_sp_2x4_sse4;
        p.chroma_vsp[CHROMA_2x8] = x265_interp_4tap_vert_sp_2x8_sse4;
        p.chroma_vsp[CHROMA_6x8] = x265_interp_4tap_vert_sp_6x8_sse4;
        p.calcrecon[BLOCK_16x16] = x265_calcRecons16_sse4;
        p.calcrecon[BLOCK_32x32] = x265_calcRecons32_sse4;
        p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4;
        p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
    }
    if (cpuMask & X265_CPU_AVX)
    {
        p.frame_init_lowres_core = x265_frame_init_lowres_core_avx;
        p.satd[LUMA_4x16] = x265_pixel_satd_4x16_avx;
        p.satd[LUMA_12x16] = x265_pixel_satd_12x16_avx;
        p.satd[LUMA_32x8] = x265_pixel_satd_32x8_avx;
        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_avx;
        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_avx;
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_avx;
        SA8D_INTER_FROM_BLOCK(avx);
        ASSGN_SSE(avx);
        HEVC_SATD(avx);

        p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_avx;
        p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_avx;
        p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_avx;
        p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_avx;
        p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_avx;
        p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx;
        p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_avx;
        p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx;
        p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_avx;
        p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_avx;
        p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_avx;
        p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_avx;
        p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_avx;
        p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_avx;
        p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_avx;
        p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_avx;
        p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_avx;
        p.sad_x4[LUMA_32x8] = x265_pixel_sad_x4_32x8_avx;
        p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_avx;
        p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_avx;
        p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_avx;
        p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_avx;
        p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_avx;
        p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_avx;
        p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_avx;
        p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_avx;
        p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_avx;
        p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_avx;
        p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_avx;
        p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_avx;
        p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_avx;
        p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_avx;
    }
    if (cpuMask & X265_CPU_XOP)
    {
        p.frame_init_lowres_core = x265_frame_init_lowres_core_xop;
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_xop;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_xop;
        SA8D_INTER_FROM_BLOCK(xop);
        INIT7(satd, _xop);
        INIT5_NAME(sse_pp, ssd, _xop);
        HEVC_SATD(xop);
    }
    if (cpuMask & X265_CPU_AVX2)
    {
        INIT2(sad_x4, _avx2);
        INIT4(satd, _avx2);
        INIT2_NAME(sse_pp, ssd, _avx2);
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx2;
        SA8D_INTER_FROM_BLOCK8(avx2);

        p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_24x32] = cmp<24, 32, 8, 16, x265_pixel_satd_8x16_avx2>;
        p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_32x64] = cmp<32, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_64x48] = cmp<64, 48, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_48x64] = cmp<48, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_64x16] = cmp<64, 16, 16, 16, x265_pixel_satd_16x16_avx2>;

        p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
        p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
    }
#endif // if HIGH_BIT_DEPTH
}
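/* The cmp<> template used in the AVX2 block above is not defined in this
 * excerpt. A minimal sketch of what such a partition-combining helper could
 * look like follows; the pixelcmp_t typedef, the parameter order, and the
 * use of x265's pixel typedef and <stdint.h>'s intptr_t are assumptions,
 * not taken from the original source. The idea: tile a W x H partition with
 * bw x bh calls to a smaller SIMD primitive and sum the partial costs, so a
 * 64x64 SATD can be synthesized from sixteen 16x16 kernel calls. */
typedef int (*pixelcmp_t)(pixel *fenc, intptr_t fencstride,
                          pixel *fref, intptr_t frefstride);

template<int W, int H, int bw, int bh, pixelcmp_t compare>
int cmp(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride)
{
    int sum = 0;
    for (int y = 0; y < H; y += bh)      // walk down the partition bh rows at a time
        for (int x = 0; x < W; x += bw)  // walk across bw columns at a time
            sum += compare(fenc + y * fencstride + x, fencstride,
                           fref + y * frefstride + x, frefstride);
    return sum;
}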
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8]  = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8]   = x264_pixel_ads1##cpu;

    /* C reference functions first, so every slot has a valid fallback */
    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT4( sa8d, );
    INIT_ADS( );

    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT_ADS( _mmxext );
#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            if( cpu&X264_CPU_CACHELINE_32 )
            {
                INIT5( sad, _cache32_mmxext );
                INIT4( sad_x3, _cache32_mmxext );
                INIT4( sad_x4, _cache32_mmxext );
            }
            else
            {
                INIT5( sad, _cache64_mmxext );
                INIT4( sad_x3, _cache64_mmxext );
                INIT4( sad_x4, _cache64_mmxext );
            }
        }
#else
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            pixf->sad[PIXEL_8x16]    = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]     = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]     = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    // disabled on AMD processors (detected via the 3DNow! flag), where these are slower
    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT5( satd, _sse2 );
        INIT_ADS( _sse2 );
#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }

    // these are faster on both Intel and AMD
    if( cpu&X264_CPU_SSE2 )
    {
        INIT2( ssd, _sse2 );
        pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4       = x264_pixel_ssim_end4_sse2;
#ifdef ARCH_X86_64
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

#ifdef HAVE_SSE3
    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_SPLIT) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT5( satd, _ssse3 );
        INIT_ADS( _ssse3 );
#ifdef ARCH_X86_64
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
    }
#endif //HAVE_SSE3
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

    /* remaining ads slots reuse the kernels selected above */
    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}
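/* For reference, the INIT* helpers above expand to plain table stores; for
 * example, INIT2( sad, _sse2 ) becomes: */
pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_sse2;
pixf->sad[PIXEL_16x8]  = x264_pixel_sad_16x8_sse2;
/* and the blank-suffix form INIT2( sad, ) selects the C reference
 * implementations (x264_pixel_sad_16x16, ...), which is why the table is
 * seeded with them before any SIMD override is applied. */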
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8]  = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8]   = x264_pixel_ads1##cpu;

    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT7( satd_x3, );
    INIT7( satd_x4, );
    INIT4( sa8d, );
    INIT_ADS( );

    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;
    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT7( satd_x3, _mmxext );
        INIT7( satd_x4, _mmxext );
        INIT_ADS( _mmxext );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmxext;
#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_32 )
        {
            INIT5( sad, _cache32_mmxext );
            INIT4( sad_x3, _cache32_mmxext );
            INIT4( sad_x4, _cache32_mmxext );
        }
        else if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT5( sad, _cache64_mmxext );
            INIT4( sad_x3, _cache64_mmxext );
            INIT4( sad_x4, _cache64_mmxext );
        }
#else
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            pixf->sad[PIXEL_8x16]    = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]     = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]     = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT_ADS( _sse2 );
        pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;
#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }

    if( cpu&X264_CPU_SSE2 )
    {
        INIT5( ssd, _sse2 );
        INIT5( satd, _sse2 );
        INIT5( satd_x3, _sse2 );
        INIT5( satd_x4, _sse2 );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_64) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT7( satd, _ssse3 );
        INIT7( satd_x3, _ssse3 );
        INIT7( satd_x4, _ssse3 );
        INIT_ADS( _ssse3 );
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_ssse3;
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_ssse3;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
        if( cpu&X264_CPU_PHADD_IS_FAST )
        {
            INIT5( satd, _ssse3_phadd );
            INIT5( satd_x3, _ssse3_phadd );
            INIT5( satd_x4, _ssse3_phadd );
        }
    }
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}
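/* A minimal usage sketch (hypothetical caller, not from the original file):
 * the encoder fills the dispatch table once for the detected CPU, then all
 * hot paths call through the function pointers with no further branching.
 * x264_cpu_detect() is x264's runtime CPU-flag probe; the helper below and
 * its arguments are illustrative only. */
static int cost_16x16( uint8_t *fenc, int i_fenc_stride,
                       uint8_t *fref, int i_fref_stride )
{
    x264_pixel_function_t pixf;
    x264_pixel_init( x264_cpu_detect(), &pixf );  /* normally done once at startup */
    /* the best available SAD kernel for this CPU is now behind one pointer */
    return pixf.sad[PIXEL_16x16]( fenc, i_fenc_stride, fref, i_fref_stride );
}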