Example #1
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

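/* Each INIT* macro pastes together the function name, partition size and CPU
 * suffix to register one pointer per block size; an empty `cpu` argument
 * selects the plain C implementations. */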
#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

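/* The ads ("absolute differences of sums") kernels are used by exhaustive
 * motion search to prune candidate vectors cheaply; only three widths exist,
 * so the remaining sizes alias them at the end of this function. */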
#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8] = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8] = x264_pixel_ads1##cpu;

    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT4( sa8d, );
    INIT_ADS( );

    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

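/* From here on, each detected CPU feature overwrites the C pointers with a
 * faster implementation; later, more specific flags override earlier ones. */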
#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT_ADS( _mmxext );

#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            if( cpu&X264_CPU_CACHELINE_32 )
            {
                INIT5( sad, _cache32_mmxext );
                INIT4( sad_x3, _cache32_mmxext );
                INIT4( sad_x4, _cache32_mmxext );
            }
            else
            {
                INIT5( sad, _cache64_mmxext );
                INIT4( sad_x3, _cache64_mmxext );
                INIT4( sad_x4, _cache64_mmxext );
            }
        }
#else
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    // SSE2 SAD is slower than MMX on the AMD processors of this era, so skip
    // it there (the 3DNow! flag serves as an AMD-detection proxy)
    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT5( satd, _sse2 );
        INIT_ADS( _sse2 );

#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }
    // these are faster on both Intel and AMD
    if( cpu&X264_CPU_SSE2 )
    {
        INIT2( ssd, _sse2 );
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;

#ifdef ARCH_X86_64
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

#ifdef HAVE_SSE3
    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_SPLIT) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT5( satd, _ssse3 );
        INIT_ADS( _ssse3 );
#ifdef ARCH_X86_64
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
    }
#endif //HAVE_SSE3
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

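    /* no dedicated ads kernels exist for these sizes; reuse the nearest
     * wider implementation */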
    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}
Example #2

/* values used by math functions -- IEEE 754 float version */
#include "wctype.h"
#include "xmath.h"
_STD_BEGIN

		/* macros */
#define NBITS	(16 + _FOFF)
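/* NBITS counts the fraction bits of a float: _FOFF bits in the most
 * significant 16-bit word plus one full 16-bit word (23 for IEEE 754) */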
#if _D0
 #define INIT(w0)		{0, w0}
 #define INIT2(w0, w1)	{w1, w0}
#else
 #define INIT(w0)		{w0, 0}
 #define INIT2(w0, w1)	{w0, w1}
#endif
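/* _D0 is the array index of the most significant 16-bit word, so the INIT
 * macros above place the exponent word correctly on either endianness */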
		/* static data */
_CRTIMP2 const _Dconst _FDenorm = {INIT2(0, 1)};
_CRTIMP2 const _Dconst _FEps = {INIT((_FBIAS - NBITS - 1) << _FOFF)};
_CRTIMP2 const _Dconst _FInf = {INIT(_FMAX << _FOFF)};
_CRTIMP2 const _Dconst _FNan = {INIT(_FSIGN | (_FMAX << _FOFF)
	| (1 << (_FOFF - 1)))};
_CRTIMP2 const _Dconst _FSnan = {INIT(_FSIGN | (_FMAX << _FOFF))};
_CRTIMP2 const _Dconst _FRteps = {INIT((_FBIAS - NBITS / 2) << _FOFF)};
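/* _FXbig: once |x| exceeds roughly (NBITS + 1) * ln(2) / 2, exp(-x) no
 * longer affects exp(x) + exp(-x) at float precision; 347/1000 approximates
 * ln(2) / 2 */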
_CRTIMP2 const float _FXbig = (NBITS + 1) * 347L / 1000;

#if defined(__CENTERLINE__)
 #define _DYNAMIC_INIT_CONST(x) \
	(x._F = *(float *)(void *)(x._W))
_DYNAMIC_INIT_CONST(_FEps);
_DYNAMIC_INIT_CONST(_FInf);
_DYNAMIC_INIT_CONST(_FNan);
_DYNAMIC_INIT_CONST(_FRteps);
 #endif /* __CENTERLINE__ */
_STD_END
Example #3
 #if _DLONG == 0
		/* macros -- 64-bit */
  #define NBITS	(48 + _DOFF)
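		/* with _DLONG == 0, long double has the layout of a 64-bit double:
		   four 16-bit words and 48 + _DOFF (52 for IEEE 754) fraction bits */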

  #if _D0 == 0
   #define INIT(w0)		{w0, 0, 0, 0}
   #define INIT2(w0, w1)	{w0, 0, 0, w1}

  #else /* _D0 == 0 */
   #define INIT(w0)		{0, 0, 0, w0}
   #define INIT2(w0, w1)	{w1, 0, 0, w0}
  #endif /* _D0 == 0 */

		/* static data */
extern /* const */ _Dconst _LDenorm = {INIT2(0, 1)};
extern /* const */ _Dconst _LEps = {
	INIT((_DBIAS - NBITS - 1) << _DOFF)};
extern /* const */ _Dconst _LInf = {INIT(_DMAX << _DOFF)};
extern /* const */ _Dconst _LNan = {INIT((_DMAX << _DOFF)
	| (1 << (_DOFF - 1)))};
extern /* const */ _Dconst _LSnan = {INIT2(_DMAX << _DOFF, 1)};
extern /* const */ _Dconst _LRteps = {
	INIT((_DBIAS - NBITS / 2) << _DOFF)};

 #elif _DLONG == 1
		/* macros -- 80-bit */
  #define NBITS	64

  #if _D0 == 0
   #define INIT(w0, w1)		{w0, w1, 0, 0, 0}
Example #4

void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
{
#if HIGH_BIT_DEPTH
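    // No 16-bit assembly is wired up yet; this self-assignment is a no-op
    // placeholder that keeps both parameters referenced.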
    if (cpuMask & X265_CPU_SSE2) p.sa8d[0] = p.sa8d[0];
#else
    if (cpuMask & X265_CPU_SSE2)
    {
        INIT8_NAME(sse_pp, ssd, _mmx);
        INIT8(sad, _mmx2);
        INIT8(sad_x3, _mmx2);
        INIT8(sad_x4, _mmx2);
        INIT8(satd, _mmx2);
        p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
        p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
        p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
        p.satd[LUMA_16x12] = x265_pixel_satd_16x12_sse2;
        p.satd[LUMA_16x32] = x265_pixel_satd_16x32_sse2;
        p.satd[LUMA_16x64] = x265_pixel_satd_16x64_sse2;
        p.satd[LUMA_32x8]  = x265_pixel_satd_32x8_sse2;
        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse2;
        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse2;
        p.sa8d[BLOCK_4x4]  = x265_pixel_satd_4x4_mmx2;
        p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;

        PIXEL_AVG(sse2);
        PIXEL_AVG_W4(mmx2);

        p.sad[LUMA_8x32]  = x265_pixel_sad_8x32_sse2;
        p.sad[LUMA_16x4]  = x265_pixel_sad_16x4_sse2;
        p.sad[LUMA_16x12] = x265_pixel_sad_16x12_sse2;
        p.sad[LUMA_16x32] = x265_pixel_sad_16x32_sse2;
        p.sad[LUMA_16x64] = x265_pixel_sad_16x64_sse2;

        p.sad[LUMA_32x8]  = x265_pixel_sad_32x8_sse2;
        p.sad[LUMA_32x16] = x265_pixel_sad_32x16_sse2;
        p.sad[LUMA_32x24] = x265_pixel_sad_32x24_sse2;
        p.sad[LUMA_32x32] = x265_pixel_sad_32x32_sse2;
        p.sad[LUMA_32x64] = x265_pixel_sad_32x64_sse2;

        p.sad[LUMA_64x16] = x265_pixel_sad_64x16_sse2;
        p.sad[LUMA_64x32] = x265_pixel_sad_64x32_sse2;
        p.sad[LUMA_64x48] = x265_pixel_sad_64x48_sse2;
        p.sad[LUMA_64x64] = x265_pixel_sad_64x64_sse2;

        p.sad[LUMA_48x64] = x265_pixel_sad_48x64_sse2;
        p.sad[LUMA_24x32] = x265_pixel_sad_24x32_sse2;
        p.sad[LUMA_12x16] = x265_pixel_sad_12x16_sse2;

        ASSGN_SSE(sse2);
        INIT2(sad, _sse2);
        INIT2(sad_x3, _sse2);
        INIT2(sad_x4, _sse2);
        INIT6(satd, _sse2);
        HEVC_SATD(sse2);

        CHROMA_BLOCKCOPY(_sse2);
        LUMA_BLOCKCOPY(_sse2);

        CHROMA_SS_FILTERS(_sse2);
        LUMA_SS_FILTERS(_sse2);

        // These function pointer initializations are temporary and will be
        // replaced later by macro definitions. They avoid linker errors until
        // all partitions are coded, and keep patches smaller and easier to
        // review.

        p.chroma_copy_sp[CHROMA_4x2] = x265_blockcopy_sp_4x2_sse2;
        p.chroma_copy_sp[CHROMA_4x4] = x265_blockcopy_sp_4x4_sse2;
        p.chroma_copy_sp[CHROMA_4x8] = x265_blockcopy_sp_4x8_sse2;
        p.chroma_copy_sp[CHROMA_4x16] = x265_blockcopy_sp_4x16_sse2;
        p.chroma_copy_sp[CHROMA_8x2] = x265_blockcopy_sp_8x2_sse2;
        p.chroma_copy_sp[CHROMA_8x4] = x265_blockcopy_sp_8x4_sse2;
        p.chroma_copy_sp[CHROMA_8x6] = x265_blockcopy_sp_8x6_sse2;
        p.chroma_copy_sp[CHROMA_8x8] = x265_blockcopy_sp_8x8_sse2;
        p.chroma_copy_sp[CHROMA_8x16] = x265_blockcopy_sp_8x16_sse2;
        p.chroma_copy_sp[CHROMA_12x16] = x265_blockcopy_sp_12x16_sse2;
        p.chroma_copy_sp[CHROMA_16x4] = x265_blockcopy_sp_16x4_sse2;
        p.chroma_copy_sp[CHROMA_16x8] = x265_blockcopy_sp_16x8_sse2;
        p.chroma_copy_sp[CHROMA_16x12] = x265_blockcopy_sp_16x12_sse2;
        p.chroma_copy_sp[CHROMA_16x16] = x265_blockcopy_sp_16x16_sse2;
        p.chroma_copy_sp[CHROMA_16x32] = x265_blockcopy_sp_16x32_sse2;
        p.luma_copy_sp[LUMA_16x64] = x265_blockcopy_sp_16x64_sse2;
        p.chroma_copy_sp[CHROMA_24x32] = x265_blockcopy_sp_24x32_sse2;
        p.chroma_copy_sp[CHROMA_32x8] = x265_blockcopy_sp_32x8_sse2;
        p.chroma_copy_sp[CHROMA_32x16] = x265_blockcopy_sp_32x16_sse2;
        p.chroma_copy_sp[CHROMA_32x24] = x265_blockcopy_sp_32x24_sse2;
        p.chroma_copy_sp[CHROMA_32x32] = x265_blockcopy_sp_32x32_sse2;
        p.luma_copy_sp[LUMA_32x64] = x265_blockcopy_sp_32x64_sse2;
        p.luma_copy_sp[LUMA_48x64] = x265_blockcopy_sp_48x64_sse2;
        p.luma_copy_sp[LUMA_64x16] = x265_blockcopy_sp_64x16_sse2;
        p.luma_copy_sp[LUMA_64x32] = x265_blockcopy_sp_64x32_sse2;
        p.luma_copy_sp[LUMA_64x48] = x265_blockcopy_sp_64x48_sse2;
        p.luma_copy_sp[LUMA_64x64] = x265_blockcopy_sp_64x64_sse2;

        p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
        p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
        p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
        p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;

        p.frame_init_lowres_core = x265_frame_init_lowres_core_sse2;
        p.sa8d[BLOCK_8x8]   = x265_pixel_sa8d_8x8_sse2;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse2;
        SA8D_INTER_FROM_BLOCK(sse2);

        p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
        p.ipfilter_ss[FILTER_V_S_S_8] = x265_interp_8tap_v_ss_sse2;
        p.calcrecon[BLOCK_4x4] = x265_calcRecons4_sse2;
        p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
        p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
        p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
    }
    if (cpuMask & X265_CPU_SSSE3)
    {
        p.frame_init_lowres_core = x265_frame_init_lowres_core_ssse3;
        p.sa8d[BLOCK_8x8]   = x265_pixel_sa8d_8x8_ssse3;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_ssse3;
        SA8D_INTER_FROM_BLOCK(ssse3);
        p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
        ASSGN_SSE(ssse3);
        PIXEL_AVG(ssse3);
        PIXEL_AVG_W4(ssse3);

        p.scale1D_128to64 = x265_scale1D_128to64_ssse3;

        p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
        p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
        p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
        p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_ssse3;
        p.sad_x3[LUMA_8x32]  = x265_pixel_sad_x3_8x32_ssse3;
        p.sad_x4[LUMA_8x32]  = x265_pixel_sad_x4_8x32_ssse3;

        p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_ssse3;
        p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_ssse3;
        p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ssse3;
        p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ssse3;
        p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ssse3;
        p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ssse3;
        p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ssse3;
        p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ssse3;
        p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_ssse3;
        p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_ssse3;
        p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_ssse3;
        p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ssse3;
        p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ssse3;
        p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ssse3;
        p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ssse3;
        p.sad_x4[LUMA_32x8] = x265_pixel_sad_x4_32x8_ssse3;
        p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_ssse3;
        p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_ssse3;
        p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_ssse3;
        p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_ssse3;
        p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_ssse3;
        p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_ssse3;
        p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_ssse3;
        p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_ssse3;
        p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_ssse3;
        p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_ssse3;
        p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_ssse3;
        p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_ssse3;
        p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ssse3;
        p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ssse3;

        p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
        p.luma_p2s = x265_luma_p2s_ssse3;
        p.chroma_p2s = x265_chroma_p2s_ssse3;
        
        CHROMA_SP_FILTERS(_ssse3);
        LUMA_SP_FILTERS(_ssse3);

    }
    if (cpuMask & X265_CPU_SSE4)
    {
        p.satd[LUMA_4x16]   = x265_pixel_satd_4x16_sse4;
        p.satd[LUMA_12x16]  = x265_pixel_satd_12x16_sse4;
        p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse4;
        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse4;
        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse4;
        p.sa8d[BLOCK_8x8]   = x265_pixel_sa8d_8x8_sse4;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse4;
        SA8D_INTER_FROM_BLOCK(sse4);

        CHROMA_FILTERS(_sse4);
        LUMA_FILTERS(_sse4);
        HEVC_SATD(sse4);
        p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
        p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
        p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;

        p.chroma_vsp[CHROMA_2x4] = x265_interp_4tap_vert_sp_2x4_sse4;
        p.chroma_vsp[CHROMA_2x8] = x265_interp_4tap_vert_sp_2x8_sse4;
        p.chroma_vsp[CHROMA_6x8] = x265_interp_4tap_vert_sp_6x8_sse4;

        p.calcrecon[BLOCK_16x16] = x265_calcRecons16_sse4;
        p.calcrecon[BLOCK_32x32] = x265_calcRecons32_sse4;
        p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4;
        p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
    }
    if (cpuMask & X265_CPU_AVX)
    {
        p.frame_init_lowres_core = x265_frame_init_lowres_core_avx;
        p.satd[LUMA_4x16]   = x265_pixel_satd_4x16_avx;
        p.satd[LUMA_12x16]  = x265_pixel_satd_12x16_avx;
        p.satd[LUMA_32x8] = x265_pixel_satd_32x8_avx;
        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_avx;
        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_avx;
        p.sa8d[BLOCK_8x8]   = x265_pixel_sa8d_8x8_avx;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_avx;
        SA8D_INTER_FROM_BLOCK(avx);
        ASSGN_SSE(avx);
        HEVC_SATD(avx);

        p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_avx;
        p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_avx;
        p.sad_x3[LUMA_16x4]  = x265_pixel_sad_x3_16x4_avx;
        p.sad_x4[LUMA_16x4]  = x265_pixel_sad_x4_16x4_avx;
        p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_avx;
        p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx;
        p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_avx;
        p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx;
        p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_avx;
        p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_avx;
        p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_avx;
        p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_avx;

        p.sad_x3[LUMA_32x8]  = x265_pixel_sad_x3_32x8_avx;
        p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_avx;
        p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_avx;
        p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_avx;
        p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_avx;
        p.sad_x4[LUMA_32x8]  = x265_pixel_sad_x4_32x8_avx;
        p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_avx;
        p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_avx;
        p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_avx;
        p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_avx;
        p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_avx;
        p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_avx;
        p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_avx;
        p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_avx;
        p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_avx;
        p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_avx;
        p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_avx;
        p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_avx;
        p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_avx;
        p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_avx;
    }
    if (cpuMask & X265_CPU_XOP)
    {
        p.frame_init_lowres_core = x265_frame_init_lowres_core_xop;
        p.sa8d[BLOCK_8x8]   = x265_pixel_sa8d_8x8_xop;
        p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_xop;
        SA8D_INTER_FROM_BLOCK(xop);
        INIT7(satd, _xop);
        INIT5_NAME(sse_pp, ssd, _xop);
        HEVC_SATD(xop);
    }
    if (cpuMask & X265_CPU_AVX2)
    {
        INIT2(sad_x4, _avx2);
        INIT4(satd, _avx2);
        INIT2_NAME(sse_pp, ssd, _avx2);
        p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx2;
        SA8D_INTER_FROM_BLOCK8(avx2);
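        // Larger SATD sizes are tiled from the AVX2 16x16/8x16 primitives
        // via the cmp<> wrapper template.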
        p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_24x32] = cmp<24, 32, 8, 16, x265_pixel_satd_8x16_avx2>;
        p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_32x64] = cmp<32, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_64x48] = cmp<64, 48, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_48x64] = cmp<48, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
        p.satd[LUMA_64x16] = cmp<64, 16, 16, 16, x265_pixel_satd_16x16_avx2>;

        p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
        p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
    }
#endif // if HIGH_BIT_DEPTH
}
Example #5
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8] = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8] = x264_pixel_ads1##cpu;

    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT7( satd_x3, );
    INIT7( satd_x4, );
    INIT4( sa8d, );
    INIT_ADS( );

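    /* variance primitives, used by adaptive quantization */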
    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;

    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT7( satd_x3, _mmxext );
        INIT7( satd_x4, _mmxext );
        INIT_ADS( _mmxext );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmxext;
#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_32 )
        {
            INIT5( sad, _cache32_mmxext );
            INIT4( sad_x3, _cache32_mmxext );
            INIT4( sad_x4, _cache32_mmxext );
        }
        else if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT5( sad, _cache64_mmxext );
            INIT4( sad_x3, _cache64_mmxext );
            INIT4( sad_x4, _cache64_mmxext );
        }
#else
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT_ADS( _sse2 );
        pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;

#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }
    if( cpu&X264_CPU_SSE2 )
    {
        INIT5( ssd, _sse2 );
        INIT5( satd, _sse2 );
        INIT5( satd_x3, _sse2 );
        INIT5( satd_x4, _sse2 );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_64) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT7( satd, _ssse3 );
        INIT7( satd_x3, _ssse3 );
        INIT7( satd_x4, _ssse3 );
        INIT_ADS( _ssse3 );
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_ssse3;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
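        /* phadd-based SATD variant, enabled only where the CPU executes
         * phadd efficiently */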
        if( cpu&X264_CPU_PHADD_IS_FAST )
        {
            INIT5( satd, _ssse3_phadd );
            INIT5( satd_x3, _ssse3_phadd );
            INIT5( satd_x4, _ssse3_phadd );
        }
    }
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}
Example #6
_STD_BEGIN

		/* macros */
#define NBITS	(16 + _FOFF)

 #if _D0 == 0
  #define INIT(w0)		{w0, 0}
  #define INIT2(w0, w1)	{w0, w1}

 #else /* _D0 == 0 */
  #define INIT(w0)		{0, w0}
  #define INIT2(w0, w1)	{w1, w0}
 #endif /* _D0 == 0 */

		/* static data */
/* extern const */ _Dconst _FDenorm = {INIT2(0, 1)};
/* extern const */ _Dconst _FEps = {
	INIT((_FBIAS - NBITS - 1) << _FOFF)};
/* extern const */ _Dconst _FInf = {INIT(_FMAX << _FOFF)};
/* extern const */ _Dconst _FNan = {INIT((_FMAX << _FOFF)
	| (1 << (_FOFF - 1)))};
/* extern const */ _Dconst _FSnan = {INIT2(_FMAX << _FOFF, 1)};
/* extern const */ _Dconst _FRteps = {
	INIT((_FBIAS - NBITS / 2) << _FOFF)};

/* extern const */ float _FXbig = (NBITS + 1) * 347L / 1000;
/* extern const */ float _FZero = 0.0F;
_STD_END

/*
 * Copyright (c) 1992-2002 by P.J. Plauger.  ALL RIGHTS RESERVED.
 * Consult your license regarding permissions and restrictions.
 */