Exemple #1
0
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8] = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8] = x264_pixel_ads1##cpu;

    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT4( sa8d, );
    INIT_ADS( );

    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT_ADS( _mmxext );

#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            if( cpu&X264_CPU_CACHELINE_32 )
            {
                INIT5( sad, _cache32_mmxext );
                INIT4( sad_x3, _cache32_mmxext );
                INIT4( sad_x4, _cache32_mmxext );
            }
            else
            {
                INIT5( sad, _cache64_mmxext );
                INIT4( sad_x3, _cache64_mmxext );
                INIT4( sad_x4, _cache64_mmxext );
            }
        }
#else
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    // disable on AMD processors since it is slower
    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT5( satd, _sse2 );
        INIT_ADS( _sse2 );

#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }
    // these are faster on both Intel and AMD
    if( cpu&X264_CPU_SSE2 )
    {
        INIT2( ssd, _sse2 );
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;

#ifdef ARCH_X86_64
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

#ifdef HAVE_SSE3
    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_SPLIT) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT5( satd, _ssse3 );
        INIT_ADS( _ssse3 );
#ifdef ARCH_X86_64
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_SPLIT )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
    }
#endif //HAVE_SSE3
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}
Exemple #2
0
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
    memset( pixf, 0, sizeof(*pixf) );

#define INIT2( name, cpu ) \
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define INIT4( name, cpu ) \
    INIT2( name, cpu ) \
    pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;
#define INIT5( name, cpu ) \
    INIT4( name, cpu ) \
    pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;
#define INIT7( name, cpu ) \
    INIT5( name, cpu ) \
    pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
    pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;

#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8] = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8] = x264_pixel_ads1##cpu;

    INIT7( sad, );
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
    INIT7( satd_x3, );
    INIT7( satd_x4, );
    INIT4( sa8d, );
    INIT_ADS( );

    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;

    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;

#ifdef HAVE_MMX
    if( cpu&X264_CPU_MMX )
    {
        INIT7( ssd, _mmx );
    }

    if( cpu&X264_CPU_MMXEXT )
    {
        INIT7( sad, _mmxext );
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
        INIT7( satd_x3, _mmxext );
        INIT7( satd_x4, _mmxext );
        INIT_ADS( _mmxext );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmxext;
#ifdef ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;

        if( cpu&X264_CPU_CACHELINE_32 )
        {
            INIT5( sad, _cache32_mmxext );
            INIT4( sad_x3, _cache32_mmxext );
            INIT4( sad_x4, _cache32_mmxext );
        }
        else if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT5( sad, _cache64_mmxext );
            INIT4( sad_x3, _cache64_mmxext );
            INIT4( sad_x4, _cache64_mmxext );
        }
#else
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
#endif
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
    }

    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
    {
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
        INIT_ADS( _sse2 );
        pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;

#ifdef ARCH_X86
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
        }
#endif
    }
    if( cpu&X264_CPU_SSE2 )
    {
        INIT5( ssd, _sse2 );
        INIT5( satd, _sse2 );
        INIT5( satd_x3, _sse2 );
        INIT5( satd_x4, _sse2 );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
    }

    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_64) )
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

    if( cpu&X264_CPU_SSSE3 )
    {
        INIT7( satd, _ssse3 );
        INIT7( satd_x3, _ssse3 );
        INIT7( satd_x4, _ssse3 );
        INIT_ADS( _ssse3 );
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_ssse3;
#ifdef ARCH_X86_64
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
#endif
        if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
        if( cpu&X264_CPU_PHADD_IS_FAST )
        {
            INIT5( satd, _ssse3_phadd );
            INIT5( satd_x3, _ssse3_phadd );
            INIT5( satd_x4, _ssse3_phadd );
        }
    }
#endif //HAVE_MMX

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
#ifdef ARCH_UltraSparc
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
#endif

    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
}