Пример #1
0
/*
 * Convert a packed YUY2 image into three separate Y, U and V planes (YV12),
 * two source lines at a time, using the MMXEXT_YUYV_YUV420() macro that is
 * defined elsewhere.
 *
 *   yuy2_map, yuy2_pitch   source image and its line stride in bytes
 *   y_dst,    y_dst_pitch  destination luma plane and its stride
 *   u_dst,    u_dst_pitch  destination U plane and its stride
 *   v_dst,    v_dst_pitch  destination V plane and its stride
 *   width, height          image size; the margin computations below treat a
 *                          source line as width*2 bytes, a luma line as width
 *                          bytes and a chroma line as width/2 bytes
 *
 * When HAVE_MMX is not set the whole body is compiled out and the function
 * does nothing.
 *
 * NOTE(review): the inner loop runs width/8 times and there is no scalar
 * tail, while the per-row margins assume a full `width` was consumed. If
 * width is not a multiple of 8 the pointers would appear to drift from row
 * to row -- confirm that callers guarantee width % 8 == 0.
 */
static void yuy2_to_yv12_mmxext
(const unsigned char *yuy2_map, int yuy2_pitch,
 unsigned char *y_dst, int y_dst_pitch, 
 unsigned char *u_dst, int u_dst_pitch, 
 unsigned char *v_dst, int v_dst_pitch, 
 int width, int height) 
{
#if HAVE_MMX
    /* These locals are not passed to MMXEXT_YUYV_YUV420(); the macro
     * presumably reads and advances them by name, so they must keep
     * exactly these names -- TODO confirm against the macro definition. */
    const uint8_t *p_line1, *p_line2 = yuy2_map;
    uint8_t *p_y1, *p_y2 = y_dst;
    uint8_t *p_u = u_dst;
    uint8_t *p_v = v_dst;

    int i_x, i_y;

    /* Bytes between the end of the pixel data on a line and the start of
     * the next line, for each plane and for the packed source. */
    const int i_dest_margin = y_dst_pitch - width;
    const int i_dest_u_margin = u_dst_pitch - width/2;
    const int i_dest_v_margin = v_dst_pitch - width/2;
    const int i_source_margin = yuy2_pitch - width*2;

    /* Load mm7 with 0x00ff in every 16-bit word: all ones, then each word
     * shifted right by 8. Presumably used as a byte mask by the conversion
     * macro below, so mm7 must stay untouched until the loops finish. */
    __asm__ __volatile__(
            "pcmpeqw %mm7, %mm7           \n\t"
            "psrlw $8, %mm7               \n\t" /* 00 ff 00 ff 00 ff 00 ff */
            );

    /* One iteration per pair of lines; with an odd height the last line is
     * never visited. */
    for ( i_y = height / 2 ; i_y-- ; )
    {
        /* p_line1/p_y1 address the upper line of the pair, p_line2/p_y2 the
         * lower one. */
        p_line1 = p_line2;
        p_line2 += yuy2_pitch;

        p_y1 = p_y2;
        p_y2 += y_dst_pitch;

        /* Eight pixels per macro invocation. */
        for ( i_x = width / 8 ; i_x-- ; )
        {
            MMXEXT_YUYV_YUV420( );
        }

        /* Skip the padding at the end of each line. This lands on the start
         * of the next line pair only if the macro advanced every pointer
         * across the full width (see the review note in the header). */
        p_y2 += i_dest_margin;
        p_u += i_dest_u_margin;
        p_v += i_dest_v_margin;
        p_line2 += i_source_margin;
    }

    /* sfence()/emms() are defined elsewhere; presumably they order the
     * stores issued above and reset the MMX/x87 state before returning. */
    sfence();
    emms();
#endif
}
Пример #2
0
/**
 * Average two packed 4:2:2 scanlines byte by byte and store the result.
 *
 * @param output  destination scanline; width * 2 bytes are written
 * @param top     first source scanline; width * 2 bytes are read
 * @param bot     second source scanline; width * 2 bytes are read
 * @param width   scanline width in pixels (two bytes per pixel)
 *
 * The work is split into three stages: 16 pixels (32 bytes) per iteration,
 * then 4 pixels (8 bytes) per iteration, then a scalar loop for the final
 * 0..3 pixels. The vector stages go through the pavgb_r2r / movntq_r2m
 * helper macros, which are defined elsewhere.
 */
static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top,
                                                   uint8_t *bot, int width )
{
    int i;

    /* Stage 1: 16 pixels = 32 bytes per iteration, four quadwords each. */
    for( i = width/16; i; --i ) {
        movq_m2r( *bot, mm0 );
        movq_m2r( *top, mm1 );
        movq_m2r( *(bot + 8), mm2 );
        movq_m2r( *(top + 8), mm3 );
        movq_m2r( *(bot + 16), mm4 );
        movq_m2r( *(top + 16), mm5 );
        movq_m2r( *(bot + 24), mm6 );
        movq_m2r( *(top + 24), mm7 );
        pavgb_r2r( mm1, mm0 );
        pavgb_r2r( mm3, mm2 );
        pavgb_r2r( mm5, mm4 );
        pavgb_r2r( mm7, mm6 );
        movntq_r2m( mm0, *output );
        movntq_r2m( mm2, *(output + 8) );
        movntq_r2m( mm4, *(output + 16) );
        movntq_r2m( mm6, *(output + 24) );
        output += 32;
        top += 32;
        bot += 32;
    }
    /* 0..15 pixels remain. */
    width = (width & 0xf);

    /* Stage 2: 4 pixels = 8 bytes per iteration. */
    for( i = width/4; i; --i ) {
        movq_m2r( *bot, mm0 );
        movq_m2r( *top, mm1 );
        pavgb_r2r( mm1, mm0 );
        movntq_r2m( mm0, *output );
        output += 8;
        top += 8;
        bot += 8;
    }
    /*
     * Only width % 4 pixels are left now. The mask must be 0x3: with 0x7 a
     * remainder of 4..7 (or 12..15) pixels counted the last group of four a
     * second time, so the tail loop ran 8 bytes past the end of output, top
     * and bot.
     */
    width = width & 0x3;

    /*
     * Handle last few pixels.
     * NOTE(review): this scalar average truncates, while the PAVGB
     * instruction (which pavgb_r2r presumably emits) rounds up, so tail
     * bytes can differ by one from the vector stages. Left unchanged here.
     */
    for( i = width * 2; i; --i ) {
        *output++ = ((*top++) + (*bot++)) >> 1;
    }

    /* Defined elsewhere; presumably order the stores issued above and reset
     * the MMX/x87 state before returning to the caller. */
    sfence();
    emms();
}
Пример #3
0
/**
 * Copy n bytes from s to d through the MMX registers.
 *
 * @param d  destination buffer
 * @param s  source buffer
 * @param n  number of bytes to copy
 *
 * Data is moved in 64-byte groups, then in 8-byte groups, and whatever is
 * left (1..8 bytes) is handed to small_memcpy(). Both vector loops use a
 * strict "greater than" test, so a final full group is always left for the
 * next stage. When d and s are the same address the function returns at
 * once, without calling sfence() or emms().
 */
static void fast_memcpy_mmxext( void *d, const void *s, size_t n )
{
    const uint8_t *in = s;
    uint8_t *out = d;

    /* Copying a buffer onto itself is a no-op. */
    if( out == in ) {
        return;
    }

    /* 64 bytes per pass: fill all eight MMX registers, then store them. */
    for( ; n > 64; n -= 64, in += 64, out += 64 ) {
        movq_m2r( *in, mm0 );
        movq_m2r( *(in + 8), mm1 );
        movq_m2r( *(in + 16), mm2 );
        movq_m2r( *(in + 24), mm3 );
        movq_m2r( *(in + 32), mm4 );
        movq_m2r( *(in + 40), mm5 );
        movq_m2r( *(in + 48), mm6 );
        movq_m2r( *(in + 56), mm7 );
        movntq_r2m( mm0, *out );
        movntq_r2m( mm1, *(out + 8) );
        movntq_r2m( mm2, *(out + 16) );
        movntq_r2m( mm3, *(out + 24) );
        movntq_r2m( mm4, *(out + 32) );
        movntq_r2m( mm5, *(out + 40) );
        movntq_r2m( mm6, *(out + 48) );
        movntq_r2m( mm7, *(out + 56) );
    }

    /* 8 bytes per pass through a single register. */
    for( ; n > 8; n -= 8, in += 8, out += 8 ) {
        movq_m2r( *in, mm0 );
        movntq_r2m( mm0, *out );
    }

    /* Trailing bytes, if any, go through the helper defined elsewhere. */
    if( n != 0 ) {
        small_memcpy( out, in, n );
    }

    /* Defined elsewhere; presumably order the stores issued above and reset
     * the MMX/x87 state before returning to the caller. */
    sfence();
    emms();
}
Пример #4
0
/*
 * Convert a planar YV12 image (separate Y, U and V planes) into a packed
 * YUY2 image, using the MMXEXT_YUV420_YUYV() macro for groups of eight
 * pixels and C_YUV420_YUYV() for the remaining pixel pairs of each row.
 * Both macros are defined elsewhere.
 *
 *   y_src, y_src_pitch     source luma plane and its line stride in bytes
 *   u_src, u_src_pitch     source U plane and its stride
 *   v_src, v_src_pitch     source V plane and its stride
 *   yuy2_map, yuy2_pitch   destination image and its stride
 *   width, height          image size; the margin computations below treat a
 *                          luma line as width bytes, a chroma line as
 *                          width/2 bytes and a packed line as width*2 bytes
 *   progressive            non-zero: walk consecutive line pairs in a single
 *                          pass; zero: make two passes, one starting at
 *                          line 0 and one at line 1, each stepping two lines
 *                          at a time
 *
 * When HAVE_MMX is not set the whole body is compiled out and the function
 * does nothing.
 */
static void yv12_to_yuy2_mmxext
(const unsigned char *y_src, int y_src_pitch, 
 const unsigned char *u_src, int u_src_pitch, 
 const unsigned char *v_src, int v_src_pitch, 
 unsigned char *yuy2_map, int yuy2_pitch,
 int width, int height, int progressive ) 
{
#if HAVE_MMX
    /* These locals are not passed to the conversion macros; the macros
     * presumably read and advance them by name, so they must keep exactly
     * these names -- TODO confirm against the macro definitions. */
    uint8_t *p_line1, *p_line2 = yuy2_map;
    const uint8_t *p_y1, *p_y2 = y_src;
    const uint8_t *p_u = u_src;
    const uint8_t *p_v = v_src;
    /* Second chroma pointers, one chroma line below p_u/p_v; presumably the
     * macros combine both chroma lines -- TODO confirm. */
    const uint8_t *p_u2 = u_src + u_src_pitch;
    const uint8_t *p_v2 = v_src + v_src_pitch;

    int i_x, i_y;

    /* Bytes between the end of the pixel data on a line and the start of
     * the next line, for each source plane and for the packed destination. */
    const int i_source_margin = y_src_pitch - width;
    const int i_source_u_margin = u_src_pitch - width/2;
    const int i_source_v_margin = v_src_pitch - width/2;
    const int i_dest_margin = yuy2_pitch - width*2;

    if ( progressive ) 
    {
        /* One iteration per pair of adjacent lines. */
        for ( i_y = height / 2; i_y-- ; )
        {
            p_line1 = p_line2;
            p_line2 += yuy2_pitch;

            p_y1 = p_y2;
            p_y2 += y_src_pitch;

            /* Eight pixels per invocation ... */
            for ( i_x = width / 8 ; i_x-- ; )
            {
                MMXEXT_YUV420_YUYV( );
            }
            /* ... then the leftover pixels, two per invocation. */
            for ( i_x = (width % 8) / 2 ; i_x-- ; )
            {
                C_YUV420_YUYV( );
            }

            /* Skip the end-of-line padding so every pointer lands on the
             * start of the next line pair. */
            p_y2 += i_source_margin;
            p_u += i_source_u_margin;
            p_v += i_source_v_margin;
            /* i_y was already decremented by the loop test. While at least
             * two iterations remain the second chroma pointers advance
             * normally; otherwise they are set equal to p_u/p_v, presumably
             * so the last line pair does not read a chroma line beyond the
             * end of the plane. */
            if ( i_y > 1 ) 
            {
                p_u2 += i_source_u_margin;
                p_v2 += i_source_v_margin;
            } 
            else 
            {
                p_u2 = p_u;
                p_v2 = p_v;
            }
            p_line2 += i_dest_margin;
        }
    } 
    else
    {
        /* First pass: starts at line 0 and steps two lines at a time. The
         * second chroma pointers start two chroma lines below p_u/p_v. */
        p_u2 = u_src + 2*u_src_pitch;
        p_v2 = v_src + 2*v_src_pitch;
        for ( i_y = height / 4 ; i_y-- ; )
        {
            /* p_line1/p_y1 and p_line2/p_y2 are two lines apart. */
            p_line1 = p_line2;
            p_line2 += 2 * yuy2_pitch;

            p_y1 = p_y2;
            p_y2 += 2 * y_src_pitch;

            for ( i_x = width / 8 ; i_x-- ; )
            {
                MMXEXT_YUV420_YUYV( );
            }
            for ( i_x = (width % 8) / 2 ; i_x-- ; )
            {
                C_YUV420_YUYV( );
            }

            /* Skip the end-of-line padding plus one whole extra line, which
             * is left for the second pass. */
            p_y2 += i_source_margin + y_src_pitch;
            p_u += i_source_u_margin + u_src_pitch;
            p_v += i_source_v_margin + v_src_pitch;
            /* Same end-of-plane handling as in the progressive branch. */
            if ( i_y > 1 ) 
            {
                p_u2 += i_source_u_margin + u_src_pitch;
                p_v2 += i_source_v_margin + v_src_pitch;
            } 
            else 
            {
                p_u2 = p_u;
                p_v2 = p_v;
            }
            p_line2 += i_dest_margin + yuy2_pitch;
        }

        /* Second pass: rewind every pointer to line 1 (chroma lines 1 and 3)
         * and repeat the same walk. */
        p_line2 = yuy2_map + yuy2_pitch;
        p_y2 = y_src + y_src_pitch;
        p_u = u_src + u_src_pitch;
        p_v = v_src + v_src_pitch;
        p_u2 = u_src + 3*u_src_pitch;
        p_v2 = v_src + 3*v_src_pitch;

        for ( i_y = height / 4 ; i_y-- ; )
        {
            p_line1 = p_line2;
            p_line2 += 2 * yuy2_pitch;

            p_y1 = p_y2;
            p_y2 += 2 * y_src_pitch;

            for ( i_x = width / 8 ; i_x-- ; )
            {
                MMXEXT_YUV420_YUYV( );
            }
            for ( i_x = (width % 8) / 2 ; i_x-- ; )
            {
                C_YUV420_YUYV( );
            }

            /* Skip the end-of-line padding plus the line already handled by
             * the first pass. */
            p_y2 += i_source_margin + y_src_pitch;
            p_u += i_source_u_margin + u_src_pitch;
            p_v += i_source_v_margin + v_src_pitch;
            if ( i_y > 1 ) 
            {
                p_u2 += i_source_u_margin + u_src_pitch;
                p_v2 += i_source_v_margin + v_src_pitch;
            } 
            else 
            {
                p_u2 = p_u;
                p_v2 = p_v;
            }
            p_line2 += i_dest_margin + yuy2_pitch;
        }
    }

    /* sfence()/emms() are defined elsewhere; presumably they order the
     * stores issued above and reset the MMX/x87 state before returning. */
    sfence();
    emms();

#endif
}