/*
 * Convert a packed YUY2 image (yuy2_map) into three separate planes
 * (y_dst, u_dst, v_dst), two source rows per iteration.
 *
 *   yuy2_map / yuy2_pitch   source image and its row stride in bytes
 *   y_dst    / y_dst_pitch  destination luma plane and row stride
 *   u_dst    / u_dst_pitch  destination U plane and row stride
 *   v_dst    / v_dst_pitch  destination V plane and row stride
 *   width, height           image size in pixels
 *
 * Each pair of source rows yields two luma rows and one row of U and V.
 * Only height / 2 row pairs are processed, so a trailing odd row is left
 * untouched.  When HAVE_MMX is not set the function compiles to a no-op.
 *
 * MMXEXT_YUYV_YUV420 is defined elsewhere in the project and works directly
 * on the local pointers below (p_line1, p_line2, p_y1, p_y2, p_u, p_v), so
 * their names must not change.
 * NOTE(review): the margin arithmetic implies that each macro invocation
 * converts 8 pixels and advances those pointers accordingly - confirm
 * against the macro definition.
 */
static void yuy2_to_yv12_mmxext
  (const unsigned char *yuy2_map, int yuy2_pitch,
   unsigned char *y_dst, int y_dst_pitch,
   unsigned char *u_dst, int u_dst_pitch,
   unsigned char *v_dst, int v_dst_pitch,
   int width, int height) {
#if HAVE_MMX
    const uint8_t *p_line1, *p_line2 = yuy2_map;
    uint8_t *p_y1, *p_y2 = y_dst;
    uint8_t *p_u = u_dst;
    uint8_t *p_v = v_dst;

    int i_x, i_y;

    /* Distance from the end of the converted pixels to the start of the
     * next row.  These are only correct if a full `width` pixels were
     * consumed on the row, which is why the tail loop below is required. */
    const int i_dest_margin = y_dst_pitch - width;
    const int i_dest_u_margin = u_dst_pitch - width/2;
    const int i_dest_v_margin = v_dst_pitch - width/2;
    const int i_source_margin = yuy2_pitch - width*2;

    /* Build a per-word 0x00ff mask in mm7 once, before the row loop. */
    __asm__ __volatile__(
        "pcmpeqw %mm7, %mm7 \n\t"
        "psrlw $8, %mm7 \n\t" /* 00 ff 00 ff 00 ff 00 ff */
    );

    for ( i_y = height / 2 ; i_y-- ; ) {
        /* p_line1/p_y1 address the upper row of the pair,
         * p_line2/p_y2 the lower one. */
        p_line1 = p_line2;
        p_line2 += yuy2_pitch;

        p_y1 = p_y2;
        p_y2 += y_dst_pitch;

        for ( i_x = width / 8 ; i_x-- ; ) {
            MMXEXT_YUYV_YUV420( );
        }

        /* Convert the pixels left over after the 8-pixel blocks, two at a
         * time (Y0 U Y1 V), in plain C.  Without this the tail stays
         * unconverted and every pointer falls behind by the leftover amount
         * on each row pair.  yv12_to_yuy2_mmxext handles its tail with the
         * same (width % 8) / 2 count.  Chroma is the truncating average of
         * the two source rows.  A final odd pixel is not converted. */
        for ( i_x = (width % 8) / 2 ; i_x-- ; ) {
            *p_y1++ = *p_line1++;
            *p_y2++ = *p_line2++;
            *p_u++ = (uint8_t)((*p_line1++ + *p_line2++) >> 1);
            *p_y1++ = *p_line1++;
            *p_y2++ = *p_line2++;
            *p_v++ = (uint8_t)((*p_line1++ + *p_line2++) >> 1);
        }

        /* Skip row padding; p_y2/p_line2 end up on the upper row of the
         * next pair. */
        p_y2 += i_dest_margin;
        p_u += i_dest_u_margin;
        p_v += i_dest_v_margin;
        p_line2 += i_source_margin;
    }

    /* sfence()/emms() are project helpers defined elsewhere; presumably
     * they issue the SFENCE and EMMS instructions. */
    sfence();
    emms();
#endif
}
static void interpolate_packed422_scanline_mmxext( uint8_t *output, uint8_t *top, uint8_t *bot, int width ) { int i; for( i = width/16; i; --i ) { movq_m2r( *bot, mm0 ); movq_m2r( *top, mm1 ); movq_m2r( *(bot + 8), mm2 ); movq_m2r( *(top + 8), mm3 ); movq_m2r( *(bot + 16), mm4 ); movq_m2r( *(top + 16), mm5 ); movq_m2r( *(bot + 24), mm6 ); movq_m2r( *(top + 24), mm7 ); pavgb_r2r( mm1, mm0 ); pavgb_r2r( mm3, mm2 ); pavgb_r2r( mm5, mm4 ); pavgb_r2r( mm7, mm6 ); movntq_r2m( mm0, *output ); movntq_r2m( mm2, *(output + 8) ); movntq_r2m( mm4, *(output + 16) ); movntq_r2m( mm6, *(output + 24) ); output += 32; top += 32; bot += 32; } width = (width & 0xf); for( i = width/4; i; --i ) { movq_m2r( *bot, mm0 ); movq_m2r( *top, mm1 ); pavgb_r2r( mm1, mm0 ); movntq_r2m( mm0, *output ); output += 8; top += 8; bot += 8; } width = width & 0x7; /* Handle last few pixels. */ for( i = width * 2; i; --i ) { *output++ = ((*top++) + (*bot++)) >> 1; } sfence(); emms(); }
static void fast_memcpy_mmxext( void *d, const void *s, size_t n ) { const uint8_t *src = s; uint8_t *dest = d; if( dest != src ) { while( n > 64 ) { movq_m2r( src[ 0 ], mm0 ); movq_m2r( src[ 8 ], mm1 ); movq_m2r( src[ 16 ], mm2 ); movq_m2r( src[ 24 ], mm3 ); movq_m2r( src[ 32 ], mm4 ); movq_m2r( src[ 40 ], mm5 ); movq_m2r( src[ 48 ], mm6 ); movq_m2r( src[ 56 ], mm7 ); movntq_r2m( mm0, dest[ 0 ] ); movntq_r2m( mm1, dest[ 8 ] ); movntq_r2m( mm2, dest[ 16 ] ); movntq_r2m( mm3, dest[ 24 ] ); movntq_r2m( mm4, dest[ 32 ] ); movntq_r2m( mm5, dest[ 40 ] ); movntq_r2m( mm6, dest[ 48 ] ); movntq_r2m( mm7, dest[ 56 ] ); dest += 64; src += 64; n -= 64; } while( n > 8 ) { movq_m2r( src[ 0 ], mm0 ); movntq_r2m( mm0, dest[ 0 ] ); dest += 8; src += 8; n -= 8; } if( n ) small_memcpy( dest, src, n ); sfence(); emms(); } }
/*
 * Convert three separate planes (y_src, u_src, v_src) into a packed image
 * written to yuy2_map.
 *
 *   y_src / y_src_pitch     source luma plane and its row stride in bytes
 *   u_src / u_src_pitch     source U plane and row stride
 *   v_src / v_src_pitch     source V plane and row stride
 *   yuy2_map / yuy2_pitch   destination image and row stride
 *   width, height           image size in pixels
 *   progressive             non-zero: walk consecutive row pairs;
 *                           zero: walk the image as two interleaved fields
 *
 * The per-pixel work is done by MMXEXT_YUV420_YUYV and C_YUV420_YUYV, which
 * are defined elsewhere in the project and operate directly on the local
 * pointers declared below (so their names are part of the contract with
 * those macros).
 * NOTE(review): the loop counts and margins imply that the MMX macro handles
 * 8 pixels and the C macro 2 pixels per invocation, and that both advance
 * the pointers - confirm against the macro definitions.
 *
 * Rows beyond height / 2 pairs (progressive) or height / 4 pairs per field
 * (interlaced) are not processed, and a final odd pixel in a row is skipped
 * because the tail count is (width % 8) / 2.  When HAVE_MMX is not set the
 * function compiles to a no-op.
 */
static void yv12_to_yuy2_mmxext
  (const unsigned char *y_src, int y_src_pitch,
   const unsigned char *u_src, int u_src_pitch,
   const unsigned char *v_src, int v_src_pitch,
   unsigned char *yuy2_map, int yuy2_pitch,
   int width, int height, int progressive ) {
#if HAVE_MMX
    /* p_line1/p_y1: upper row of the current pair; p_line2/p_y2: lower row. */
    uint8_t *p_line1, *p_line2 = yuy2_map;
    const uint8_t *p_y1, *p_y2 = y_src;
    /* p_u/p_v: chroma row for the current pair.  p_u2/p_v2: a later chroma
     * row (one row ahead here; re-initialised for the interlaced case).
     * Presumably the macros blend the two for the lower output row - TODO
     * confirm.
     * NOTE(review): if only a single iteration runs (e.g. height / 2 == 1),
     * these initial "ahead" pointers are never clamped back to p_u/p_v and
     * may point beyond the last chroma row - verify whether the macros read
     * them. */
    const uint8_t *p_u = u_src;
    const uint8_t *p_v = v_src;
    const uint8_t *p_u2 = u_src + u_src_pitch;
    const uint8_t *p_v2 = v_src + v_src_pitch;

    int i_x, i_y;

    /* Padding from the end of `width` pixels to the start of the next row. */
    const int i_source_margin = y_src_pitch - width;
    const int i_source_u_margin = u_src_pitch - width/2;
    const int i_source_v_margin = v_src_pitch - width/2;
    const int i_dest_margin = yuy2_pitch - width*2;

    if ( progressive ) {

        /* One iteration per pair of adjacent rows. */
        for ( i_y = height / 2; i_y-- ; ) {
            p_line1 = p_line2;
            p_line2 += yuy2_pitch;

            p_y1 = p_y2;
            p_y2 += y_src_pitch;

            for ( i_x = width / 8 ; i_x-- ; ) {
                MMXEXT_YUV420_YUYV( );
            }
            /* Leftover pixels that do not fill an 8-pixel block. */
            for ( i_x = (width % 8) / 2 ; i_x-- ; ) {
                C_YUV420_YUYV( );
            }

            p_y2 += i_source_margin;
            p_u += i_source_u_margin;
            p_v += i_source_v_margin;
            /* i_y now holds the number of iterations still to run.  While
             * more than one remains, keep p_u2/p_v2 a row ahead; for the
             * final iteration make them alias p_u/p_v instead. */
            if ( i_y > 1 ) {
                p_u2 += i_source_u_margin;
                p_v2 += i_source_v_margin;
            } else {
                p_u2 = p_u;
                p_v2 = p_v;
            }
            p_line2 += i_dest_margin;
        }

    } else {

        /* First field: rows 0, 2, 4, ... of luma/output, paired as (0,2),
         * (4,6), ...; chroma advances two rows per iteration starting at
         * row 0, with p_u2/p_v2 two chroma rows ahead. */
        p_u2 = u_src + 2*u_src_pitch;
        p_v2 = v_src + 2*v_src_pitch;

        for ( i_y = height / 4 ; i_y-- ; ) {
            p_line1 = p_line2;
            p_line2 += 2 * yuy2_pitch;

            p_y1 = p_y2;
            p_y2 += 2 * y_src_pitch;

            for ( i_x = width / 8 ; i_x-- ; ) {
                MMXEXT_YUV420_YUYV( );
            }
            for ( i_x = (width % 8) / 2 ; i_x-- ; ) {
                C_YUV420_YUYV( );
            }

            /* The extra "+ pitch" skips the row that belongs to the other
             * field. */
            p_y2 += i_source_margin + y_src_pitch;
            p_u += i_source_u_margin + u_src_pitch;
            p_v += i_source_v_margin + v_src_pitch;
            /* Same last-iteration clamp as in the progressive branch. */
            if ( i_y > 1 ) {
                p_u2 += i_source_u_margin + u_src_pitch;
                p_v2 += i_source_v_margin + v_src_pitch;
            } else {
                p_u2 = p_u;
                p_v2 = p_v;
            }
            p_line2 += i_dest_margin + yuy2_pitch;
        }

        /* Second field: restart one row down (rows 1, 3, 5, ...), with
         * chroma starting at row 1 and p_u2/p_v2 at row 3. */
        p_line2 = yuy2_map + yuy2_pitch;
        p_y2 = y_src + y_src_pitch;
        p_u = u_src + u_src_pitch;
        p_v = v_src + v_src_pitch;
        p_u2 = u_src + 3*u_src_pitch;
        p_v2 = v_src + 3*v_src_pitch;

        for ( i_y = height / 4 ; i_y-- ; ) {
            p_line1 = p_line2;
            p_line2 += 2 * yuy2_pitch;

            p_y1 = p_y2;
            p_y2 += 2 * y_src_pitch;

            for ( i_x = width / 8 ; i_x-- ; ) {
                MMXEXT_YUV420_YUYV( );
            }
            for ( i_x = (width % 8) / 2 ; i_x-- ; ) {
                C_YUV420_YUYV( );
            }

            p_y2 += i_source_margin + y_src_pitch;
            p_u += i_source_u_margin + u_src_pitch;
            p_v += i_source_v_margin + v_src_pitch;
            if ( i_y > 1 ) {
                p_u2 += i_source_u_margin + u_src_pitch;
                p_v2 += i_source_v_margin + v_src_pitch;
            } else {
                p_u2 = p_u;
                p_v2 = p_v;
            }
            p_line2 += i_dest_margin + yuy2_pitch;
        }

    }

    /* sfence()/emms() are project helpers defined elsewhere; presumably
     * they issue the SFENCE and EMMS instructions. */
    sfence();
    emms();
#endif
}