/*
 * Store the byte-wise average of src1 and src2 into dst, using MMX.
 *
 * dst   destination buffer, written for l bytes
 * src1  first input buffer, read for l bytes
 * src2  second input buffer, read for l bytes
 * l     number of bytes to process
 *
 * The bulk of the data is handled four bytes per iteration; each output
 * byte is (a + b) >> 1, i.e. the sum is truncated, not rounded.
 * The remaining (l & 3) bytes are handed to tinyAverage()
 * (presumably the scalar fallback -- defined outside this block).
 *
 * Always returns 1.
 */
static uint8_t tinyAverageMMX(uint8_t *dst, uint8_t *src1, uint8_t *src2, uint32_t l)
{
    uint8_t *s1 = src1;
    uint8_t *s2 = src2;
    uint8_t *d1 = dst;
    uint32_t ww = l >> 2;   /* number of complete 4-byte groups */
    uint32_t rr = l & 3;    /* leftover bytes after the MMX loop */

    /* mm7 = 0: used below to widen bytes to 16-bit words. */
#ifdef GCC_2_95_X
    __asm__( "pxor %mm7,%mm7" :: );
#else
    __asm__( "pxor %%mm7,%%mm7" :: );
#endif

    for (uint32_t y = 0; y < ww; y++) {
        /*
         * Load 4 bytes from each source, widen to words, add, halve,
         * narrow back to bytes and store 4 bytes.
         *
         * The asm reads and writes memory that is not described by any
         * operand (only the pointers are passed), so the "memory" clobber
         * is required to stop the compiler from caching or reordering
         * accesses to the buffers around it.
         */
        __asm__ volatile(
            "movd         (%0),%%mm0 \n"
            "movd         (%1),%%mm1 \n"
            "punpcklbw    %%mm7,%%mm0 \n"
            "punpcklbw    %%mm7,%%mm1 \n"
            "paddw        %%mm1,%%mm0 \n"
            "psrlw        $1,%%mm0 \n"
            "packuswb     %%mm0,  %%mm0\n"
            "movd         %%mm0,(%2) \n"
            :
            : "r" (s1), "r" (s2), "r" (d1)
            : "memory"
        );
        s1 += 4;
        s2 += 4;
        d1 += 4;
    }

    /* Leave MMX state so that x87 floating point code can run again. */
    __asm__( "emms\n" :: );

    if (rr)
        tinyAverage(d1, s1, s2, rr);
    return 1;
}
/*
 * Run tinyAverage() over the three planes of an image triple.
 *
 * tgt   image whose planes are passed as the first (destination) argument;
 *       its _width/_height drive the sizes used for all three planes
 * src   image whose planes are passed as the second argument
 * srcP  image whose planes are passed as the third argument
 *
 * The Y plane is processed at full width/height, the U and V planes at
 * half width and half height (consistent with 4:2:0 subsampling --
 * assumption, the plane layout is defined by YPLANE/UPLANE/VPLANE outside
 * this block).  src and srcP are presumably the same size as tgt; this is
 * not checked here.
 *
 * NOTE(review): tinyAverage() is called here with separate width and
 * height arguments, while other code in this file calls it with a single
 * length -- confirm the intended signature/overload.
 *
 * Always returns 1.
 */
static uint8_t averageF(ADMImage *tgt, ADMImage *src, ADMImage *srcP)
{
    /* Luma, full resolution. */
    tinyAverage(YPLANE(tgt), YPLANE(src), YPLANE(srcP),
                tgt->_width, tgt->_height);

    /* Chroma U, half resolution in both directions. */
    tinyAverage(UPLANE(tgt), UPLANE(src), UPLANE(srcP),
                tgt->_width>>1, tgt->_height>>1);

    /* Chroma V, half resolution in both directions. */
    tinyAverage(VPLANE(tgt), VPLANE(src), VPLANE(srcP),
                tgt->_width>>1, tgt->_height>>1);

    return 1;
}
/*
 * MMX byte averaging: dst[i] = (src1[i] + src2[i]) >> 1 for l bytes.
 *
 * dst   output buffer (l bytes written)
 * src1  first input buffer (l bytes read)
 * src2  second input buffer (l bytes read)
 * l     byte count
 *
 * Four bytes are processed per loop iteration; whatever is left over
 * (l & 3 bytes) is forwarded to tinyAverage(), which is defined outside
 * this block (presumably the scalar version).
 *
 * Always returns 1.
 */
static uint8_t tinyAverageMMX(uint8_t *dst, uint8_t *src1, uint8_t *src2, uint32_t l)
{
    uint8_t *out = dst;
    uint8_t *inA = src1;
    uint8_t *inB = src2;
    const uint32_t quads = l >> 2;  /* complete 4-byte groups */
    const uint32_t tail  = l & 3;   /* bytes left for the fallback */

    /* Judging by its name, zeroes mm7, which the unpack steps below rely on. */
    ADM_CLEAR_MM7();

    for (uint32_t left = quads; left != 0; --left)
    {
        /* Widen both 4-byte inputs to words, add, halve, narrow, store. */
        __asm__ volatile(
            "movd         (%0),%%mm0 \n"
            "movd         (%1),%%mm1 \n"
            "punpcklbw    %%mm7,%%mm0 \n"
            "punpcklbw    %%mm7,%%mm1 \n"
            "paddw        %%mm1,%%mm0 \n"
            "psrlw        $1,%%mm0 \n"
            "packuswb     %%mm0,  %%mm0\n"
            "movd         %%mm0,(%2) \n"
            :
            : "r" (inA), "r" (inB), "r" (out)
            : "memory"
        );
        inA += 4;
        inB += 4;
        out += 4;
    }

    /* Project macro; by its name, issues EMMS to release the MMX state. */
    ADM_EMMS();

    if (tail)
    {
        tinyAverage(out, inA, inB, tail);
    }
    return 1;
}