/**
    \fn substract
    \brief Compute this = saturated(2*src1 - src2) on the luma (Y) plane.

    Dispatches to the MMX implementation when the CPU supports it,
    otherwise falls back to the plain C version.
    NOTE(review): only src1's dimensions are used — assumes src1, src2 and
    this all share the same _width/_height; confirm against callers.

    \param src1 first source image (also provides the plane size)
    \param src2 second source image
    \return 1 on success (propagated from the tiny* helpers)
*/
uint8_t ADMImage::substract(ADMImage *src1, ADMImage *src2)
{
#ifdef ADM_CPU_X86
        if(CpuCaps::hasMMX())
        {
                return tinySubstractMMX(YPLANE(this), YPLANE(src1), YPLANE(src2),
                                        src1->_width * src1->_height);
        }
#endif
        return tinySubstract(YPLANE(this), YPLANE(src1), YPLANE(src2),
                             src1->_width * src1->_height);
}
/**
    \fn substract
    \brief Compute this = saturated(2*src1 - src2) on the luma (Y) plane.

    Dispatches to the MMX implementation when the CPU supports it,
    otherwise falls back to the plain C version.
    NOTE(review): only src1's dimensions are used — assumes src1, src2 and
    this all share the same _width/_height; confirm against callers.

    \param src1 first source image (also provides the plane size)
    \param src2 second source image
    \return 1 on success (propagated from the tiny* helpers)
*/
uint8_t ADMImage::substract(ADMImage *src1, ADMImage *src2)
{
// Original read "#if 1 && defined(ARCH_X86) || defined(ARCH_X86_64)":
// the "1 &&" was dead and the precedence confusing; this is equivalent.
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        if(CpuCaps::hasMMX())
        {
                return tinySubstractMMX(YPLANE(this), YPLANE(src1), YPLANE(src2),
                                        src1->_width * src1->_height);
        }
#endif
        return tinySubstract(YPLANE(this), YPLANE(src1), YPLANE(src2),
                             src1->_width * src1->_height);
}
/**
    \fn tinySubstractMMX
    \brief Per-byte dst[i] = clamp(2*src1[i] - src2[i], 0, 255), MMX 4-at-a-time.

    The MMX sequence widens bytes to words (src1 doubled fits in 16 bits),
    uses psubusw for the lower clamp at 0 and packuswb for the upper clamp
    at 255.

    Bug fixes vs. the previous version:
    - The remainder was handled by recursing into this function with
      0 < l < 4, for which the MMX loop runs zero times and the remainder
      is unchanged — infinite recursion. A scalar tail loop is used instead.
    - The asm statements have no outputs, so without `volatile` and a
      "memory" clobber the compiler may reorder or discard them.

    \param dst  destination buffer, at least l bytes
    \param src1 first source buffer, at least l bytes
    \param src2 second source buffer, at least l bytes
    \param l    number of bytes to process
    \return 1 always
*/
static uint8_t tinySubstractMMX(uint8_t *dst, uint8_t *src1, uint8_t *src2, uint32_t l)
{
        uint32_t quads = l >> 2;  // 4-byte groups processed with MMX
        uint32_t rest  = l & 3;   // leftover bytes processed in scalar code
        uint8_t *s1 = src1;
        uint8_t *s2 = src2;
        uint8_t *d1 = dst;

        // Zero mm7, used below to unpack bytes into words.
#ifdef GCC_2_95_X
        __asm__ volatile("pxor %mm7,%mm7" ::: "memory");
#else
        __asm__ volatile("pxor %%mm7,%%mm7" ::: "memory");
#endif
        for(uint32_t x = 0; x < quads; x++)
        {
                __asm__ volatile(
                        "movd (%0),%%mm0 \n"
                        "movd (%1),%%mm1 \n"
                        "punpcklbw %%mm7,%%mm0 \n"
                        "punpcklbw %%mm7,%%mm1 \n"
                        "paddw %%mm0,%%mm0 \n"      // mm0 = 2*src1 (words)
                        "psubusw %%mm1,%%mm0 \n"    // mm0 = max(2*src1 - src2, 0)
                        "packuswb %%mm0, %%mm0\n"   // clamp words to [0,255] bytes
                        "movd %%mm0,(%2) \n"
                        :: "r"(s1), "r"(s2), "r"(d1)
                        : "memory");
                s1 += 4;
                s2 += 4;
                d1 += 4;
        }
        // Leave MMX state so the FPU is usable again.
        __asm__ volatile("emms\n" ::: "memory");

        // Scalar tail for the last 0..3 bytes (same clamp semantics as the asm).
        for(uint32_t x = 0; x < rest; x++)
        {
                int t = 2 * (int)s1[x] - (int)s2[x];
                if(t < 0)   t = 0;
                if(t > 255) t = 255;
                d1[x] = (uint8_t)t;
        }
        return 1;
}
static uint8_t tinySubstractMMX(uint8_t *dst, uint8_t *src1, uint8_t *src2,uint32_t l) { int delta; uint32_t ww,hh; uint8_t *s1,*s2,*d1; int ll,rr; ll=l>>2; rr=l&3; s1=src1; s2=src2; d1=dst; ADM_CLEAR_MM7(); for(int x=0; x<ll; x++) { __asm__ volatile( "movd (%0),%%mm0 \n" "movd (%1),%%mm1 \n" "punpcklbw %%mm7,%%mm0 \n" "punpcklbw %%mm7,%%mm1 \n" "paddw %%mm0,%%mm0 \n" "psubusw %%mm1,%%mm0 \n" // mm1=sum "packuswb %%mm0, %%mm0\n" "movd %%mm0,(%2) \n" :: "r"(s1),"r"(s2),"r"(d1) :"memory" ); s1+=4; s2+=4; d1+=4; } ADM_EMMS(); if(rr) tinySubstractMMX(d1, s1, s2,rr); return 1; }