/**
 * \fn substract
 * \brief Fill the plane selected by YPLANE(this) from the matching planes of
 *        src1 and src2, using the MMX helper when available.
 *
 * The number of bytes processed is src1->_width*src1->_height; the
 * dimensions of this image and of src2 are not consulted.
 * NOTE(review): this presumably requires all three images to share the same
 * geometry - confirm against callers.
 *
 * \param src1 first source image (also provides the element count)
 * \param src2 second source image
 * \return the value returned by the selected helper
 */
uint8_t ADMImage::substract(ADMImage *src1,ADMImage *src2)
{
        // Computed once so both code paths are guaranteed to use the same size.
        const uint32_t count=src1->_width*src1->_height;

#ifdef ADM_CPU_X86
        // Runtime check: the build targets x86, but the CPU must also report MMX.
        if(CpuCaps::hasMMX())
        {
                 return tinySubstractMMX(YPLANE(this),YPLANE(src1),YPLANE(src2),count);
        }
#endif
        // Portable fallback.
        return tinySubstract(YPLANE(this),YPLANE(src1),YPLANE(src2),count);
}
/**
 * \fn substract
 * \brief Fill the plane selected by YPLANE(this) from the matching planes of
 *        src1 and src2, using the MMX helper when available.
 *
 * The number of bytes processed is src1->_width*src1->_height; the
 * dimensions of this image and of src2 are not consulted.
 * NOTE(review): this presumably requires all three images to share the same
 * geometry - confirm against callers.
 *
 * \param src1 first source image (also provides the element count)
 * \param src2 second source image
 * \return the value returned by the selected helper
 */
uint8_t ADMImage::substract(ADMImage *src1,ADMImage *src2)
{
        // Computed once so both code paths are guaranteed to use the same size.
        const uint32_t count=src1->_width*src1->_height;

// The leading "1" is a manual on/off switch for the MMX path. The parentheses
// make it apply to both architecture macros ("&&" binds tighter than "||").
#if 1 && (defined( ARCH_X86 ) || defined (ARCH_X86_64))
        // Runtime check: the build targets x86, but the CPU must also report MMX.
        if(CpuCaps::hasMMX())
        {
                 return tinySubstractMMX(YPLANE(this),YPLANE(src1),YPLANE(src2),count);
        }
#endif
        // Portable fallback.
        return tinySubstract(YPLANE(this),YPLANE(src1),YPLANE(src2),count);
}
/**
 * \fn tinySubstractMMX
 * \brief MMX implementation of dst[i] = clamp(2*src1[i] - src2[i], 0, 255).
 *
 * Bytes are processed four at a time with MMX; the 0..3 bytes left over when
 * l is not a multiple of 4 are handled by a scalar loop computing the same
 * result.
 *
 * \param dst  destination buffer, at least l bytes
 * \param src1 first source buffer, at least l bytes
 * \param src2 second source buffer, at least l bytes
 * \param l    number of bytes to process
 * \return always 1
 */
static uint8_t tinySubstractMMX(uint8_t *dst, uint8_t *src1, uint8_t *src2,uint32_t l)
{
uint8_t *s1,*s2,*d1;
uint32_t ll,rr;
        ll=l>>2;        // number of complete 4-byte groups
        rr=l&3;         // leftover bytes (0..3)
        s1=src1;
        s2=src2;
        d1=dst;

        // mm7 = 0, used below to widen bytes to 16-bit words.
        // NOTE(review): mm7 is set in a separate asm statement; this relies on
        // the compiler not using MMX registers in between.
#ifdef GCC_2_95_X
        __asm__(
                         "pxor %mm7,%mm7"
                ::
                 );
#else
        __asm__(
                         "pxor %%mm7,%%mm7"
                ::
                 );
#endif
        for(uint32_t x=0;x<ll;x++)
                {
                        __asm__ volatile(
                        "movd           (%0),%%mm0 \n"          // mm0 = 4 bytes of src1
                        "movd           (%1),%%mm1 \n"          // mm1 = 4 bytes of src2

                        "punpcklbw      %%mm7,%%mm0 \n"         // widen src1 bytes to words
                        "punpcklbw      %%mm7,%%mm1 \n"         // widen src2 bytes to words

                        "paddw          %%mm0,%%mm0 \n"         // mm0 = 2*src1

                        "psubusw        %%mm1,%%mm0 \n"         // mm0 = max(2*src1-src2,0)
                        "packuswb       %%mm0,  %%mm0\n"        // saturate words to 0..255 bytes
                        "movd           %%mm0,(%2) \n"          // store 4 result bytes
                        :: "r"(s1),"r"(s2),"r"(d1)
                        // The asm reads and writes memory through its pointer
                        // operands; tell the compiler so it does not cache or
                        // reorder accesses around it.
                        : "memory"
                        );
                        s1+=4;
                        s2+=4;
                        d1+=4;
                }
                 __asm__(
                        "emms\n"
                ::
                );
        // Scalar tail. The previous code called tinySubstractMMX() again with
        // rr (1..3), which gives ll==0 and the same rr, i.e. endless recursion.
        for(uint32_t x=0;x<rr;x++)
                {
                        int v=2*s1[x]-s2[x];
                        if(v<0) v=0;
                        if(v>255) v=255;
                        d1[x]=static_cast<uint8_t>(v);
                }
        return 1;
}
/**
 * \fn tinySubstractMMX
 * \brief MMX implementation of dst[i] = clamp(2*src1[i] - src2[i], 0, 255).
 *
 * Bytes are processed four at a time with MMX; the 0..3 bytes left over when
 * l is not a multiple of 4 are handled by a scalar loop computing the same
 * result.
 *
 * \param dst  destination buffer, at least l bytes
 * \param src1 first source buffer, at least l bytes
 * \param src2 second source buffer, at least l bytes
 * \param l    number of bytes to process
 * \return always 1
 */
static uint8_t tinySubstractMMX(uint8_t *dst, uint8_t *src1, uint8_t *src2,uint32_t l)
{
    uint8_t *s1,*s2,*d1;
    uint32_t ll,rr;
    ll=l>>2;    // number of complete 4-byte groups
    rr=l&3;     // leftover bytes (0..3)
    s1=src1;
    s2=src2;
    d1=dst;

    // The asm below uses mm7 as the zero register when widening bytes to
    // words. NOTE(review): ADM_CLEAR_MM7 is presumably "pxor mm7,mm7" - confirm.
    ADM_CLEAR_MM7();
    for(uint32_t x=0; x<ll; x++)
    {
        __asm__ volatile(
            "movd           (%0),%%mm0 \n"      // mm0 = 4 bytes of src1
            "movd           (%1),%%mm1 \n"      // mm1 = 4 bytes of src2

            "punpcklbw      %%mm7,%%mm0 \n"     // interleave src1 bytes with mm7
            "punpcklbw      %%mm7,%%mm1 \n"     // interleave src2 bytes with mm7

            "paddw          %%mm0,%%mm0 \n"     // mm0 = 2*src1

            "psubusw        %%mm1,%%mm0 \n"     // mm0 = max(2*src1-src2,0)
            "packuswb       %%mm0,  %%mm0\n"    // saturate words to 0..255 bytes
            "movd           %%mm0,(%2) \n"      // store 4 result bytes
            :: "r"(s1),"r"(s2),"r"(d1)
            :"memory"
        );
        s1+=4;
        s2+=4;
        d1+=4;
    }
    ADM_EMMS();
    // Scalar tail. The previous code called tinySubstractMMX() again with
    // rr (1..3), which gives ll==0 and the same rr, i.e. endless recursion.
    for(uint32_t x=0; x<rr; x++)
    {
        int v=2*s1[x]-s2[x];
        if(v<0) v=0;
        if(v>255) v=255;
        d1[x]=static_cast<uint8_t>(v);
    }
    return 1;
}