// Subtract `src` from `dst` in place over `length` bytes, picking the
// implementation at build time (USE_PPC_GFX / USE_X86_GFX / neither) and,
// where applicable, at run time from the `cpufuncs` capability flags.
//
//   dst    - buffer that is read and overwritten with the result
//   src    - buffer holding the values to subtract
//   length - number of bytes to process
//
// The scalar fallback is the BASIC_SUBFROM macro (defined elsewhere), which
// picks up the local `n`. It is seeded with length + 1 -- presumably the
// macro pre-decrements before testing; confirm against its definition.
void imageFilterSubFrom(unsigned char *dst, unsigned char *src, int length)
{
#if defined(USE_PPC_GFX)
    // PowerPC build: AltiVec when the CPU reports it, scalar loop otherwise.
    if(!(cpufuncs & CPUF_PPC_ALTIVEC)) {
        int n = length + 1;
        BASIC_SUBFROM();
    } else {
        imageFilterSubFrom_Altivec(dst, src, length);
    }
#elif defined(USE_X86_GFX)
#ifdef MACOSX
    // MACOSX x86 builds call the SSE2 routine unconditionally, without
    // consulting cpufuncs.
    imageFilterSubFrom_SSE2(dst, src, length);
#else
    // Other x86 builds: prefer SSE2, then MMX, then the scalar loop.
    if (cpufuncs & CPUF_X86_SSE2) {
        imageFilterSubFrom_SSE2(dst, src, length);
    } else if (cpufuncs & CPUF_X86_MMX) {
        imageFilterSubFrom_MMX(dst, src, length);
    } else {
        int n = length + 1;
        BASIC_SUBFROM();
    }
#endif // MACOSX
#else
    // No special gfx handling compiled in: scalar loop only.
    int n = length + 1;
    BASIC_SUBFROM();
#endif
}
// SSE2 variant of the subtract-from filter: for each of `length` bytes,
// dst = dst - src using unsigned saturating subtraction in the vector loop.
//
//   dst    - buffer that is read and overwritten with the result
//   src    - buffer holding the values to subtract (any alignment)
//   length - number of bytes to process
//
// `n`, `dst` and `src` keep their names because the SUBFROM_PIXEL and
// BASIC_SUBFROM macros (defined elsewhere) are used with them in scope.
void imageFilterSubFrom_SSE2(unsigned char *dst, unsigned char *src, int length)
{
    int n = length;

    // Scalar prologue: one byte at a time until dst sits on a 16-byte
    // boundary (or the input runs out).
    while( (n > 0) && (((long)dst & 0xF) != 0) ) {
        SUBFROM_PIXEL();
        ++dst;
        ++src;
        --n;
    }

    // Vector body: 16 bytes per iteration. dst is aligned at this point, so
    // it uses aligned load/store; src may not be, so it uses the unaligned load.
    for( ; n >= 16; n -= 16, src += 16, dst += 16) {
        const __m128i subtrahend = _mm_loadu_si128((__m128i*)src);
        const __m128i minuend    = _mm_load_si128((__m128i*)dst);

        _mm_store_si128((__m128i*)dst, _mm_subs_epu8(minuend, subtrahend));
    }

    // Scalar epilogue for the remaining (< 16) bytes. BASIC_SUBFROM is
    // entered with n one larger than the byte count, the same way the
    // dispatcher imageFilterSubFrom seeds it with length + 1.
    n += 1;
    BASIC_SUBFROM();
}
// AltiVec variant of the subtract-from filter: for each of `length` bytes,
// dst = dst - src using unsigned saturating subtraction in the vector loop.
//
//   dst    - buffer that is read and overwritten with the result
//   src    - buffer holding the values to subtract (any alignment)
//   length - number of bytes to process
//
// `n`, `dst` and `src` keep their names because the SUBFROM_PIXEL and
// BASIC_SUBFROM macros (defined elsewhere) are used with them in scope.
void imageFilterSubFrom_Altivec(unsigned char *dst, unsigned char *src, int length)
{
    int n = length;

    // Compute first few values so we're on a 16-byte boundary in dst
    while( (((long)dst & 0xF) > 0) && (n > 0) ) {
        SUBFROM_PIXEL();
        --n; ++dst; ++src;
    }

    // Do bulk of processing using Altivec (sub 16 8-bit unsigned integers, with saturation)
    while(n >= 16) {
        // Only dst is known to be 16-byte aligned here. vec_ld silently
        // ignores the low four address bits, so a plain vec_ld(0, src) would
        // fetch the wrong bytes whenever src is misaligned relative to dst.
        // Load the two aligned blocks that straddle src[0..15] and merge them
        // with the permute mask from vec_lvsl (standard AltiVec unaligned-load
        // idiom). vec_ld(15, src) touches only the block containing src[15],
        // so nothing beyond the 16 bytes being processed is required to exist.
        // NOTE(review): idiom is documented for big-endian AltiVec; re-check
        // if this is ever built for a little-endian POWER target.
        vector unsigned char s_head = vec_ld(0, src);
        vector unsigned char s_tail = vec_ld(15, src);
        vector unsigned char s = vec_perm(s_head, s_tail, vec_lvsl(0, src));
        vector unsigned char d = vec_ld(0,dst);
        vector unsigned char r = vec_subs(d, s);
        vec_st(r,0,dst);

        n -= 16; src += 16; dst += 16;
    }

    // If any bytes are left over, deal with them individually.
    // BASIC_SUBFROM is entered with n one larger than the byte count, the
    // same way the dispatcher imageFilterSubFrom seeds it with length + 1.
    ++n;
    BASIC_SUBFROM();
}