// Subtract src from dst in place over `length` bytes, choosing an
// implementation at compile time (USE_PPC_GFX / USE_X86_GFX / MACOSX) and,
// where applicable, at run time from the cpufuncs feature mask.
//
//   dst    - buffer that is read and overwritten with the result
//   src    - buffer holding the values to subtract
//   length - number of bytes to process
//
// The plain-C fallback is the BASIC_SUBFROM() macro, which works on the local
// `n` together with dst/src. It is entered with n = length + 1; presumably
// the macro pre-decrements n (its definition is not visible here).
void imageFilterSubFrom(unsigned char *dst, unsigned char *src, int length)
{
#if defined(USE_PPC_GFX)

    if (cpufuncs & CPUF_PPC_ALTIVEC) {
        imageFilterSubFrom_Altivec(dst, src, length);
    } else {
        int n = length + 1;
        BASIC_SUBFROM();
    }

#elif defined(USE_X86_GFX)

#ifdef MACOSX
    // MACOSX builds call the SSE2 routine unconditionally; cpufuncs is not
    // consulted.
    imageFilterSubFrom_SSE2(dst, src, length);
#else
    if (cpufuncs & CPUF_X86_SSE2) {
        imageFilterSubFrom_SSE2(dst, src, length);
    } else if (cpufuncs & CPUF_X86_MMX) {
        imageFilterSubFrom_MMX(dst, src, length);
    } else {
        int n = length + 1;
        BASIC_SUBFROM();
    }
#endif // MACOSX

#else // no special gfx handling

    int n = length + 1;
    BASIC_SUBFROM();

#endif
}
// SSE2 implementation of the in-place subtraction dst -= src.
//
//   dst    - buffer that is read and overwritten with the result
//   src    - buffer holding the values to subtract (any alignment)
//   length - number of bytes to process
//
// Runs in three phases: a per-byte prologue until dst is 16-byte aligned,
// a 16-bytes-per-iteration vector loop, and a per-byte epilogue for the rest.
// The vector loop performs an unsigned saturating subtract; the per-byte
// phases use the SUBFROM_PIXEL / BASIC_SUBFROM macros (defined elsewhere,
// presumably with the same saturating semantics).
void imageFilterSubFrom_SSE2(unsigned char *dst, unsigned char *src, int length)
{
    int n = length;

    // Prologue: single bytes until dst sits on a 16-byte boundary (or we run
    // out of input).
    for (; n > 0 && ((long)dst & 0xF) != 0; --n, ++dst, ++src) {
        SUBFROM_PIXEL();
    }

    // Main loop: dst is aligned, so it may use aligned load/store; src gets
    // an unaligned load because its alignment is unknown.
    for (; n >= 16; n -= 16, src += 16, dst += 16) {
        const __m128i subtrahend = _mm_loadu_si128((__m128i *)src);
        const __m128i minuend = _mm_load_si128((__m128i *)dst);
        _mm_store_si128((__m128i *)dst, _mm_subs_epu8(minuend, subtrahend));
    }

    // Epilogue: BASIC_SUBFROM expects n to be one more than the number of
    // bytes still to process (the non-SIMD callers pass length + 1).
    n += 1;
    BASIC_SUBFROM();
}
void imageFilterSubFrom_Altivec(unsigned char *dst, unsigned char *src, int length) { int n = length; // Compute first few values so we're on a 16-byte boundary in dst while( (((long)dst & 0xF) > 0) && (n > 0) ) { SUBFROM_PIXEL(); --n; ++dst; ++src; } // Do bulk of processing using Altivec (sub 16 8-bit unsigned integers, with saturation) while(n >= 16) { vector unsigned char s = vec_ld(0,src); vector unsigned char d = vec_ld(0,dst); vector unsigned char r = vec_subs(d, s); vec_st(r,0,dst); n -= 16; src += 16; dst += 16; } // If any bytes are left over, deal with them individually ++n; BASIC_SUBFROM(); }