// MMX implementation of the YUV subtraction: every byte of `image` is
// replaced, in place, by a saturated difference with the byte at the same
// position in `right`.
//
// Both buffers are walked in 8-byte (__m64) groups, from the last group to
// the first. Inside a group the bytes are widened to 16-bit lanes; bytes at
// even positions get 0x80 added before the subtraction (presumably these are
// the chroma samples of an interleaved YUV layout, re-centred on 128 --
// confirm against the pixel format), bytes at odd positions are subtracted
// directly. The 16-bit results are then packed back to bytes with unsigned
// saturation, so every output byte ends up in [0, 255].
//
// image: left operand, overwritten with the result.
// right: right operand, only read.
//
// NOTE(review): the group count is rounded up, so when the byte size is not
// a multiple of 8 the final group reaches past xsize*ysize*csize in both
// buffers. Presumably the image buffers are allocated with padding -- verify.
void pix_subtract :: processYUV_MMX (imageStruct &image, imageStruct &right)
{
  const int byteCount = image.xsize * image.ysize * image.csize;

  __m64 *const dst = (__m64*)image.data;
  __m64 *const src = (__m64*)right.data;

  const __m64 zero = _mm_setzero_si64();
  // 0x80 in 16-bit lanes 0 and 2, nothing in lanes 1 and 3.
  const __m64 bias = _mm_setr_pi16(0x80, 0x00, 0x80, 0x00);

  // Number of 8-byte groups, rounded up; iterate from the highest index down.
  for (int group = byteCount / sizeof(__m64) + (byteCount % sizeof(__m64) != 0);
       group-- != 0; ) {
    const __m64 lhs = dst[group];
    const __m64 rhs = src[group];

    // Zero-extend the low and high four bytes of each operand to 16 bits.
    const __m64 lhsLow  = _mm_unpacklo_pi8(lhs, zero);
    const __m64 rhsLow  = _mm_unpacklo_pi8(rhs, zero);
    const __m64 lhsHigh = _mm_unpackhi_pi8(lhs, zero);
    const __m64 rhsHigh = _mm_unpackhi_pi8(rhs, zero);

    // (left + bias) - right, with the subtraction clamped at zero.
    const __m64 diffLow  = _mm_subs_pu16(_mm_adds_pu16(lhsLow,  bias), rhsLow);
    const __m64 diffHigh = _mm_subs_pu16(_mm_adds_pu16(lhsHigh, bias), rhsHigh);

    // Narrow back to eight bytes; values above 255 clamp to 255.
    dst[group] = _mm_packs_pu16(diffLow, diffHigh);
  }

  // Leave MMX state so that later x87 floating-point code works.
  _mm_empty();
}
// Per-lane unsigned saturating subtraction of four 16-bit values: returns
// a - b for each lane, clamped at zero, by forwarding to _mm_subs_pu16.
//
// The directive comment inside the body is kept verbatim on its own line; it
// looks like a FileCheck pattern expecting a psubusw instruction in the
// compiler output. It must not share a line with the return statement,
// otherwise the line comment swallows the rest of the function.
__m64 test48(__m64 a, __m64 b) {
  // CHECK: psubusw
  return _mm_subs_pu16(a, b);
}
// Per-lane unsigned saturating subtraction of four 16-bit values: returns
// s1 - s2 for each lane, clamped at zero, by forwarding to _mm_subs_pu16.
//
// The return type is spelled out explicitly. Without it the definition is
// ill-formed in C++ and falls back to implicit int in old C dialects, which
// does not match the __m64 value being returned.
// NOTE(review): if the original file carried the return type (and possibly
// attributes) on a preceding line that was lost here, reconcile the two.
__m64
test (__m64 s1, __m64 s2)
{
  return _mm_subs_pu16 (s1, s2);
}
// Per-lane unsigned saturating subtraction of four 16-bit values: returns
// a - b for each lane, clamped at zero, by forwarding to _mm_subs_pu16.
//
// The two directive comments inside the body are kept verbatim, each on its
// own line; they look like FileCheck patterns matching this function's label
// and the llvm.x86.mmx.psubus.w intrinsic call in the emitted IR. They must
// not share a line with the return statement, otherwise the line comment
// swallows the rest of the function.
__m64 test_mm_subs_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_subs_pu16
  // CHECK: call x86_mmx @llvm.x86.mmx.psubus.w
  return _mm_subs_pu16(a, b);
}