/*
 * In-place saturating subtraction of `right` from `image`, using MMX intrinsics.
 *
 * Both buffers are walked in 8-byte words. Each byte is widened to 16 bits,
 * a bias of 0x80 is added to the 16-bit lanes 0 and 2 of every widened half
 * (i.e. to every even byte of the buffer), the matching byte of `right` is
 * subtracted with unsigned saturation, and the result is packed back to bytes
 * with unsigned saturation and stored into `image`.
 *
 * NOTE(review): the biased lanes are presumably the chroma samples of an
 * interleaved YUV layout - confirm against the image format.
 */
void pix_subtract :: processYUV_MMX (imageStruct &image, imageStruct &right)
{
  const int byteCount = image.xsize * image.ysize * image.csize;

  __m64 *dst = (__m64*)image.data;
  __m64 *src = (__m64*)right.data;

  // Number of 8-byte words, rounded up: a trailing partial word is processed
  // as a full one.
  // NOTE(review): this assumes both buffers are readable/writable up to the
  // next multiple of 8 bytes - confirm against the allocator.
  const int wordCount = byteCount / sizeof(__m64) + (byteCount % sizeof(__m64) != 0);

  const __m64 zero = _mm_setzero_si64();
  const __m64 bias = _mm_setr_pi16(0x80, 0x00, 0x80, 0x00);

  // Walk from the last word down to the first.
  for (int idx = wordCount; idx-- != 0; ) {
    const __m64 lhs = dst[idx];
    const __m64 rhs = src[idx];

    // Widen the eight bytes of each operand into two groups of four 16-bit lanes.
    __m64 lhsLo = _mm_unpacklo_pi8(lhs, zero);
    const __m64 rhsLo = _mm_unpacklo_pi8(rhs, zero);
    __m64 lhsHi = _mm_unpackhi_pi8(lhs, zero);
    const __m64 rhsHi = _mm_unpackhi_pi8(rhs, zero);

    // (left + bias) - right, both steps with unsigned saturation.
    lhsLo = _mm_subs_pu16(_mm_adds_pu16(lhsLo, bias), rhsLo);
    lhsHi = _mm_subs_pu16(_mm_adds_pu16(lhsHi, bias), rhsHi);

    // Narrow back to bytes (saturating) and overwrite the left image.
    dst[idx] = _mm_packs_pu16(lhsLo, lhsHi);
  }

  // Leave the MMX state so that later floating-point code works.
  _mm_empty();
}
/*
 * Motion blur for all colourspaces, using MMX intrinsics.
 *
 * The saved image is first resized to match the incoming image. Then, for
 * every byte, the new value is
 *     (incoming * m_blur0 + saved * m_blur1) >> 8
 * where the products keep only their low 16 bits and the sum saturates at
 * 16 bits (unsigned). The result is written to both the incoming image and
 * the saved image, so it feeds into the next call.
 *
 * NOTE(review): presumably m_blur0 + m_blur1 is meant to be 256 so that the
 * shift by 8 normalises the blend - confirm where the gains are set.
 */
void pix_motionblur :: processMMX(imageStruct &image)
{
  // Make the history buffer match the incoming image.
  m_savedImage.xsize = image.xsize;
  m_savedImage.ysize = image.ysize;
  m_savedImage.setCsizeByFormat(image.format);
  m_savedImage.reallocate();

  const int byteCount = image.ysize * image.xsize * image.csize;

  // Number of 8-byte words, rounded up: a trailing partial word is processed
  // as a full one.
  // NOTE(review): assumes both buffers are accessible up to the next multiple
  // of 8 bytes - confirm against the allocator.
  const int wordCount = byteCount / sizeof(__m64) + (byteCount % sizeof(__m64) != 0);

  __m64 *current = (__m64*)image.data;
  __m64 *history = (__m64*)m_savedImage.data;

  const __m64 gainCurrent = _mm_set1_pi16(static_cast<short>(m_blur0));
  const __m64 gainHistory = _mm_set1_pi16(static_cast<short>(m_blur1));
  const __m64 zero        = _mm_setzero_si64();

  // Walk from the last word down to the first.
  for (int idx = wordCount; idx-- != 0; ) {
    const __m64 curWord = current[idx];
    const __m64 oldWord = history[idx];

    // Widen bytes to 16-bit lanes (high and low halves of each word).
    __m64 curHi = _mm_unpackhi_pi8(curWord, zero);
    __m64 curLo = _mm_unpacklo_pi8(curWord, zero);
    __m64 oldHi = _mm_unpackhi_pi8(oldWord, zero);
    __m64 oldLo = _mm_unpacklo_pi8(oldWord, zero);

    // Scale each source by its gain (low 16 bits of the product).
    curLo = _mm_mullo_pi16(curLo, gainCurrent);
    curHi = _mm_mullo_pi16(curHi, gainCurrent);
    oldLo = _mm_mullo_pi16(oldLo, gainHistory);
    oldHi = _mm_mullo_pi16(oldHi, gainHistory);

    // Sum with unsigned saturation, then drop the 8 fractional bits.
    curLo = _mm_srli_pi16(_mm_adds_pu16(curLo, oldLo), 8);
    curHi = _mm_srli_pi16(_mm_adds_pu16(curHi, oldHi), 8);

    // Narrow back to bytes and store into both the output and the history.
    const __m64 blended = _mm_packs_pu16(curLo, curHi);
    current[idx] = blended;
    history[idx] = blended;
  }

  // Leave the MMX state so that later floating-point code works.
  _mm_empty();
}
/*
 * Codegen test wrapper around _mm_adds_pu16: returns the lane-wise sum of the
 * four unsigned 16-bit lanes of `a` and `b`, with unsigned saturation.
 *
 * The FileCheck directive inside the body must stay on a line of its own:
 * it is a line comment, so anything following it on the same line (including
 * the return statement and the closing brace) would be commented out.
 */
__m64 test40(__m64 a, __m64 b) {
  // CHECK: paddusw
  return _mm_adds_pu16(a, b);
}
/*
 * Codegen test wrapper around _mm_adds_pu16: returns the lane-wise sum of the
 * four unsigned 16-bit lanes of `a` and `b`, with unsigned saturation.
 *
 * Each FileCheck directive inside the body must stay on a line of its own:
 * they are line comments, so anything following one on the same line
 * (the next directive, the return statement, the closing brace) would be
 * swallowed by it.
 */
__m64 test_mm_adds_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_adds_pu16
  // CHECK: call x86_mmx @llvm.x86.mmx.paddus.w
  return _mm_adds_pu16(a, b);
}