/*
 * Multi-word unsigned addition on 64-bit words held in __m64 values.
 *
 * For each index from 0 to count - 1, stores a[i] + b[i] + carry-in into
 * result[i], where the carry-in is the carry out of the previous word
 * (zero for the first word).
 *
 * Returns the carry out of the last word (0 or 1) as an __m64; when count
 * is 0 nothing is stored and zero is returned.
 */
__m64 unsigned_add3 (const __m64 * a, const __m64 * b, __m64 * result, unsigned int count)
{
  const __m64 zero = _mm_setzero_si64 ();
  /* Comparing a value with itself sets every bit, i.e. -1.  */
  const __m64 all_ones = _mm_cmpeq_pi8 (zero, zero);
  /* 0 - (-1) yields the constant 1, used as a mask for bit 0.  */
  const __m64 bit0 = _mm_sub_si64 (zero, all_ones);
  __m64 carry_in = zero;
  unsigned int idx;

  for (idx = 0; idx != count; ++idx)
    {
      /* Both operands are read before result[idx] is written.  */
      const __m64 x = a[idx];
      const __m64 y = b[idx];
      __m64 low_carry;
      __m64 upper_sum;

      result[idx] = _mm_add_si64 (_mm_add_si64 (x, y), carry_in);

      /*
       * The full sum does not fit in 64 bits, so the carry out is derived in
       * two steps.  First, the carry out of bit 0: the majority of the two
       * low operand bits and the incoming carry.
       */
      low_carry = _mm_or_si64 (_mm_and_si64 (x, y),
                               _mm_and_si64 (_mm_xor_si64 (x, y), carry_in));
      low_carry = _mm_and_si64 (low_carry, bit0);

      /*
       * Second, add the operands shifted right by one together with that
       * low carry.  This sum cannot overflow, and its top bit is the carry
       * out of the original 64-bit addition.
       */
      upper_sum = _mm_add_si64 (_mm_add_si64 (_mm_srli_si64 (x, 1),
                                              _mm_srli_si64 (y, 1)),
                                low_carry);
      carry_in = _mm_srli_si64 (upper_sum, 63);
    }

  return carry_in;
}
/*
 * Thin wrapper that returns _mm_sub_si64(a, b).
 *
 * The `//` comments inside the body appear to be LLVM FileCheck directives
 * expecting the call to lower to the llvm.x86.mmx.psub.q intrinsic; their
 * text is kept verbatim.  Each one must end on its own line: a `//` comment
 * runs to the end of the physical line, so joining them with the code would
 * comment out the return statement and the closing brace.
 */
__m64 test_mm_sub_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sub_si64
  // CHECK: call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_sub_si64(a, b);
}