// Darken blend of one color channel, evaluated independently in each 32-bit
// lane of the vector arguments.
//
// Going by the parameter names, sc/dc are the source/destination channel
// values and sa/da the source/destination alphas.
// NOTE(review): the products are formed with a 16-bit multiply and then
// compared as 32-bit integers, which presumably relies on every 32-bit lane
// holding a value in 0..255 -- confirm against the callers.
//
// Per lane the result is
//     sc + dc - SkDiv255Round_SSE2(dc * sa)    when sc * da <  dc * sa
//     sc + dc - SkDiv255Round_SSE2(sc * da)    otherwise
static inline __m128i darken_byte_SSE2(const __m128i& sc, const __m128i& dc,
                                       const __m128i& sa, const __m128i& da) {
    __m128i srcTimesDstAlpha = _mm_mullo_epi16(sc, da);
    __m128i dstTimesSrcAlpha = _mm_mullo_epi16(dc, sa);

    // All ones in the lanes where sc*da < dc*sa, all zeros elsewhere.
    __m128i srcIsSmaller = _mm_cmplt_epi32(srcTimesDstAlpha, dstTimesSrcAlpha);

    __m128i scaledDst = SkDiv255Round_SSE2(dstTimesSrcAlpha);
    __m128i scaledSrc = SkDiv255Round_SSE2(srcTimesDstAlpha);

    // Branch-free select of the term to subtract: scaledDst where the mask is
    // set, scaledSrc where it is clear.
    __m128i subtrahend = _mm_or_si128(_mm_and_si128(srcIsSmaller, scaledDst),
                                      _mm_andnot_si128(srcIsSmaller, scaledSrc));

    return _mm_sub_epi32(_mm_add_epi32(sc, dc), subtrahend);
}
// Difference blend of one color channel, evaluated independently in each
// 32-bit lane of the vector arguments.
//
// Going by the parameter names, sc/dc are the source/destination channel
// values and sa/da the source/destination alphas.
// NOTE(review): the products come from a 16-bit multiply, which presumably
// relies on every 32-bit lane holding a value in 0..255 -- confirm against
// the callers.
//
// Per lane the result is
//     clamp_signed_byte_SSE2(sc + dc
//                            - 2 * SkDiv255Round_SSE2(min(sc * da, dc * sa)))
static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc,
                                           const __m128i& sa, const __m128i& da) {
    __m128i srcTimesDstAlpha = _mm_mullo_epi16(sc, da);
    __m128i dstTimesSrcAlpha = _mm_mullo_epi16(dc, sa);
    __m128i smallerProduct = SkMin32_SSE2(srcTimesDstAlpha, dstTimesSrcAlpha);

    // Shifting each 32-bit lane left by one bit doubles the scaled minimum.
    __m128i doubledMin = _mm_slli_epi32(SkDiv255Round_SSE2(smallerProduct), 1);

    __m128i blended = _mm_sub_epi32(_mm_add_epi32(sc, dc), doubledMin);
    return clamp_signed_byte_SSE2(blended);
}
static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { // test if > 0 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); // test if < 255*255 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); __m128i ret = _mm_setzero_si128(); // if value >= 255*255, value = 255 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); __m128i div = SkDiv255Round_SSE2(prod); // test if > 0 && < 255*255 __m128i cmp = _mm_and_si128(cmp1, cmp2); ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); return ret; }