// Xor transfer mode on two __m128i operands, processed as 32-bit lanes.
//
// Expressed through the SSE2 helpers this function calls, the result is:
//   a = sa + da - 2 * SkAlphaMulAlpha_SSE2(sa, da)
//   c = SkAlphaMulAlpha_SSE2(255 - da, sc) + SkAlphaMulAlpha_SSE2(255 - sa, dc)
// for each colour channel c in {r, g, b}; the four channels are then
// recombined with SkPackARGB32_SSE2.
// NOTE(review): presumably src/dst each carry four premultiplied ARGB
// pixels -- confirm against the callers.
static __m128i xor_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
    const __m128i opaque = _mm_set1_epi32(255);

    const __m128i srcAlpha = SkGetPackedA32_SSE2(src);
    const __m128i dstAlpha = SkGetPackedA32_SSE2(dst);
    const __m128i invSrcAlpha = _mm_sub_epi32(opaque, srcAlpha);
    const __m128i invDstAlpha = _mm_sub_epi32(opaque, dstAlpha);

    // Alpha: sum of both alphas minus twice their product term
    // (the left shift by one performs the doubling).
    const __m128i alphaSum = _mm_add_epi32(srcAlpha, dstAlpha);
    const __m128i twiceProduct =
            _mm_slli_epi32(SkAlphaMulAlpha_SSE2(srcAlpha, dstAlpha), 1);
    __m128i outA = _mm_sub_epi32(alphaSum, twiceProduct);

    // Colour: source weighted by inverse dst alpha plus destination
    // weighted by inverse src alpha.
    __m128i outR = _mm_add_epi32(
            SkAlphaMulAlpha_SSE2(invDstAlpha, SkGetPackedR32_SSE2(src)),
            SkAlphaMulAlpha_SSE2(invSrcAlpha, SkGetPackedR32_SSE2(dst)));
    __m128i outG = _mm_add_epi32(
            SkAlphaMulAlpha_SSE2(invDstAlpha, SkGetPackedG32_SSE2(src)),
            SkAlphaMulAlpha_SSE2(invSrcAlpha, SkGetPackedG32_SSE2(dst)));
    __m128i outB = _mm_add_epi32(
            SkAlphaMulAlpha_SSE2(invDstAlpha, SkGetPackedB32_SSE2(src)),
            SkAlphaMulAlpha_SSE2(invSrcAlpha, SkGetPackedB32_SSE2(dst)));

    return SkPackARGB32_SSE2(outA, outR, outG, outB);
}
// Screen transfer mode: every channel (alpha included) of src and dst is
// combined with srcover_byte_SSE2, and the four results are recombined with
// SkPackARGB32_SSE2.
static __m128i screen_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
    // Pull each channel out of both operands.
    const __m128i srcA = SkGetPackedA32_SSE2(src);
    const __m128i dstA = SkGetPackedA32_SSE2(dst);
    const __m128i srcR = SkGetPackedR32_SSE2(src);
    const __m128i dstR = SkGetPackedR32_SSE2(dst);
    const __m128i srcG = SkGetPackedG32_SSE2(src);
    const __m128i dstG = SkGetPackedG32_SSE2(dst);
    const __m128i srcB = SkGetPackedB32_SSE2(src);
    const __m128i dstB = SkGetPackedB32_SSE2(dst);

    // The same per-channel operation is applied to A, R, G and B.
    __m128i outA = srcover_byte_SSE2(srcA, dstA);
    __m128i outR = srcover_byte_SSE2(srcR, dstR);
    __m128i outG = srcover_byte_SSE2(srcG, dstG);
    __m128i outB = srcover_byte_SSE2(srcB, dstB);

    return SkPackARGB32_SSE2(outA, outR, outG, outB);
}
// Modulate transfer mode: each channel (alpha included) of the result is
// SkAlphaMulAlpha_SSE2 applied to the matching src and dst channels; the
// four results are recombined with SkPackARGB32_SSE2.
static __m128i modulate_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
    // Pull each channel out of both operands.
    const __m128i srcA = SkGetPackedA32_SSE2(src);
    const __m128i dstA = SkGetPackedA32_SSE2(dst);
    const __m128i srcR = SkGetPackedR32_SSE2(src);
    const __m128i dstR = SkGetPackedR32_SSE2(dst);
    const __m128i srcG = SkGetPackedG32_SSE2(src);
    const __m128i dstG = SkGetPackedG32_SSE2(dst);
    const __m128i srcB = SkGetPackedB32_SSE2(src);
    const __m128i dstB = SkGetPackedB32_SSE2(dst);

    // Channel-wise product term of source and destination.
    __m128i outA = SkAlphaMulAlpha_SSE2(srcA, dstA);
    __m128i outR = SkAlphaMulAlpha_SSE2(srcR, dstR);
    __m128i outG = SkAlphaMulAlpha_SSE2(srcG, dstG);
    __m128i outB = SkAlphaMulAlpha_SSE2(srcB, dstB);

    return SkPackARGB32_SSE2(outA, outR, outG, outB);
}
// Plus transfer mode: each channel (alpha included) of the result is
// saturated_add_SSE2 applied to the matching src and dst channels; the four
// results are recombined with SkPackARGB32_SSE2.
static __m128i plus_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
    // Pull each channel out of both operands.
    const __m128i srcB = SkGetPackedB32_SSE2(src);
    const __m128i dstB = SkGetPackedB32_SSE2(dst);
    const __m128i srcG = SkGetPackedG32_SSE2(src);
    const __m128i dstG = SkGetPackedG32_SSE2(dst);
    const __m128i srcR = SkGetPackedR32_SSE2(src);
    const __m128i dstR = SkGetPackedR32_SSE2(dst);
    const __m128i srcA = SkGetPackedA32_SSE2(src);
    const __m128i dstA = SkGetPackedA32_SSE2(dst);

    // Channel-wise add through the saturating helper.
    __m128i outB = saturated_add_SSE2(srcB, dstB);
    __m128i outG = saturated_add_SSE2(srcG, dstG);
    __m128i outR = saturated_add_SSE2(srcR, dstR);
    __m128i outA = saturated_add_SSE2(srcA, dstA);

    return SkPackARGB32_SSE2(outA, outR, outG, outB);
}
// Overlay transfer mode. Result alpha is srcover_byte_SSE2(sa, da); each
// colour channel is produced by overlay_byte_SSE2 from the matching src/dst
// channel together with both alphas. The four channels are recombined with
// SkPackARGB32_SSE2.
static __m128i overlay_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
    __m128i srcAlpha = SkGetPackedA32_SSE2(src);
    __m128i dstAlpha = SkGetPackedA32_SSE2(dst);

    // Colour channels of both operands.
    const __m128i srcR = SkGetPackedR32_SSE2(src);
    const __m128i dstR = SkGetPackedR32_SSE2(dst);
    const __m128i srcG = SkGetPackedG32_SSE2(src);
    const __m128i dstG = SkGetPackedG32_SSE2(dst);
    const __m128i srcB = SkGetPackedB32_SSE2(src);
    const __m128i dstB = SkGetPackedB32_SSE2(dst);

    // Alpha uses a different helper from the colour channels.
    __m128i outA = srcover_byte_SSE2(srcAlpha, dstAlpha);
    __m128i outR = overlay_byte_SSE2(srcR, dstR, srcAlpha, dstAlpha);
    __m128i outG = overlay_byte_SSE2(srcG, dstG, srcAlpha, dstAlpha);
    __m128i outB = overlay_byte_SSE2(srcB, dstB, srcAlpha, dstAlpha);

    return SkPackARGB32_SSE2(outA, outR, outG, outB);
}
// Multiply transfer mode. Result alpha is srcover_byte_SSE2(sa, da); each
// colour channel is produced by blendfunc_multiply_byte_SSE2 from the
// matching src/dst channel together with both alphas. The four channels are
// recombined with SkPackARGB32_SSE2.
static __m128i multiply_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
    __m128i srcAlpha = SkGetPackedA32_SSE2(src);
    __m128i dstAlpha = SkGetPackedA32_SSE2(dst);

    // Colour channels of both operands.
    __m128i srcRed   = SkGetPackedR32_SSE2(src);
    __m128i dstRed   = SkGetPackedR32_SSE2(dst);
    __m128i srcGreen = SkGetPackedG32_SSE2(src);
    __m128i dstGreen = SkGetPackedG32_SSE2(dst);
    __m128i srcBlue  = SkGetPackedB32_SSE2(src);
    __m128i dstBlue  = SkGetPackedB32_SSE2(dst);

    // Alpha uses a different helper from the colour channels.
    __m128i outA = srcover_byte_SSE2(srcAlpha, dstAlpha);
    __m128i outR = blendfunc_multiply_byte_SSE2(srcRed, dstRed, srcAlpha, dstAlpha);
    __m128i outG = blendfunc_multiply_byte_SSE2(srcGreen, dstGreen, srcAlpha, dstAlpha);
    __m128i outB = blendfunc_multiply_byte_SSE2(srcBlue, dstBlue, srcAlpha, dstAlpha);

    return SkPackARGB32_SSE2(outA, outR, outG, outB);
}