/* x*a + y*b, per 8-bit channel */
static force_inline vector unsigned int
pix_add_mul (vector unsigned int x,
             vector unsigned int a,
             vector unsigned int y,
             vector unsigned int b)
{
    vector unsigned int t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}
/* x*a + y*b, per 8-bit channel */
static force_inline __vector4
pix_add_mul (__vector4 x,
             __vector4 a,
             __vector4 y,
             __vector4 b)
{
    __vector4 t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}
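/*
 * For reference, a scalar sketch of what pix_add_mul computes per
 * 8-bit channel, assuming pix_multiply is the usual pixman rounded
 * byte multiply (bias by 0x80, then divide by 255) and pix_add is a
 * saturating add.  The helper names below are hypothetical and for
 * illustration only.
 */
static inline uint8_t
scalar_mul_un8 (uint8_t x, uint8_t a)
{
    uint16_t t = (uint16_t) x * a + 0x80;    /* bias for rounding  */

    return (uint8_t) ((t + (t >> 8)) >> 8);  /* divide by 255      */
}

static inline uint8_t
scalar_add_mul_un8 (uint8_t x, uint8_t a, uint8_t y, uint8_t b)
{
    uint16_t t = (uint16_t) scalar_mul_un8 (x, a)
	+ scalar_mul_un8 (y, b);

    return t > 0xff ? 0xff : (uint8_t) t;    /* saturate at 0xff   */
}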
/* dest = src*mask*dest.a + dest*~(mask*src.a) */
static void
vmx128_combine_atop_ca (pixman_implementation_t *imp,
                        pixman_op_t              op,
                        uint32_t *               dest,
                        const uint32_t *         src,
                        const uint32_t *         mask,
                        int                      width)
{
    int i;
    __vector4 vdest, vsrc, vmask, vsrca;
    __vector4 tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vsrca = splat_alpha (vsrc);

        vsrc = pix_multiply (vsrc, vmask);
        vmask = pix_multiply (vmask, vsrca);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             negate (vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}
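/*
 * A sketch of what the UN8x4_MUL_UN8 macro used in the scalar tails
 * computes: each of the four bytes packed in x is multiplied by a,
 * with the same rounding as scalar_mul_un8 above.  The real pixman
 * macro processes two channels at a time for speed; this per-byte
 * loop is an assumed reference, not the actual macro.
 */
static inline uint32_t
un8x4_mul_un8_ref (uint32_t x, uint8_t a)
{
    uint32_t r = 0;
    int i;

    for (i = 0; i < 4; i++)
    {
        uint16_t t = ((x >> (i * 8)) & 0xff) * a + 0x80;

        r |= (uint32_t) (uint8_t) ((t + (t >> 8)) >> 8) << (i * 8);
    }

    return r;
}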
/* dest = src*~dest.a */
static void
vmx128_combine_out_u_no_mask (uint32_t *      dest,
                              const uint32_t *src,
                              int             width)
{
    int i;
    __vector4 vdest, vsrc;
    __vector4 tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}
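/*
 * In pixman, the _no_mask and _mask variants are normally selected by
 * a thin wrapper with the combine_32 signature.  A sketch of that
 * dispatch, assuming a vmx128_combine_out_u_mask counterpart exists
 * alongside the no-mask variant above:
 */
static void
vmx128_combine_out_u (pixman_implementation_t *imp,
                      pixman_op_t              op,
                      uint32_t *               dest,
                      const uint32_t *         src,
                      const uint32_t *         mask,
                      int                      width)
{
    if (mask)
        vmx128_combine_out_u_mask (dest, src, mask, width);
    else
        vmx128_combine_out_u_no_mask (dest, src, width);
}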
/* dest = dest*src.a */
static void
vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
                                  const uint32_t *src,
                                  int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
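/*
 * splat_alpha broadcasts each pixel's alpha byte across that pixel's
 * four channels, so a whole pixel can be multiplied by its alpha in
 * one pix_multiply.  A scalar sketch of the per-pixel effect, assuming
 * ARGB with alpha in the top byte (hypothetical reference helper):
 */
static inline uint32_t
splat_alpha_ref (uint32_t pix)
{
    uint32_t a = pix >> 24;

    return (a << 24) | (a << 16) | (a << 8) | a;
}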
/* dest*~srca + src */
static force_inline __vector4
over (__vector4 src,
      __vector4 srca,
      __vector4 dest)
{
    __vector4 tmp = pix_multiply (dest, negate (srca));

    tmp = __vaddubs (src, tmp);
    return tmp;
}
/* dest = dest*~(mask*src.a) */
static void
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);

        dest[i] = d;
    }
}
/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
        pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char) src, tmp);
    return (vector unsigned int) tmp;
}
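/*
 * A scalar per-channel sketch of the two over() variants above:
 * dest*~srca + src with a saturating add, using the same rounded
 * multiply as pix_multiply.  Hypothetical reference helper, not part
 * of the SIMD paths.
 */
static inline uint8_t
scalar_over_un8 (uint8_t src, uint8_t srca, uint8_t dest)
{
    uint16_t t = (uint16_t) dest * (uint8_t) ~srca + 0x80;
    uint16_t d = (uint16_t) src + (uint8_t) ((t + (t >> 8)) >> 8);

    return d > 0xff ? 0xff : (uint8_t) d;
}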
/* dest = src*mask*~dest.a + dest */
static void
vmx128_combine_over_reverse_ca (pixman_implementation_t *imp,
                                pixman_op_t              op,
                                uint32_t *               dest,
                                const uint32_t *         src,
                                const uint32_t *         mask,
                                int                      width)
{
    int i;
    __vector4 vdest, vsrc, vmask;
    __vector4 tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest),
                      pix_multiply (vsrc, vmask));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

        dest[i] = s;
    }
}
/* dest = src*mask + dest */
static void
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);

        dest[i] = s;
    }
}
/* dest = dest*~(src*mask.a).a */
static void
vmx128_combine_out_reverse_u_mask (uint32_t *      dest,
                                   const uint32_t *src,
                                   const uint32_t *mask,
                                   int             width)
{
    int i;
    __vector4 vdest, vsrc, vmask;
    __vector4 tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        /* LOAD_VECTORSM yields vsrc already multiplied by the mask's alpha */
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (~a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
/* dest = src*mask.a*dest.a */
static void
vmx_combine_in_u_mask (uint32_t *      dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}
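/*
 * These combiners only take effect once they are wired into an
 * implementation's dispatch tables.  A sketch of the usual pixman
 * registration pattern; the entry-point name, the empty fast-path
 * table and the exact set of ops installed here are illustrative,
 * and vmx128_combine_out_u is the assumed wrapper sketched earlier.
 */
static const pixman_fast_path_t vmx128_fast_paths[] =
{
    { PIXMAN_OP_NONE },
};

pixman_implementation_t *
_pixman_implementation_create_vmx128 (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp =
        _pixman_implementation_create (fallback, vmx128_fast_paths);

    imp->combine_32[PIXMAN_OP_OUT] = vmx128_combine_out_u;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx128_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx128_combine_over_reverse_ca;

    return imp;
}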