void
gimp_composite_darken_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Darken blend: each destination byte is the channel-wise minimum of
     the two sources.  Four RGBA8 pixels fit one 16-byte vector. */
  const guchar *src1 = ctx->A;
  const guchar *src2 = ctx->B;
  guchar *dst = ctx->D;
  guint remaining = ctx->n_pixels;
  vector unsigned char va, vb, vmin;

  for (; remaining >= 4; remaining -= 4)
    {
      va = LoadUnaligned (src1);
      vb = LoadUnaligned (src2);
      vmin = vec_min (va, vb);
      StoreUnaligned (vmin, dst);
      src1 += 16;
      src2 += 16;
      dst += 16;
    }

  /* Tail: fewer than four pixels left; touch only remaining*4 bytes. */
  remaining = remaining * 4;
  va = LoadUnalignedLess (src1, remaining);
  vb = LoadUnalignedLess (src2, remaining);
  vmin = vec_min (va, vb);
  StoreUnalignedLess (vmin, dst, remaining);
}
void
gimp_composite_swap_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Swap: exchange the pixel data of the A and B buffers in place.
     Fix: the original declared A and B as `const guchar *` and then
     stored through both of them (StoreUnaligned / StoreUnalignedLess),
     which writes through a const-qualified pointer.  Both buffers are
     written, so the locals must be non-const. */
  guchar *A = ctx->A;
  guchar *B = ctx->B;
  guint length = ctx->n_pixels;
  vector unsigned char a,b;

  /* Four RGBA8 pixels (16 bytes) per iteration. */
  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);
      StoreUnaligned(b, A);
      StoreUnaligned(a, B);
      A+=16; B+=16;
      length-=4;
    }

  /* process last pixels: only length*4 bytes are loaded and stored */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  StoreUnalignedLess(a, B, length);
  StoreUnalignedLess(b, A, length);
}
void
gimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Multiply blend: per colour channel D = A*B/255; the result alpha is
     min(alpha_A, alpha_B).
     NOTE(review): ox0080, ox0008, alphamask and combine_high_bytes are
     file-level constants not visible in this chunk; comments assume the
     values their names suggest (0x0080/8 splats, alpha-byte mask). */
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector unsigned short al,ah;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* 16-bit products of the even (mule) and odd (mulo) bytes, then
         divide by 255 with the rounding approximation
         ((n + 0x80) + ((n + 0x80) >> 8)) >> 8 — the ">> 8" here is the
         vec_sr, and the final shift is realized by keeping only the high
         byte of each 16-bit lane in the vec_perm below. */
      al=vec_mule(a,b);
      al=vec_add(al,ox0080);
      ah=vec_mulo(a,b);
      ah=vec_add(ah,ox0080);
      al=vec_add(al,vec_sr(al,ox0008));
      ah=vec_add(ah,vec_sr(ah,ox0008));
      /* Re-interleave the even/odd results back into byte order. */
      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      /* Result alpha is the minimum of the two source alphas. */
      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);
      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);
      A+=16; B+=16; D+=16;
      length-=4;
    }

  /* process last pixels: same computation, but only length*4 bytes are
     loaded and stored */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  al=vec_mule(a,b);
  al=vec_add(al,ox0080);
  ah=vec_mulo(a,b);
  ah=vec_add(ah,ox0080);
  al=vec_add(al,vec_sr(al,ox0008));
  ah=vec_add(ah,vec_sr(ah,ox0008));
  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);
  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);
  StoreUnalignedLess(d, D, length);
}
void
gimp_composite_difference_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Difference blend: each colour byte becomes |A - B|, computed as
     subs(A,B) + subs(B,A) with saturating subtraction (one of the two
     terms is always zero).  The minimum of the two source alphas is
     moved into A's alpha bytes beforehand, while B's alpha bytes are
     cleared, so the alpha channel comes out as that minimum. */
  const guchar *src1 = ctx->A;
  const guchar *src2 = ctx->B;
  guchar *dst = ctx->D;
  guint count = ctx->n_pixels;
  vector unsigned char va, vb, diff1, diff2, amask1, amask2;

  for (; count >= 4; count -= 4)
    {
      va = LoadUnaligned (src1);
      vb = LoadUnaligned (src2);

      /* min(alpha_A, alpha_B) into diff1's alpha positions. */
      amask1 = vec_and (va, alphamask);
      amask2 = vec_and (vb, alphamask);
      diff1 = vec_min (amask1, amask2);

      /* Transplant the minimum alpha into va; zero vb's alpha. */
      va = vec_andc (va, alphamask);
      va = vec_adds (va, diff1);
      vb = vec_andc (vb, alphamask);

      /* |va - vb| per byte via two saturating subtractions. */
      diff1 = vec_subs (va, vb);
      diff2 = vec_subs (vb, va);
      diff1 = vec_add (diff1, diff2);

      StoreUnaligned (diff1, dst);
      src1 += 16;
      src2 += 16;
      dst += 16;
    }

  /* Tail: fewer than four pixels; only count*4 bytes are touched. */
  count = count * 4;
  va = LoadUnalignedLess (src1, count);
  vb = LoadUnalignedLess (src2, count);
  amask1 = vec_and (va, alphamask);
  amask2 = vec_and (vb, alphamask);
  diff1 = vec_min (amask1, amask2);
  va = vec_andc (va, alphamask);
  va = vec_adds (va, diff1);
  vb = vec_andc (vb, alphamask);
  diff1 = vec_subs (va, vb);
  diff2 = vec_subs (vb, va);
  diff1 = vec_add (diff1, diff2);
  StoreUnalignedLess (diff1, dst, count);
}
void
gimp_composite_dodge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Dodge blend: per colour channel D = (A * 256) / (256 - B), clamped
     by the saturating pack; the result alpha is min(alpha_A, alpha_B).
     AltiVec has no integer divide, so the divisions run per-lane
     through a scalar view of a union.
     NOTE(review): ox0008, ox00ff and alphamask are file-level constants
     not visible in this chunk; comments assume they are the 8 / 0x00ff
     splats and the alpha-byte mask their names suggest. */
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d;
  vector unsigned char alpha_a,alpha_b,alpha;
  vector signed short ox0001=vec_splat_s16(1);
  /* Same storage viewed as a signed vector (for the intrinsics), an
     unsigned vector (for the final pack) and eight scalar lanes (for
     the divisions). */
  union
    {
      vector signed short v;
      vector unsigned short vu;
      gushort u16[8];
    } ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* Result alpha is the minimum of the two source alphas. */
      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      /* Numerator: widen A's bytes to 16-bit lanes and multiply by 256
         (shift left 8; the sign-extension bits from the signed unpack
         are shifted out, so the unsigned view holds a*256). */
      ah.v=vec_unpackh((vector signed char)a);
      ah.v=vec_sl(ah.v,ox0008);
      al.v=vec_unpackl((vector signed char)a);
      al.v=vec_sl(al.v,ox0008);

      /* Denominator: ~b == 255-b, widen, keep the low byte, add 1 —
         giving 256-b, which is always >= 1, so dividing is safe. */
      b=vec_nor(b,b);
      bh.v=vec_unpackh((vector signed char)b);
      bh.v=vec_and(bh.v,ox00ff);
      bh.v=vec_add(bh.v,ox0001);
      bl.v=vec_unpackl((vector signed char)b);
      bl.v=vec_and(bl.v,ox00ff);
      bl.v=vec_add(bl.v,ox0001);

      /* Scalar per-lane divides; lanes 3 and 7 are the alpha positions
         and are deliberately skipped (alpha is patched in below). */
      ah.u16[0]=ah.u16[0]/bh.u16[0];
      ah.u16[1]=ah.u16[1]/bh.u16[1];
      ah.u16[2]=ah.u16[2]/bh.u16[2];
      ah.u16[4]=ah.u16[4]/bh.u16[4];
      ah.u16[5]=ah.u16[5]/bh.u16[5];
      ah.u16[6]=ah.u16[6]/bh.u16[6];
      al.u16[0]=al.u16[0]/bl.u16[0];
      al.u16[1]=al.u16[1]/bl.u16[1];
      al.u16[2]=al.u16[2]/bl.u16[2];
      al.u16[4]=al.u16[4]/bl.u16[4];
      al.u16[5]=al.u16[5]/bl.u16[5];
      al.u16[6]=al.u16[6]/bl.u16[6];

      /* Saturating pack back down to bytes, then insert the alpha. */
      d=vec_packs(ah.vu,al.vu);
      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);
      A+=16; B+=16; D+=16;
      length-=4;
    }

  /* process last pixels: same computation on the final (length < 4)
     pixels; only length*4 bytes are loaded and stored */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);
  ah.v=vec_unpackh((vector signed char)a);
  ah.v=vec_sl(ah.v,ox0008);
  al.v=vec_unpackl((vector signed char)a);
  al.v=vec_sl(al.v,ox0008);
  b=vec_nor(b,b);
  bh.v=vec_unpackh((vector signed char)b);
  bh.v=vec_and(bh.v,ox00ff);
  bh.v=vec_add(bh.v,ox0001);
  bl.v=vec_unpackl((vector signed char)b);
  bl.v=vec_and(bl.v,ox00ff);
  bl.v=vec_add(bl.v,ox0001);
  ah.u16[0]=ah.u16[0]/bh.u16[0];
  ah.u16[1]=ah.u16[1]/bh.u16[1];
  ah.u16[2]=ah.u16[2]/bh.u16[2];
  ah.u16[4]=ah.u16[4]/bh.u16[4];
  ah.u16[5]=ah.u16[5]/bh.u16[5];
  ah.u16[6]=ah.u16[6]/bh.u16[6];
  al.u16[0]=al.u16[0]/bl.u16[0];
  al.u16[1]=al.u16[1]/bl.u16[1];
  al.u16[2]=al.u16[2]/bl.u16[2];
  al.u16[4]=al.u16[4]/bl.u16[4];
  al.u16[5]=al.u16[5]/bl.u16[5];
  al.u16[6]=al.u16[6]/bl.u16[6];
  d=vec_packs(ah.vu,al.vu);
  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);
  StoreUnalignedLess(d, D, length);
}
void
gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Grain-extract blend: per colour channel D = A - B + 128, clamped to
     0..255 by the unsigned saturating pack; result alpha is
     min(alpha_A, alpha_B).
     NOTE(review): ox00ff, oxff80 and alphamask are file-level constants
     not visible in this chunk; comments assume each oxff80 lane holds
     0xff80 (i.e. -128 as a signed short), so subtracting it adds the
     +128 bias. */
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector signed short ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* Result alpha is the minimum of the two source alphas. */
      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      /* Widen both operands to 16-bit lanes; the vec_and strips the
         unwanted sign-extension produced by the signed unpack. */
      ah=vec_unpackh((vector signed char)a);
      ah=vec_and(ah,ox00ff);
      al=vec_unpackl((vector signed char)a);
      al=vec_and(al,ox00ff);
      bh=vec_unpackh((vector signed char)b);
      bh=vec_and(bh,ox00ff);
      bl=vec_unpackl((vector signed char)b);
      bl=vec_and(bl,ox00ff);

      /* a - b, then subtract -128 (== add the 128 bias). */
      ah=vec_sub(ah,bh);
      al=vec_sub(al,bl);
      ah=vec_sub(ah,oxff80);
      al=vec_sub(al,oxff80);

      /* Unsigned saturating pack clamps to 0..255; then patch alpha. */
      d=vec_packsu(ah,al);
      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);
      A+=16; B+=16; D+=16;
      length-=4;
    }

  /* process last pixels: same computation, only length*4 bytes moved */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);
  ah=vec_unpackh((vector signed char)a);
  ah=vec_and(ah,ox00ff);
  al=vec_unpackl((vector signed char)a);
  al=vec_and(al,ox00ff);
  bh=vec_unpackh((vector signed char)b);
  bh=vec_and(bh,ox00ff);
  bl=vec_unpackl((vector signed char)b);
  bl=vec_and(bl,ox00ff);
  ah=vec_sub(ah,bh);
  al=vec_sub(al,bl);
  ah=vec_sub(ah,oxff80);
  al=vec_sub(al,oxff80);
  d=vec_packsu(ah,al);
  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);
  StoreUnalignedLess(d, D, length);
}
void
gimp_composite_blend_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  /* Blend: every byte (alpha included) becomes
     (A*(255-blend) + B*blend) / 255, where blend = ctx->blend.blend
     (0..255).
     NOTE(review): ox0008 and combine_high_bytes are file-level constants
     not visible in this chunk; comments assume an 8-splat and a permute
     map selecting the high byte of each 16-bit lane. */
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  guchar blend = ctx->blend.blend;
  union { vector unsigned char v; unsigned char u8[16]; } vblend;
  vector unsigned char vblendc;
  vector unsigned char a,b,d;
  vector unsigned short al,ah,bl,bh,one=vec_splat_u16(1);
  guchar tmp;

  /* Broadcast the scalar blend factor into every byte lane; vblendc is
     its bitwise complement, i.e. 255-blend. */
  for (tmp=0; tmp<16; tmp++ )
    vblend.u8[tmp]=blend;
  vblendc=vec_nor(vblend.v,vblend.v);

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255;
       * to divide by 255 we use ((n+1)+((n+1)>>8))>>8
       * It works for all values but 0xffff —
       * happily the blending formula can't produce that value. */
      al=vec_mule(a,vblendc);
      ah=vec_mulo(a,vblendc);
      bl=vec_mule(b,vblend.v);
      bh=vec_mulo(b,vblend.v);
      al=vec_add(al,bl);
      al=vec_add(al,one);
      al=vec_add(al,vec_sr(al,ox0008));
      ah=vec_add(ah,bh);
      ah=vec_add(ah,one);
      ah=vec_add(ah,vec_sr(ah,ox0008));
      /* The final >>8 of the division is realized by keeping only the
         high byte of each lane while re-interleaving even (al) and odd
         (ah) results back into byte order. */
      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      StoreUnaligned(d, D);
      A+=16; B+=16; D+=16;
      length-=4;
    }

  /* process last pixels: same computation, only length*4 bytes moved */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  al=vec_mule(a,vblendc);
  ah=vec_mulo(a,vblendc);
  bl=vec_mule(b,vblend.v);
  bh=vec_mulo(b,vblend.v);
  al=vec_add(al,bl);
  al=vec_add(al,one);
  al=vec_add(al,vec_sr(al,ox0008));
  ah=vec_add(ah,bh);
  ah=vec_add(ah,one);
  ah=vec_add(ah,vec_sr(ah,ox0008));
  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
  StoreUnalignedLess(d, D, length);
}
void LossyConvertEncoding8to16::write_vmx(const char* aSource, uint32_t aSourceLength)
{
  // Widen 8-bit bytes to 16-bit char16_t units using VMX, 32 source
  // bytes (two vector registers) per iteration.  Scalar loops handle
  // the head (until dest is 16-byte aligned) and the tail.
  // NOTE(review): interleaving the zero byte first via vec_mergeh/l
  // yields the native big-endian char16_t layout expected on PowerPC.
  char16_t *dest = mDestination;

  // Align the destination to a 16-byte boundary.  We choose to align dest
  // rather than source because we can store neither safely nor fast to
  // unaligned addresses.
  // We must use unsigned datatypes because aSourceLength is unsigned.
  uint32_t i = 0;
  uint32_t alignLen =
    XPCOM_MIN<uint32_t>(aSourceLength,
                        uint32_t(-NS_PTR_TO_INT32(dest) & 0xf) / sizeof(char16_t));

  // subtraction result can underflow if aSourceLength < alignLen!!!
  // check for underflow
  if (aSourceLength >= alignLen && aSourceLength - alignLen > 31) {
    // Scalar copy until dest is aligned.
    for (; i < alignLen; i++) {
      dest[i] = static_cast<unsigned char>(aSource[i]);
    }

    // maxIndex can underflow if aSourceLength < 33!!!
    uint32_t maxIndex = aSourceLength - 33;

    // check for underflow
    if (maxIndex <= aSourceLength && i < maxIndex) {
      const char *aOurSource = &aSource[i];
      char16_t *aOurDest = &dest[i];
      register const vector unsigned char zeroes = vec_splat_u8( 0 );
      register vector unsigned char source1, source2, lo1, hi1, lo2, hi2;

      if ((NS_PTR_TO_UINT32(aOurSource) & 15) == 0) {
        // Source is aligned too: plain vec_ld loads.
        // Walk 32 bytes (two VMX registers) at a time.
        while (1) {
          source1 = vec_ld(0, (unsigned char *)aOurSource);
          source2 = vec_ld(16, (unsigned char *)aOurSource);

          // Interleave 0s in with the bytes of source to create lo and hi.
          // store lo and hi into dest.
          hi1 = vec_mergeh(zeroes, source1);
          lo1 = vec_mergel(zeroes, source1);
          hi2 = vec_mergeh(zeroes, source2);
          lo2 = vec_mergel(zeroes, source2);
          vec_st(hi1, 0, (unsigned char *)aOurDest);
          vec_st(lo1, 16, (unsigned char *)aOurDest);
          vec_st(hi2, 32, (unsigned char *)aOurDest);
          vec_st(lo2, 48, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurSource += 32;
          aOurDest += 32;
        }
      } else {
        // Unaligned source: build a permute mask with vec_lvsl and keep
        // the previous aligned load so each chunk needs one new vec_ld.
        register vector unsigned char mask = vec_lvsl(0, (unsigned char *)aOurSource);
        register vector unsigned char vector1 = vec_ld(0, (unsigned char *)aOurSource);
        register vector unsigned char vector2;

        // Walk 32 bytes (two VMX registers) at a time.
        while (1) {
          LoadUnaligned(source1, 0, (unsigned char *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source2, 16, (unsigned char *)aOurSource, vector2, vector1, mask);

          // Interleave 0s in with the bytes of source to create lo and hi.
          // store lo and hi into dest.
          hi1 = vec_mergeh(zeroes, source1);
          lo1 = vec_mergel(zeroes, source1);
          hi2 = vec_mergeh(zeroes, source2);
          lo2 = vec_mergel(zeroes, source2);
          vec_st(hi1, 0, (unsigned char *)aOurDest);
          vec_st(lo1, 16, (unsigned char *)aOurDest);
          vec_st(hi2, 32, (unsigned char *)aOurDest);
          vec_st(lo2, 48, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurSource += 32;
          aOurDest += 32;
        }
      }
    }
  }

  // Finish up whatever's left.
  for (; i < aSourceLength; i++) {
    dest[i] = static_cast<unsigned char>(aSource[i]);
  }
  mDestination += i;
}
void LossyConvertEncoding16to8::write_vmx(const char16_t* aSource, uint32_t aSourceLength)
{
  // Narrow 16-bit char16_t units to 8-bit bytes using VMX, 32 code
  // units (four vector registers) per iteration.  Scalar loops handle
  // the head (until dest is 16-byte aligned) and the tail.
  // NOTE(review): vec_packsu SATURATES values above 0xff, while the
  // scalar head/tail loops truncate via static_cast — presumably callers
  // only pass data where both agree (<= 0xff); confirm against callers.
  char* dest = mDestination;

  // Align destination to a 16-byte boundary.
  // We must use unsigned datatypes because aSourceLength is unsigned.
  uint32_t i = 0;
  uint32_t alignLen = XPCOM_MIN(aSourceLength, uint32_t(-NS_PTR_TO_INT32(dest) & 0xf));

  // subtraction result can underflow if aSourceLength < alignLen!!!
  // check for underflow
  if (aSourceLength >= alignLen && aSourceLength - alignLen > 31) {
    // Scalar copy until dest is aligned.
    for (; i < alignLen; i++) {
      dest[i] = static_cast<unsigned char>(aSource[i]);
    }

    // maxIndex can underflow if aSourceLength < 33!!!
    uint32_t maxIndex = aSourceLength - 33;

    // check for underflow
    if (maxIndex <= aSourceLength && i < maxIndex) {
      const char16_t *aOurSource = &aSource[i];
      char *aOurDest = &dest[i];
      register vector unsigned char packed1, packed2;
      register vector unsigned short source1, source2, source3, source4;

      if ((NS_PTR_TO_UINT32(aOurSource) & 15) == 0) {
        // Source is aligned too: plain vec_ld loads.
        // Walk 64 bytes (four VMX registers) at a time.
        while (1) {
          source1 = vec_ld(0, (unsigned short *)aOurSource);
          source2 = vec_ld(16, (unsigned short *)aOurSource);
          source3 = vec_ld(32, (unsigned short *)aOurSource);
          source4 = vec_ld(48, (unsigned short *)aOurSource);

          // Pack the 16-bit lanes down to bytes and store.
          packed1 = vec_packsu(source1, source2);
          packed2 = vec_packsu(source3, source4);
          vec_st(packed1, 0, (unsigned char *)aOurDest);
          vec_st(packed2, 16, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurDest += 32;
          aOurSource += 32;
        }
      } else {
        // Unaligned source: build a permute mask with vec_lvsl and keep
        // the previous aligned load so each chunk needs one new vec_ld.
        register vector unsigned char mask = vec_lvsl(0, (unsigned short *)aOurSource);
        register vector unsigned short vector1 = vec_ld(0, (unsigned short *)aOurSource);
        register vector unsigned short vector2;

        // Walk 64 bytes (four VMX registers) at a time.
        while (1) {
          LoadUnaligned(source1, 0, (unsigned short *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source2, 16, (unsigned short *)aOurSource, vector2, vector1, mask);
          LoadUnaligned(source3, 32, (unsigned short *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source4, 48, (unsigned short *)aOurSource, vector2, vector1, mask);

          // Pack the 16-bit lanes down to bytes and store.
          packed1 = vec_packsu(source1, source2);
          packed2 = vec_packsu(source3, source4);
          vec_st(packed1, 0, (unsigned char *)aOurDest);
          vec_st(packed2, 16, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurDest += 32;
          aOurSource += 32;
        }
      }
    }
  }

  // Finish up the rest.
  for (; i < aSourceLength; i++) {
    dest[i] = static_cast<unsigned char>(aSource[i]);
  }
  mDestination += i;
}