Example #1
void
gimp_composite_darken_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      d=vec_min(a, b);   /* darken = per-byte minimum of A and B (alpha included) */

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  d=vec_min(a, b);

  StoreUnalignedLess(d, D, length);
}
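
LoadUnaligned, StoreUnaligned and their ...Less variants are helper macros defined earlier in gimp-composite-altivec.c and are not shown in these excerpts. The load side presumably follows the classic AltiVec idiom of stitching two aligned loads together through a vec_lvsl permute mask; a minimal sketch under that assumption, not the project's exact definition:

/* Sketch of the classic AltiVec unaligned load.  Caveat: the second
 * vec_ld may touch the 16-byte line just past the end of the buffer. */
static inline vector unsigned char
load_unaligned_sketch (const guchar *p)
{
  vector unsigned char lo   = vec_ld (0, p);    /* aligned line holding the first bytes */
  vector unsigned char hi   = vec_ld (16, p);   /* the next aligned line */
  vector unsigned char mask = vec_lvsl (0, p);  /* permute mask derived from p & 15 */
  return vec_perm (lo, hi, mask);               /* the 16 bytes starting at p */
}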
Example #2
void
gimp_composite_swap_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  guchar *A = ctx->A;   /* not const: swap writes both buffers back in place */
  guchar *B = ctx->B;
  guint length = ctx->n_pixels;
  vector unsigned char a,b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);
      StoreUnaligned(b, A);
      StoreUnaligned(a, B);
      A+=16;
      B+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  StoreUnalignedLess(a, B, length);
  StoreUnalignedLess(b, A, length);
}
Example #3
void
gimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector unsigned short al,ah;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      al=vec_mule(a,b);
      al=vec_add(al,ox0080);
      ah=vec_mulo(a,b);
      ah=vec_add(ah,ox0080);
      al=vec_add(al,vec_sr(al,ox0008));
      ah=vec_add(ah,vec_sr(ah,ox0008));
      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  al=vec_mule(a,b);
  al=vec_add(al,ox0080);
  ah=vec_mulo(a,b);
  ah=vec_add(ah,ox0080);
  al=vec_add(al,vec_sr(al,ox0008));
  ah=vec_add(ah,vec_sr(ah,ox0008));
  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}
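
The three vec_add lines implement the well-known exact divide-by-255: for t = a*b + 128, (t + (t >> 8)) >> 8 equals a*b/255 rounded to nearest, and combine_high_bytes (another file-scope constant not shown here) selects those high bytes back into a single vector. A scalar model of one channel, written for illustration:

/* Scalar model of the vector arithmetic above. */
static unsigned char
mul_div255_round (unsigned char a, unsigned char b)
{
  unsigned int t = (unsigned int) a * b + 128;  /* vec_mule/vec_mulo, then + ox0080 */
  t += t >> 8;                                  /* vec_add(al, vec_sr(al, ox0008))  */
  return (unsigned char) (t >> 8);              /* high byte, via combine_high_bytes */
}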
Example #4
void
gimp_composite_difference_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,e,alpha_a,alpha_b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      d=vec_min(alpha_a, alpha_b);

      a=vec_andc(a, alphamask);
      a=vec_adds(a, d);
      b=vec_andc(b, alphamask);
      d=vec_subs(a, b);
      e=vec_subs(b, a);
      d=vec_add(d,e);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a,alphamask);
  alpha_b=vec_and(b,alphamask);
  d=vec_min(alpha_a,alpha_b);

  a=vec_andc(a,alphamask);
  a=vec_adds(a,d);
  b=vec_andc(b,alphamask);
  d=vec_subs(a,b);
  e=vec_subs(b, a);
  d=vec_add(d,e);

  StoreUnalignedLess(d, D, length);
}
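
The two saturating subtractions form a branchless absolute difference: whichever direction would go negative saturates to zero, so the final vec_add yields |a - b| per byte. Because min(alpha_a, alpha_b) was added into a's alpha byte beforehand and b's alpha byte was cleared, the destination alpha comes out as that minimum. A scalar model of one byte:

/* max(a - b, 0) + max(b - a, 0) == |a - b|, with no branches. */
static unsigned char
abs_diff (unsigned char a, unsigned char b)
{
  unsigned char d = (a > b) ? (a - b) : 0;  /* vec_subs(a, b) */
  unsigned char e = (b > a) ? (b - a) : 0;  /* vec_subs(b, a) */
  return d + e;                             /* vec_add(d, e)  */
}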
Example #5
void
gimp_composite_dodge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d;
  vector unsigned char alpha_a,alpha_b,alpha;
  vector signed short ox0001=vec_splat_s16(1);
  union
    {
      vector signed short v;
      vector unsigned short vu;
      gushort u16[8];
    } ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      ah.v=vec_unpackh((vector signed char)a);
      ah.v=vec_sl(ah.v,ox0008);
      al.v=vec_unpackl((vector signed char)a);
      al.v=vec_sl(al.v,ox0008);

      b=vec_nor(b,b);
      bh.v=vec_unpackh((vector signed char)b);
      bh.v=vec_and(bh.v,ox00ff);
      bh.v=vec_add(bh.v,ox0001);
      bl.v=vec_unpackl((vector signed char)b);
      bl.v=vec_and(bl.v,ox00ff);
      bl.v=vec_add(bl.v,ox0001);

      /* AltiVec has no integer divide, so divide element-wise in scalar
       * code; u16[3] and u16[7] are the alpha bytes and are skipped */
      ah.u16[0]=ah.u16[0]/bh.u16[0];
      ah.u16[1]=ah.u16[1]/bh.u16[1];
      ah.u16[2]=ah.u16[2]/bh.u16[2];
      ah.u16[4]=ah.u16[4]/bh.u16[4];
      ah.u16[5]=ah.u16[5]/bh.u16[5];
      ah.u16[6]=ah.u16[6]/bh.u16[6];

      al.u16[0]=al.u16[0]/bl.u16[0];
      al.u16[1]=al.u16[1]/bl.u16[1];
      al.u16[2]=al.u16[2]/bl.u16[2];
      al.u16[4]=al.u16[4]/bl.u16[4];
      al.u16[5]=al.u16[5]/bl.u16[5];
      al.u16[6]=al.u16[6]/bl.u16[6];

      d=vec_packs(ah.vu,al.vu);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);
      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  ah.v=vec_unpackh((vector signed char)a);
  ah.v=vec_sl(ah.v,ox0008);
  al.v=vec_unpackl((vector signed char)a);
  al.v=vec_sl(al.v,ox0008);

  b=vec_nor(b,b);
  bh.v=vec_unpackh((vector signed char)b);
  bh.v=vec_and(bh.v,ox00ff);
  bh.v=vec_add(bh.v,ox0001);
  bl.v=vec_unpackl((vector signed char)b);
  bl.v=vec_and(bl.v,ox00ff);
  bl.v=vec_add(bl.v,ox0001);

  ah.u16[0]=ah.u16[0]/bh.u16[0];
  ah.u16[1]=ah.u16[1]/bh.u16[1];
  ah.u16[2]=ah.u16[2]/bh.u16[2];
  ah.u16[4]=ah.u16[4]/bh.u16[4];
  ah.u16[5]=ah.u16[5]/bh.u16[5];
  ah.u16[6]=ah.u16[6]/bh.u16[6];

  al.u16[0]=al.u16[0]/bl.u16[0];
  al.u16[1]=al.u16[1]/bl.u16[1];
  al.u16[2]=al.u16[2]/bl.u16[2];
  al.u16[4]=al.u16[4]/bl.u16[4];
  al.u16[5]=al.u16[5]/bl.u16[5];
  al.u16[6]=al.u16[6]/bl.u16[6];

  d=vec_packs(ah.vu,al.vu);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}
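
Reading the vector setup back into scalar terms: vec_sl(..., ox0008) builds a*256, vec_nor plus the ox00ff mask and ox0001 builds 256 - b, and vec_packs supplies the final unsigned saturation. Each color channel therefore computes the classic dodge formula, roughly:

/* Scalar model of one dodge channel: a*256 / (256 - b), clamped to 255. */
static unsigned char
dodge_channel (unsigned char a, unsigned char b)
{
  unsigned int q = ((unsigned int) a << 8) / (256u - b);
  return (q > 255) ? 255 : (unsigned char) q;
}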
Example #6
void
gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector signed short ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      ah=vec_unpackh((vector signed char)a);
      ah=vec_and(ah,ox00ff);
      al=vec_unpackl((vector signed char)a);
      al=vec_and(al,ox00ff);
      bh=vec_unpackh((vector signed char)b);
      bh=vec_and(bh,ox00ff);
      bl=vec_unpackl((vector signed char)b);
      bl=vec_and(bl,ox00ff);

      ah=vec_sub(ah,bh);
      al=vec_sub(al,bl);
      ah=vec_sub(ah,oxff80);
      al=vec_sub(al,oxff80);

      d=vec_packsu(ah,al);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  ah=vec_unpackh((vector signed char)a);
  ah=vec_and(ah,ox00ff);
  al=vec_unpackl((vector signed char)a);
  al=vec_and(al,ox00ff);
  bh=vec_unpackh((vector signed char)b);
  bh=vec_and(bh,ox00ff);
  bl=vec_unpackl((vector signed char)b);
  bl=vec_and(bl,ox00ff);

  ah=vec_sub(ah,bh);
  al=vec_sub(al,bl);
  ah=vec_sub(ah,oxff80);
  al=vec_sub(al,oxff80);

  d=vec_packsu(ah,al);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}
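
oxff80 is -128 when read as a signed short, so the second pair of vec_sub calls actually adds 128; together the arithmetic is the usual grain-extract formula D = A - B + 128, with vec_packsu clamping the result to [0, 255]. As a scalar model:

/* Scalar model of one grain-extract channel. */
static unsigned char
grain_extract_channel (unsigned char a, unsigned char b)
{
  int t = (int) a - (int) b + 128;  /* vec_sub(ah, bh); vec_sub(ah, oxff80) */
  if (t < 0)   t = 0;               /* vec_packsu saturates low...          */
  if (t > 255) t = 255;             /* ...and high                          */
  return (unsigned char) t;
}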
Example #7
void
gimp_composite_blend_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  guchar blend = ctx->blend.blend;
  union
    {
      vector unsigned char v;
      unsigned char u8[16];
    } vblend;

  vector unsigned char vblendc;
  vector unsigned char a,b,d;
  vector unsigned short al,ah,bl,bh,one=vec_splat_u16(1);
  guchar tmp;

  for (tmp=0; tmp<16; tmp++ )
    vblend.u8[tmp]=blend;
  vblendc=vec_nor(vblend.v,vblend.v);

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255;
       * to divide by 255 we use ((n+1) + ((n+1) >> 8)) >> 8.
       * It works for all values but 0xffff;
       * happily, the blending formula can't produce that value. */

      al=vec_mule(a,vblendc);
      ah=vec_mulo(a,vblendc);

      bl=vec_mule(b,vblend.v);
      bh=vec_mulo(b,vblend.v);

      al=vec_add(al,bl);
      al=vec_add(al,one);
      al=vec_add(al,vec_sr(al,ox0008));

      ah=vec_add(ah,bh);
      ah=vec_add(ah,one);
      ah=vec_add(ah,vec_sr(ah,ox0008));

      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  al=vec_mule(a,vblendc);
  ah=vec_mulo(a,vblendc);

  bl=vec_mule(b,vblend.v);
  bh=vec_mulo(b,vblend.v);

  al=vec_add(al,bl);
  al=vec_add(al,one);
  al=vec_add(al,vec_sr(al,ox0008));

  ah=vec_add(ah,bh);
  ah=vec_add(ah,one);
  ah=vec_add(ah,vec_sr(ah,ox0008));

  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

  StoreUnalignedLess(d, D, length);
}
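
Since vblendc = ~blend = 255 - blend, each output byte is (a*(255-blend) + b*blend) / 255, using the (n+1) variant of the divide-by-255 trick described in the comment above. A scalar model of one channel:

/* Scalar model of one blended channel; per the comment above, the
 * division is exact for every value this blend can produce (only
 * n+1 == 0xffff would overflow the 16-bit lanes, and the weighted
 * sum tops out at 255*255 + 1 = 65026). */
static unsigned char
blend_channel (unsigned char a, unsigned char b, unsigned char blend)
{
  unsigned int n = (unsigned int) a * (255u - blend)
                 + (unsigned int) b * blend;
  n += 1;         /* vec_add(al, one)                */
  n += n >> 8;    /* vec_add(al, vec_sr(al, ox0008)) */
  return (unsigned char) (n >> 8);
}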
Example #8
void
LossyConvertEncoding8to16::write_vmx(const char* aSource,
                                      uint32_t aSourceLength)
{
  char16_t *dest = mDestination;

  // Align the destination to a 16-byte boundary.  We align the destination
  // rather than the source because VMX can store to unaligned addresses
  // neither safely nor quickly.
  // We must use unsigned datatypes because aSourceLength is unsigned.
  uint32_t i = 0;
  uint32_t alignLen = XPCOM_MIN<uint32_t>(aSourceLength, uint32_t(-NS_PTR_TO_INT32(dest) & 0xf) / sizeof(char16_t));
  // subtraction result can underflow if aSourceLength < alignLen!!!
  // check for underflow
  if (aSourceLength >= alignLen && aSourceLength - alignLen > 31) {
    for (; i < alignLen; i++) {
      dest[i] = static_cast<unsigned char>(aSource[i]);
    }

    // maxIndex can underflow if aSourceLength < 33!!!
    uint32_t maxIndex = aSourceLength - 33;

    // check for underflow
    if (maxIndex <= aSourceLength && i < maxIndex) {
      const char *aOurSource = &aSource[i];
      char16_t *aOurDest = &dest[i];
      register const vector unsigned char zeroes = vec_splat_u8( 0 );
      register vector unsigned char source1, source2, lo1, hi1, lo2, hi2;
      if ((NS_PTR_TO_UINT32(aOurSource) & 15) == 0) {
        // Walk 32 bytes (two VMX registers) at a time.
        while (1) {
          source1 = vec_ld(0, (unsigned char *)aOurSource);
          source2 = vec_ld(16, (unsigned char *)aOurSource);

          // Interleave 0s in with the bytes of source to create lo and hi.
          // store lo and hi into dest.
          hi1 = vec_mergeh(zeroes, source1);
          lo1 = vec_mergel(zeroes, source1);
          hi2 = vec_mergeh(zeroes, source2);
          lo2 = vec_mergel(zeroes, source2);

          vec_st(hi1, 0, (unsigned char *)aOurDest);
          vec_st(lo1, 16, (unsigned char *)aOurDest);
          vec_st(hi2, 32, (unsigned char *)aOurDest);
          vec_st(lo2, 48, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurSource += 32;
          aOurDest += 32;
        }
      }
      else  {
        register vector unsigned char mask = vec_lvsl(0, (unsigned char *)aOurSource);
        register vector unsigned char vector1  = vec_ld(0, (unsigned char *)aOurSource);
        register vector unsigned char vector2;
        // Walk 32 bytes (two VMX registers) at a time.
        while (1) {
          LoadUnaligned(source1, 0, (unsigned char *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source2, 16, (unsigned char *)aOurSource, vector2, vector1, mask);

          // Interleave 0s in with the bytes of source to create lo and hi.
          // store lo and hi into dest.
          hi1 = vec_mergeh(zeroes, source1);
          lo1 = vec_mergel(zeroes, source1);
          hi2 = vec_mergeh(zeroes, source2);
          lo2 = vec_mergel(zeroes, source2);

          vec_st(hi1, 0, (unsigned char *)aOurDest);
          vec_st(lo1, 16, (unsigned char *)aOurDest);
          vec_st(hi2, 32, (unsigned char *)aOurDest);
          vec_st(lo2, 48, (unsigned char *)aOurDest);

          i += 32;
          if (i > maxIndex)
            break;
          aOurSource += 32;
          aOurDest += 32;
        }
      }
    }
  }

  // Finish up whatever's left.
  for (; i < aSourceLength; i++) {
    dest[i] = static_cast<unsigned char>(aSource[i]);
  }

  mDestination += i;
}
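
The six-argument LoadUnaligned used in the misaligned path is a macro defined elsewhere in this Mozilla source file and not shown here. It presumably implements the pipelined AltiVec idiom in which each call loads only one new aligned vector and permutes it against the vector left over from the previous call; note how the call sites alternate vector1 and vector2 so no cache line is loaded twice. A sketch under that assumption:

// Sketch only; the real definition may differ in detail.  `prev` holds
// the aligned vector covering the start of the wanted bytes (primed by
// the initial vec_ld or left by the previous call), `next` receives the
// following aligned vector, and `mask` comes from vec_lvsl.
#define LoadUnaligned(result, offset, addr, prev, next, mask) \
  ((next) = vec_ld((offset) + 16, (addr)),                    \
   (result) = vec_perm((prev), (next), (mask)))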
Example #9
void
LossyConvertEncoding16to8::write_vmx(const char16_t* aSource,
                                      uint32_t aSourceLength)
{
  char* dest = mDestination;

  // Align destination to a 16-byte boundary.
  // We must use unsigned datatypes because aSourceLength is unsigned.
  uint32_t i = 0;
  uint32_t alignLen = XPCOM_MIN(aSourceLength, uint32_t(-NS_PTR_TO_INT32(dest) & 0xf));
  // subtraction result can underflow if aSourceLength < alignLen!!!
  // check for underflow
  if (aSourceLength >= alignLen && aSourceLength - alignLen > 31) {
    for (; i < alignLen; i++) {
      dest[i] = static_cast<unsigned char>(aSource[i]);
    }

    // maxIndex can underflow if aSourceLength < 33!!!
    uint32_t maxIndex = aSourceLength - 33;

    // check for underflow
    if (maxIndex <= aSourceLength && i < maxIndex) {
      const char16_t *aOurSource = &aSource[i];
      char *aOurDest = &dest[i];
      register vector unsigned char packed1, packed2;
      register vector unsigned short source1, source2, source3, source4;
      if ((NS_PTR_TO_UINT32(aOurSource) & 15) == 0) {
        // Walk 64 bytes (four VMX registers) at a time.
        while (1) {
          source1 = vec_ld(0, (unsigned short *)aOurSource);
          source2 = vec_ld(16, (unsigned short *)aOurSource);
          source3 = vec_ld(32, (unsigned short *)aOurSource);
          source4 = vec_ld(48, (unsigned short *)aOurSource);
          packed1 = vec_packsu(source1, source2);
          packed2 = vec_packsu(source3, source4);
          vec_st(packed1, 0, (unsigned char *)aOurDest);
          vec_st(packed2, 16, (unsigned char *)aOurDest);
          i += 32;
          if (i > maxIndex)
            break;
          aOurDest += 32;
          aOurSource += 32;
        }
      }
      else {
        register vector unsigned char mask = vec_lvsl(0, (unsigned short *)aOurSource);
        register vector unsigned short vector1  = vec_ld(0, (unsigned short *)aOurSource);
        register vector unsigned short vector2;
        // Walk 64 bytes (four VMX registers) at a time.
        while (1) {
          LoadUnaligned(source1, 0, (unsigned short *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source2, 16, (unsigned short *)aOurSource, vector2, vector1, mask);
          LoadUnaligned(source3, 32, (unsigned short *)aOurSource, vector1, vector2, mask);
          LoadUnaligned(source4, 48, (unsigned short *)aOurSource, vector2, vector1, mask);
          packed1 = vec_packsu(source1, source2);
          packed2 = vec_packsu(source3, source4);
          vec_st(packed1, 0, (unsigned char *)aOurDest);
          vec_st(packed2, 16, (unsigned char *)aOurDest);
          i += 32;
          if (i > maxIndex)
            break;
          aOurDest += 32;
          aOurSource += 32;
        }
      }
    }
  }

  // Finish up the rest.
  for (; i < aSourceLength; i++) {
    dest[i] = static_cast<unsigned char>(aSource[i]);
  }

  mDestination += i;
}