コード例 #1
0
ファイル: swscale_vsx.c プロジェクト: lihp1603/ffmpeg
static void yuv2plane1_16_vsx(const int32_t *src, uint16_t *dest, int dstW,
                           int big_endian, int output_bits)
{
    const int dst_u = -(uintptr_t)dest & 7;
    const int shift = 3;
    const int add = (1 << (shift - 1));
    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);
    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);
    vector uint32_t v, v2;
    vector uint16_t vd;
    int i;

    yuv2plane1_16_u(src, dest, dst_u, big_endian, output_bits, 0);

    for (i = dst_u; i < dstW - 7; i += 8) {
        v = vec_vsx_ld(0, (const uint32_t *) &src[i]);
        v = vec_add(v, vadd);
        v = vec_sr(v, vshift);

        v2 = vec_vsx_ld(0, (const uint32_t *) &src[i + 4]);
        v2 = vec_add(v2, vadd);
        v2 = vec_sr(v2, vshift);

        vd = vec_packsu(v, v2);
        vd = vec_rl(vd, vswap);

        vec_st(vd, 0, &dest[i]);
    }

    yuv2plane1_16_u(src, dest, dstW, big_endian, output_bits, i);
}
コード例 #2
0
void imageFilterMean_Altivec(unsigned char *src1, unsigned char *src2, unsigned char *dst, int length)
{
    int n = length;

    // Compute first few values so we're on a 16-byte boundary in dst
    while( (((long)dst & 0xF) > 0) && (n > 0) ) {
         MEAN_PIXEL();
       --n; ++dst; ++src1; ++src2;
    }

    // Do bulk of processing using Altivec (find the mean of 16 8-bit unsigned integers, with saturation)
    vector unsigned char rshft = vec_splat_u8(0x1);
    while(n >= 16) {
        vector unsigned char s1 = vec_ld(0,src1);
        s1 = vec_sr(s1, rshft); // shift right 1
        vector unsigned char s2 = vec_ld(0,src2);
        s2 = vec_sr(s2, rshft); // shift right 1
        vector unsigned char r = vec_adds(s1, s2);
        vec_st(r,0,dst);

        n -= 16; src1 += 16; src2 += 16; dst += 16;
    }

    // If any bytes are left over, deal with them individually
    ++n;
    BASIC_MEAN();
}
コード例 #3
0
ファイル: gimp-composite-altivec.c プロジェクト: 1ynx/gimp
void
gimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector unsigned short al,ah;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      al=vec_mule(a,b);
      al=vec_add(al,ox0080);
      ah=vec_mulo(a,b);
      ah=vec_add(ah,ox0080);
      al=vec_add(al,vec_sr(al,ox0008));
      ah=vec_add(ah,vec_sr(ah,ox0008));
      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  al=vec_mule(a,b);
  al=vec_add(al,ox0080);
  ah=vec_mulo(a,b);
  ah=vec_add(ah,ox0080);
  al=vec_add(al,vec_sr(al,ox0008));
  ah=vec_add(ah,vec_sr(ah,ox0008));
  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}
コード例 #4
0
void
jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
                               JDIMENSION v_samp_factor,
                               JDIMENSION width_blocks,
                               JSAMPARRAY input_data, JSAMPARRAY output_data)
{
  int outrow, outcol;
  JDIMENSION output_cols = width_blocks * DCTSIZE;
  JSAMPROW inptr, outptr;

  __vector unsigned char this0, next0, out;
  __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;

  /* Constants */
  __vector unsigned short pw_bias = { __4X2(0, 1) },
    pw_one = { __8X(1) };
  __vector unsigned char even_odd_index =
    {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15},
    pb_zero = { __16X(0) };

  expand_right_edge(input_data, max_v_samp_factor, image_width,
                    output_cols * 2);

  for (outrow = 0; outrow < v_samp_factor; outrow++) {
    outptr = output_data[outrow];
    inptr = input_data[outrow];

    for (outcol = output_cols; outcol > 0;
         outcol -= 16, inptr += 32, outptr += 16) {

      this0 = vec_ld(0, inptr);
      this0 = vec_perm(this0, this0, even_odd_index);
      this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
      this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
      outl = vec_add(this0e, this0o);
      outl = vec_add(outl, pw_bias);
      outl = vec_sr(outl, pw_one);

      if (outcol > 8) {
        next0 = vec_ld(16, inptr);
        next0 = vec_perm(next0, next0, even_odd_index);
        next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
        next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
        outh = vec_add(next0e, next0o);
        outh = vec_add(outh, pw_bias);
        outh = vec_sr(outh, pw_one);
      } else
        outh = vec_splat_u16(0);

      out = vec_pack(outl, outh);
      vec_st(out, 0, outptr);
    }
  }
}
コード例 #5
0
ファイル: swscale_vsx.c プロジェクト: lihp1603/ffmpeg
static void yuv2plane1_nbps_vsx(const int16_t *src, uint16_t *dest, int dstW,
                           int big_endian, int output_bits)
{
    const int dst_u = -(uintptr_t)dest & 7;
    const int shift = 15 - output_bits;
    const int add = (1 << (shift - 1));
    const int clip = (1 << output_bits) - 1;
    const vector uint16_t vadd = (vector uint16_t) {add, add, add, add, add, add, add, add};
    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);
    const vector uint16_t vshift = (vector uint16_t) vec_splat_u16(shift);
    const vector uint16_t vlargest = (vector uint16_t) {clip, clip, clip, clip, clip, clip, clip, clip};
    vector uint16_t v;
    int i;

    yuv2plane1_nbps_u(src, dest, dst_u, big_endian, output_bits, 0);

    for (i = dst_u; i < dstW - 7; i += 8) {
        v = vec_vsx_ld(0, (const uint16_t *) &src[i]);
        v = vec_add(v, vadd);
        v = vec_sr(v, vshift);
        v = vec_min(v, vlargest);
        v = vec_rl(v, vswap);
        vec_st(v, 0, &dest[i]);
    }

    yuv2plane1_nbps_u(src, dest, dstW, big_endian, output_bits, i);
}
コード例 #6
0
ファイル: int_altivec.c プロジェクト: 10045125/xuggle-xuggler
static int32_t scalarproduct_int16_altivec(const int16_t * v1, const int16_t * v2, int order, const int shift)
{
    int i;
    LOAD_ZERO;
    register vec_s16 vec1, *pv;
    register vec_s32 res = vec_splat_s32(0), t;
    register vec_u32 shifts;
    int32_t ires;

    shifts = zero_u32v;
    if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));
    if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));
    if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));
    if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));
    if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));

    for(i = 0; i < order; i += 8){
        pv = (vec_s16*)v1;
        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
        t = vec_sr(t, shifts);
        res = vec_sums(t, res);
        v1 += 8;
        v2 += 8;
    }
    res = vec_splat(res, 3);
    vec_ste(res, 0, &ires);
    return ires;
}
コード例 #7
0
 SIMD_INLINE v128_u16 Interpolate(v128_u16 s[2][2], v128_u16 k[2][2])
 {
     v128_u16 r = vec_mladd(s[0][0], k[0][0], K16_BILINEAR_ROUND_TERM);
     r = vec_mladd(s[0][1], k[0][1], r);
     r = vec_mladd(s[1][0], k[1][0], r);
     r = vec_mladd(s[1][1], k[1][1], r);
     return vec_sr(r, K16_BILINEAR_SHIFT);
 }
コード例 #8
0
ファイル: SimdVmxBgrToGray.cpp プロジェクト: 4144/Simd
 SIMD_INLINE v128_u32 BgraToGray32(v128_u8 bgra)
 {
     const v128_u16 _b_r = vec_mule(bgra, K8_01);
     const v128_u16 _g_a = vec_mulo(bgra, K8_01);
     const v128_u32 weightedSum = vec_add(vec_mule(_g_a, K16_GREEN_0000),
         vec_add(vec_mule(_b_r, K16_BLUE_RED), vec_mulo(_b_r, K16_BLUE_RED)));
     return vec_sr(vec_add(weightedSum, K32_ROUND_TERM), K32_SHIFT);
 }
コード例 #9
0
ファイル: quant_h263_altivec.c プロジェクト: roozbeh/openCU
uint32_t
quant_h263_inter_altivec_c(int16_t *coeff,
                            int16_t *data,
                            const uint32_t quant,
                            const uint16_t *mpeg_quant_matrices)
{
    vector unsigned char zerovec;
    vector unsigned short mult;
    vector unsigned short quant_m_2;
    vector unsigned short quant_d_2;
    vector unsigned short sum_short;
    vector signed short acLevel;
    
    vector unsigned int even;
    vector unsigned int odd;
    
    vector bool short m2_mask;
    vector bool short zero_mask;
    
    uint32_t result;

#ifdef DEBUG
    if(((unsigned)coeff) & 0x15)
        fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx\n", (long)coeff);
#endif
    
    /* initialisation stuff */
    zerovec = vec_splat_u8(0);
    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];
    mult = vec_splat(mult, 0);
    *((unsigned short*)&quant_m_2) = (unsigned short)quant;
    quant_m_2 = vec_splat(quant_m_2, 0);
    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));
    *((unsigned short*)&quant_d_2) = (unsigned short)quant;
    quant_d_2 = vec_splat(quant_d_2, 0);
    quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));
    sum_short = (vector unsigned short)zerovec;
    
    /* Quantize */
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
        
    /* Calculate the return value */
    even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);
    even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);
    even = vec_splat(even, 3);
    vec_ste(even, 0, &result);
    return result;
}
コード例 #10
0
ファイル: pixman-vmx.c プロジェクト: 1833183060/wke
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack to short */
    hi = (vector unsigned short)
	vec_mergeh ((vector unsigned char)AVV (0),
		    (vector unsigned char)p);

    mod = (vector unsigned short)
	vec_mergeh ((vector unsigned char)AVV (0),
		    (vector unsigned char)a);

    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack to short */
    lo = (vector unsigned short)
	vec_mergel ((vector unsigned char)AVV (0),
		    (vector unsigned char)p);
    mod = (vector unsigned short)
	vec_mergel ((vector unsigned char)AVV (0),
		    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (hi, lo);
}
コード例 #11
0
ファイル: h264_altivec.c プロジェクト: BOTCrusher/sagetv
/* this code assume that stride % 16 == 0 */
void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
    signed int ABCD[4] __attribute__((aligned(16))) =
                        {((8 - x) * (8 - y)),
                          ((x) * (8 - y)),
                          ((8 - x) * (y)),
                          ((x) * (y))};
    register int i;
    vector unsigned char fperm;
    const vector signed int vABCD = vec_ld(0, ABCD);
    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
    const vector signed int vzero = vec_splat_s32(0);
    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
    const vector unsigned short v6us = vec_splat_u16(6);
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;

    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
    vector unsigned char vsrc0uc, vsrc1uc;
    vector signed short vsrc0ssH, vsrc1ssH;
    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
    vector signed short vsrc2ssH, vsrc3ssH, psum;
    vector unsigned char vdst, ppsum, fsum;

    if (((unsigned long)dst) % 16 == 0) {
      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
                                        0x14, 0x15, 0x16, 0x17,
                                        0x08, 0x09, 0x0A, 0x0B,
                                        0x0C, 0x0D, 0x0E, 0x0F);
    } else {
      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
                                        0x04, 0x05, 0x06, 0x07,
                                        0x18, 0x19, 0x1A, 0x1B,
                                        0x1C, 0x1D, 0x1E, 0x1F);
    }

    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
      vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
      vsrc1uc = vsrcBuc;
    else
      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc0uc);
    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc1uc);

    if (!loadSecond) {// -> !reallyBadAlign
      for (i = 0 ; i < h ; i++) {


        vsrcCuc = vec_ld(stride + 0, src);

        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                (vector unsigned char)vsrc2uc);
        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                (vector unsigned char)vsrc3uc);

        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
        psum = vec_mladd(vB, vsrc1ssH, psum);
        psum = vec_mladd(vC, vsrc2ssH, psum);
        psum = vec_mladd(vD, vsrc3ssH, psum);
        psum = vec_add(v28ss, psum);
        psum = vec_sra(psum, v6us);

        vdst = vec_ld(0, dst);
        ppsum = (vector unsigned char)vec_packsu(psum, psum);
        fsum = vec_perm(vdst, ppsum, fperm);

        vec_st(fsum, 0, dst);

        vsrc0ssH = vsrc2ssH;
        vsrc1ssH = vsrc3ssH;

        dst += stride;
        src += stride;
      }
    } else {
        vector unsigned char vsrcDuc;
      for (i = 0 ; i < h ; i++) {
        vsrcCuc = vec_ld(stride + 0, src);
        vsrcDuc = vec_ld(stride + 16, src);

        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
        if (reallyBadAlign)
          vsrc3uc = vsrcDuc;
        else
          vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                (vector unsigned char)vsrc2uc);
        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                (vector unsigned char)vsrc3uc);

        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
        psum = vec_mladd(vB, vsrc1ssH, psum);
        psum = vec_mladd(vC, vsrc2ssH, psum);
        psum = vec_mladd(vD, vsrc3ssH, psum);
        psum = vec_add(v28ss, psum);
        psum = vec_sr(psum, v6us);

        vdst = vec_ld(0, dst);
        ppsum = (vector unsigned char)vec_pack(psum, psum);
        fsum = vec_perm(vdst, ppsum, fperm);

        vec_st(fsum, 0, dst);

        vsrc0ssH = vsrc2ssH;
        vsrc1ssH = vsrc3ssH;

        dst += stride;
        src += stride;
      }
    }
}
コード例 #12
0
ファイル: dsputil_altivec.c プロジェクト: 0xFFeng/ffmpeg
static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int i;
    int s;
    uint8_t *pix3 = pix2 + line_size;
    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
    const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2);
    vector unsigned char avgv, t5;
    vector unsigned char perm1 = vec_lvsl(0, pix2);
    vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
    vector unsigned char pix2l, pix2r;
    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
    vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
    vector unsigned short avghv, avglv;
    vector unsigned short t1, t2, t3, t4;
    vector unsigned int sad;
    vector signed int sumdiffs;

    sad = (vector unsigned int)vec_splat_u32(0);

    s = 0;

    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, as well
       as some splitting, and vector addition each time around the loop.
       Read unaligned pixels into our vectors. The vectors are as follows:
       pix2v: pix2[0]-pix2[15]  pix2iv: pix2[1]-pix2[16]
       Split the pixel vectors into shorts */
    pix2l  = vec_ld( 0, pix2);
    pix2r  = vec_ld(16, pix2);
    pix2v  = vec_perm(pix2l, pix2r, perm1);
    pix2iv = vec_perm(pix2l, pix2r, perm2);

    pix2hv  = (vector unsigned short) vec_mergeh(zero, pix2v);
    pix2lv  = (vector unsigned short) vec_mergel(zero, pix2v);
    pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
    pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
    t1 = vec_add(pix2hv, pix2ihv);
    t2 = vec_add(pix2lv, pix2ilv);

    for (i = 0; i < h; i++) {
        /* Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix3v: pix3[0]-pix3[15]      pix3iv: pix3[1]-pix3[16] */
        pix1v = vec_ld(0, pix1);

        pix2l  = vec_ld( 0, pix3);
        pix2r  = vec_ld(16, pix3);
        pix3v  = vec_perm(pix2l, pix2r, perm1);
        pix3iv = vec_perm(pix2l, pix2r, perm2);

        /* Note that AltiVec does have vec_avg, but this works on vector pairs
           and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding
           would mean that, for example, avg(3,0,0,1) = 2, when it should be 1.
           Instead, we have to split the pixel vectors into vectors of shorts,
           and do the averaging by hand. */

        /* Split the pixel vectors into shorts */
        pix3hv  = (vector unsigned short) vec_mergeh(zero, pix3v);
        pix3lv  = (vector unsigned short) vec_mergel(zero, pix3v);
        pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
        pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);

        /* Do the averaging on them */
        t3 = vec_add(pix3hv, pix3ihv);
        t4 = vec_add(pix3lv, pix3ilv);

        avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
        avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);

        /* Pack the shorts back into a result */
        avgv = vec_pack(avghv, avglv);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix3 += line_size;
        /* Transfer the calculated values for pix3 into pix2 */
        t1 = t3;
        t2 = t4;
    }
    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
コード例 #13
0
 SIMD_INLINE v128_u16 DivideBy16(v128_u16 value)
 {
     return vec_sr(vec_add(value, K16_0008), K16_0004);
 }
コード例 #14
0
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
{
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
    const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) =
      {rounder, rounder, rounder, rounder,
       rounder, rounder, rounder, rounder};
    const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
      {
        (16-x16)*(16-y16), /* A */
        (   x16)*(16-y16), /* B */
        (16-x16)*(   y16), /* C */
        (   x16)*(   y16), /* D */
        0, 0, 0, 0         /* padding */
      };
    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
    register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8);
    register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
    register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
    int i;
    unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
    unsigned long src_really_odd = (unsigned long)src & 0x0000000F;


POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

    tempA = vec_ld(0, (unsigned short*)ABCD);
    Av = vec_splat(tempA, 0);
    Bv = vec_splat(tempA, 1);
    Cv = vec_splat(tempA, 2);
    Dv = vec_splat(tempA, 3);

    rounderV = vec_ld(0, (unsigned short*)rounder_a);

    // we'll be able to pick-up our 9 char elements
    // at src from those 32 bytes
    // we load the first batch here, as inside the loop
    // we can re-use 'src+stride' from one iteration
    // as the 'src' of the next.
    src_0 = vec_ld(0, src);
    src_1 = vec_ld(16, src);
    srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

    if (src_really_odd != 0x0000000F)
    { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
      srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
    }
    else
    {
      srcvB = src_1;
    }
    srcvA = vec_mergeh(vczero, srcvA);
    srcvB = vec_mergeh(vczero, srcvB);

    for(i=0; i<h; i++)
    {
      dst_odd = (unsigned long)dst & 0x0000000F;
      src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;

      dstv = vec_ld(0, dst);

      // we we'll be able to pick-up our 9 char elements
      // at src + stride from those 32 bytes
      // then reuse the resulting 2 vectors srvcC and srcvD
      // as the next srcvA and srcvB
      src_0 = vec_ld(stride + 0, src);
      src_1 = vec_ld(stride + 16, src);
      srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

      if (src_really_odd != 0x0000000F)
      { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
        srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
      }
      else
      {
        srcvD = src_1;
      }

      srcvC = vec_mergeh(vczero, srcvC);
      srcvD = vec_mergeh(vczero, srcvD);


      // OK, now we (finally) do the math :-)
      // those four instructions replaces 32 int muls & 32 int adds.
      // isn't AltiVec nice ?
      tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
      tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
      tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
      tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);

      srcvA = srcvC;
      srcvB = srcvD;

      tempD = vec_sr(tempD, vcsr8);

      dstv2 = vec_pack(tempD, (vector unsigned short)vczero);

      if (dst_odd)
      {
        dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
      }
      else
      {
        dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
      }

      vec_st(dstv2, 0, dst);

      dst += stride;
      src += stride;
    }

POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
}
コード例 #15
0
/* this code assume that stride % 16 == 0 */
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
    signed int ABCD[4] __attribute__((aligned(16)));
    register int i;
    ABCD[0] = ((8 - x) * (8 - y));
    ABCD[1] = ((x) * (8 - y));
    ABCD[2] = ((8 - x) * (y));
    ABCD[3] = ((x) * (y));
    const vector signed int vABCD = vec_ld(0, ABCD);
    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
    const vector signed int vzero = vec_splat_s32(0);
    const vector signed short v32ss = (const vector signed short)AVV(32);
    const vector unsigned short v6us = vec_splat_u16(6);

    vector unsigned char fperm;

    if (((unsigned long)dst) % 16 == 0) {
      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                                        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
    } else {
      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
    }

    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
    
    vector unsigned char vsrcAuc;
    vector unsigned char vsrcBuc;
    vector unsigned char vsrcperm0;
    vector unsigned char vsrcperm1;
    vsrcAuc = vec_ld(0, src);
    if (loadSecond)
      vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);
    
    vector unsigned char vsrc0uc;
    vector unsigned char vsrc1uc;
    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
      vsrc1uc = vsrcBuc;
    else
      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
    
    vector signed short vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc0uc);
    vector signed short vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc1uc);

    if (!loadSecond) {// -> !reallyBadAlign
      for (i = 0 ; i < h ; i++) {
        vector unsigned char vsrcCuc;
        vsrcCuc = vec_ld(stride + 0, src);
        
        vector unsigned char vsrc2uc;
        vector unsigned char vsrc3uc;
        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
        
        vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);
        vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);
        
        vector signed short psum;
        
        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
        psum = vec_mladd(vB, vsrc1ssH, psum);
        psum = vec_mladd(vC, vsrc2ssH, psum);
        psum = vec_mladd(vD, vsrc3ssH, psum);
        psum = vec_add(v32ss, psum);
        psum = vec_sra(psum, v6us);
        
        vector unsigned char vdst = vec_ld(0, dst);
        vector unsigned char ppsum = (vector unsigned char)vec_packsu(psum, psum);
        
        vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);
        vector unsigned char fsum;
        
        OP_U8_ALTIVEC(fsum, vfdst, vdst);

        vec_st(fsum, 0, dst);
        
        vsrc0ssH = vsrc2ssH;
        vsrc1ssH = vsrc3ssH;
        
        dst += stride;
        src += stride;
      }
    } else {
      for (i = 0 ; i < h ; i++) {
        vector unsigned char vsrcCuc;
        vector unsigned char vsrcDuc;
        vsrcCuc = vec_ld(stride + 0, src);
        vsrcDuc = vec_ld(stride + 16, src);
        
        vector unsigned char vsrc2uc;
        vector unsigned char vsrc3uc;
        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
        if (reallyBadAlign)
          vsrc3uc = vsrcDuc;
        else
          vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
        
        vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);
        vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);
        
        vector signed short psum;
      
        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
        psum = vec_mladd(vB, vsrc1ssH, psum);
        psum = vec_mladd(vC, vsrc2ssH, psum);
        psum = vec_mladd(vD, vsrc3ssH, psum);
        psum = vec_add(v32ss, psum);
        psum = vec_sr(psum, v6us);
        
        vector unsigned char vdst = vec_ld(0, dst);
        vector unsigned char ppsum = (vector unsigned char)vec_pack(psum, psum); 
        
        vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);
        vector unsigned char fsum;
        
        OP_U8_ALTIVEC(fsum, vfdst, vdst);

        vec_st(fsum, 0, dst);
        
        vsrc0ssH = vsrc2ssH;
        vsrc1ssH = vsrc3ssH;
        
        dst += stride;
        src += stride;
      }
    }
    POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
}
コード例 #16
0
/* this code assume that stride % 16 == 0 */
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
    DECLARE_ALIGNED_16(signed int, ABCD[4]) =
                        {((8 - x) * (8 - y)),
                          ((x) * (8 - y)),
                          ((8 - x) * (y)),
                          ((x) * (y))};
    register int i;
    vec_u8_t fperm;
    const vec_s32_t vABCD = vec_ld(0, ABCD);
    const vec_s16_t vA = vec_splat((vec_s16_t)vABCD, 1);
    const vec_s16_t vB = vec_splat((vec_s16_t)vABCD, 3);
    const vec_s16_t vC = vec_splat((vec_s16_t)vABCD, 5);
    const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);
    LOAD_ZERO;
    const vec_s16_t v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
    const vec_u16_t v6us = vec_splat_u16(6);
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;

    vec_u8_t vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
    vec_u8_t vsrc0uc, vsrc1uc;
    vec_s16_t vsrc0ssH, vsrc1ssH;
    vec_u8_t vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16_t vsrc2ssH, vsrc3ssH, psum;
    vec_u8_t vdst, ppsum, vfdst, fsum;

  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);

    if (((unsigned long)dst) % 16 == 0) {
      fperm = (vec_u8_t)AVV(0x10, 0x11, 0x12, 0x13,
                            0x14, 0x15, 0x16, 0x17,
                            0x08, 0x09, 0x0A, 0x0B,
                            0x0C, 0x0D, 0x0E, 0x0F);
    } else {
      fperm = (vec_u8_t)AVV(0x00, 0x01, 0x02, 0x03,
                            0x04, 0x05, 0x06, 0x07,
                            0x18, 0x19, 0x1A, 0x1B,
                            0x1C, 0x1D, 0x1E, 0x1F);
    }

    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
      vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
      vsrc1uc = vsrcBuc;
    else
      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);
    vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);

    if (!loadSecond) {// -> !reallyBadAlign
      for (i = 0 ; i < h ; i++) {


        vsrcCuc = vec_ld(stride + 0, src);

        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

        vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);
        vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);

        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
        psum = vec_mladd(vB, vsrc1ssH, psum);
        psum = vec_mladd(vC, vsrc2ssH, psum);
        psum = vec_mladd(vD, vsrc3ssH, psum);
        psum = vec_add(v32ss, psum);
        psum = vec_sra(psum, v6us);

        vdst = vec_ld(0, dst);
        ppsum = (vec_u8_t)vec_packsu(psum, psum);
        vfdst = vec_perm(vdst, ppsum, fperm);

        OP_U8_ALTIVEC(fsum, vfdst, vdst);

        vec_st(fsum, 0, dst);

        vsrc0ssH = vsrc2ssH;
        vsrc1ssH = vsrc3ssH;

        dst += stride;
        src += stride;
      }
    } else {
        vec_u8_t vsrcDuc;
      for (i = 0 ; i < h ; i++) {
        vsrcCuc = vec_ld(stride + 0, src);
        vsrcDuc = vec_ld(stride + 16, src);

        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
        if (reallyBadAlign)
          vsrc3uc = vsrcDuc;
        else
          vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

        vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);
        vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);

        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
        psum = vec_mladd(vB, vsrc1ssH, psum);
        psum = vec_mladd(vC, vsrc2ssH, psum);
        psum = vec_mladd(vD, vsrc3ssH, psum);
        psum = vec_add(v32ss, psum);
        psum = vec_sr(psum, v6us);

        vdst = vec_ld(0, dst);
        ppsum = (vec_u8_t)vec_pack(psum, psum);
        vfdst = vec_perm(vdst, ppsum, fperm);

        OP_U8_ALTIVEC(fsum, vfdst, vdst);

        vec_st(fsum, 0, dst);

        vsrc0ssH = vsrc2ssH;
        vsrc1ssH = vsrc3ssH;

        dst += stride;
        src += stride;
      }
    }
    POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
}
コード例 #17
0
 SIMD_INLINE v128_s16 Average(const v128_u8 & s0, const v128_u8 & s1)
 {
     return (v128_s16)vec_sr(vec_add(vec_add(
         vec_add(vec_mule(s0, K8_01), vec_mulo(s0, K8_01)),
         vec_add(vec_mule(s1, K8_01), vec_mulo(s1, K8_01))), K16_0002), K16_0002);
 }
コード例 #18
0
 SIMD_INLINE v128_s16 Average(const v128_u8 & s)
 {
     return (v128_s16)vec_sr(vec_add(vec_add(vec_mule(s, K8_01), vec_mulo(s, K8_01)), K16_0001), K16_0001);
 }
コード例 #19
0
ファイル: gimp-composite-altivec.c プロジェクト: 1ynx/gimp
void
gimp_composite_blend_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  guchar blend = ctx->blend.blend;
  union
    {
      vector unsigned char v;
      unsigned char u8[16];
    } vblend;

  vector unsigned char vblendc;
  vector unsigned char a,b,d;
  vector unsigned short al,ah,bl,bh,one=vec_splat_u16(1);
  guchar tmp;

  for (tmp=0; tmp<16; tmp++ )
    vblend.u8[tmp]=blend;
  vblendc=vec_nor(vblend.v,vblend.v);

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255;
       * to divide by 255 we use ((n+1)+(n+1)>>8)>>8
       * It works for all value but 0xffff
       * happily blending formula can't give this value */

      al=vec_mule(a,vblendc);
      ah=vec_mulo(a,vblendc);

      bl=vec_mule(b,vblend.v);
      bh=vec_mulo(b,vblend.v);

      al=vec_add(al,bl);
      al=vec_add(al,one);
      al=vec_add(al,vec_sr(al,ox0008));

      ah=vec_add(ah,bh);
      ah=vec_add(ah,one);
      ah=vec_add(ah,vec_sr(ah,ox0008));

      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  al=vec_mule(a,vblendc);
  ah=vec_mulo(a,vblendc);

  bl=vec_mule(b,vblend.v);
  bh=vec_mulo(b,vblend.v);

  al=vec_add(al,bl);
  al=vec_add(al,one);
  al=vec_add(al,vec_sr(al,ox0008));

  ah=vec_add(ah,bh);
  ah=vec_add(ah,one);
  ah=vec_add(ah,vec_sr(ah,ox0008));

  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

  StoreUnalignedLess(d, D, length);
}
コード例 #20
0
vector unsigned short
testsr_unsigned (vector unsigned short x, vector unsigned short y)
{
  return vec_sr (x, y);
}
コード例 #21
0
// CHECK-LABEL: define void @test1
void test1() {

  /* vec_cmpeq */
  res_vbll = vec_cmpeq(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd
// CHECK-LE: @llvm.ppc.altivec.vcmpequd
// CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous

  res_vbll = vec_cmpeq(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd
// CHECK-LE: @llvm.ppc.altivec.vcmpequd
// CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous

  /* vec_cmpgt */
  res_vbll = vec_cmpgt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
// CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous

  res_vbll = vec_cmpgt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
// CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous

  /* ----------------------- predicates --------------------------- */
  /* vec_all_eq */
  res_i = vec_all_eq(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  /* vec_all_ne */
  res_i = vec_all_ne(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  /* vec_any_eq */
  res_i = vec_any_eq(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  /* vec_any_ne */
  res_i = vec_any_ne(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  /* vec_all_ge */
  res_i = vec_all_ge(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  /* vec_all_gt */
  res_i = vec_all_gt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  /* vec_all_le */
  res_i = vec_all_le(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  /* vec_all_lt */
  res_i = vec_all_lt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  /* vec_any_ge */
  res_i = vec_any_ge(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  /* vec_any_gt */
  res_i = vec_any_gt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  /* vec_any_le */
  res_i = vec_any_le(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  /* vec_any_lt */
  res_i = vec_any_lt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  /* vec_max */
  res_vsll = vec_max(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vsll = vec_max(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vsll = vec_max(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vull = vec_max(vull, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vull = vec_max(vbll, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vull = vec_max(vull, vbll);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  /* vec_min */
  res_vsll = vec_min(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vsll = vec_min(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vsll = vec_min(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vull = vec_min(vull, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vull = vec_min(vbll, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vull = vec_min(vull, vbll);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  /* vec_mule */
  res_vsll = vec_mule(vi, vi);
// CHECK: @llvm.ppc.altivec.vmulesw
// CHECK-LE: @llvm.ppc.altivec.vmulosw
// CHECK-PPC: error: call to 'vec_mule' is ambiguous

  res_vull = vec_mule(vui , vui);
// CHECK: @llvm.ppc.altivec.vmuleuw
// CHECK-LE: @llvm.ppc.altivec.vmulouw
// CHECK-PPC: error: call to 'vec_mule' is ambiguous

  /* vec_mulo */
  res_vsll = vec_mulo(vi, vi);
// CHECK: @llvm.ppc.altivec.vmulosw
// CHECK-LE: @llvm.ppc.altivec.vmulesw
// CHECK-PPC: error: call to 'vec_mulo' is ambiguous

  res_vull = vec_mulo(vui, vui);
// CHECK: @llvm.ppc.altivec.vmulouw
// CHECK-LE: @llvm.ppc.altivec.vmuleuw
// CHECK-PPC: error: call to 'vec_mulo' is ambiguous

  /* vec_packs */
  res_vi = vec_packs(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vpksdss
// CHECK-LE: @llvm.ppc.altivec.vpksdss
// CHECK-PPC: error: call to 'vec_packs' is ambiguous

  res_vui = vec_packs(vull, vull);
// CHECK: @llvm.ppc.altivec.vpkudus
// CHECK-LE: @llvm.ppc.altivec.vpkudus
// CHECK-PPC: error: call to 'vec_packs' is ambiguous

  /* vec_packsu */
  res_vui = vec_packsu(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vpksdus
// CHECK-LE: @llvm.ppc.altivec.vpksdus
// CHECK-PPC: error: call to 'vec_packsu' is ambiguous

  res_vui = vec_packsu(vull, vull);
// CHECK: @llvm.ppc.altivec.vpkudus
// CHECK-LE: @llvm.ppc.altivec.vpkudus
// CHECK-PPC: error: call to 'vec_packsu' is ambiguous

  /* vec_rl */
  res_vsll = vec_rl(vsll, vull);
// CHECK: @llvm.ppc.altivec.vrld
// CHECK-LE: @llvm.ppc.altivec.vrld
// CHECK-PPC: error: call to 'vec_rl' is ambiguous

  res_vull = vec_rl(vull, vull);
// CHECK: @llvm.ppc.altivec.vrld
// CHECK-LE: @llvm.ppc.altivec.vrld
// CHECK-PPC: error: call to 'vec_rl' is ambiguous

  /* vec_sl */
  res_vsll = vec_sl(vsll, vull);
// CHECK: shl <2 x i64>
// CHECK-LE: shl <2 x i64>
// CHECK-PPC: error: call to 'vec_sl' is ambiguous

  res_vull = vec_sl(vull, vull);
// CHECK: shl <2 x i64>
// CHECK-LE: shl <2 x i64>
// CHECK-PPC: error: call to 'vec_sl' is ambiguous

  /* vec_sr */
  res_vsll = vec_sr(vsll, vull);
// CHECK: ashr <2 x i64>
// CHECK-LE: ashr <2 x i64>
// CHECK-PPC: error: call to 'vec_sr' is ambiguous

  res_vull = vec_sr(vull, vull);
// CHECK: lshr <2 x i64>
// CHECK-LE: lshr <2 x i64>
// CHECK-PPC: error: call to 'vec_sr' is ambiguous

  /* vec_sra */
  res_vsll = vec_sra(vsll, vull);
// CHECK: ashr <2 x i64>
// CHECK-LE: ashr <2 x i64>
// CHECK-PPC: error: call to 'vec_sra' is ambiguous

  res_vull = vec_sra(vull, vull);
// CHECK: ashr <2 x i64>
// CHECK-LE: ashr <2 x i64>
// CHECK-PPC: error: call to 'vec_sra' is ambiguous

  /* vec_unpackh */
  res_vsll = vec_unpackh(vi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw
// CHECK-PPC: error: call to 'vec_unpackh' is ambiguous

  res_vbll = vec_unpackh(vbi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw
// CHECK-PPC: error: call to 'vec_unpackh' is ambiguous

  /* vec_unpackl */
  res_vsll = vec_unpackl(vi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw
// CHECK-PPC: error: call to 'vec_unpackl' is ambiguous

  res_vbll = vec_unpackl(vbi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw
// CHECK-PPC: error: call to 'vec_unpackl' is ambiguous

  /* vec_vpksdss */
  res_vi = vec_vpksdss(vsll, vsll);
// CHECK: llvm.ppc.altivec.vpksdss
// CHECK-LE: llvm.ppc.altivec.vpksdss
// CHECK-PPC: warning: implicit declaration of function 'vec_vpksdss'

  /* vec_vpksdus */
  res_vui = vec_vpksdus(vsll, vsll);
// CHECK: llvm.ppc.altivec.vpksdus
// CHECK-LE: llvm.ppc.altivec.vpksdus
// CHECK-PPC: warning: implicit declaration of function 'vec_vpksdus'

  /* vec_vpkudum */
  res_vi = vec_vpkudum(vsll, vsll);
// CHECK: vperm
// CHECK-LE: vperm
// CHECK-PPC: warning: implicit declaration of function 'vec_vpkudum'

  res_vui = vec_vpkudum(vull, vull);
// CHECK: vperm
// CHECK-LE: vperm

  res_vui = vec_vpkudus(vull, vull);
// CHECK: llvm.ppc.altivec.vpkudus
// CHECK-LE: llvm.ppc.altivec.vpkudus
// CHECK-PPC: warning: implicit declaration of function 'vec_vpkudus'

  /* vec_vupkhsw */
  res_vsll = vec_vupkhsw(vi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw
// CHECK-PPC: warning: implicit declaration of function 'vec_vupkhsw'

  res_vbll = vec_vupkhsw(vbi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw

  /* vec_vupklsw */
  res_vsll = vec_vupklsw(vi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw
// CHECK-PPC: warning: implicit declaration of function 'vec_vupklsw'

  res_vbll = vec_vupklsw(vbi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw

  /* vec_max */
  res_vsll = vec_max(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd

  res_vsll = vec_max(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd

  res_vsll = vec_max(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd

  res_vull = vec_max(vull, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud

  res_vull = vec_max(vbll, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud

  /* vec_min */
  res_vsll = vec_min(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd

  res_vsll = vec_min(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd

  res_vsll = vec_min(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd

  res_vull = vec_min(vull, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud

  res_vull = vec_min(vbll, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud

}
コード例 #22
0
ファイル: sha256.c プロジェクト: gnoll110/cellminer
static inline
vec_uint4 vec_SHR(unsigned int n, vec_uint4 x)
{
  return vec_sr(x, vec_splat_u32(n));
}
コード例 #23
0
ファイル: dct.c プロジェクト: Hero2000/CainCamera
void x264_zigzag_interleave_8x8_cavlc_altivec( int16_t *dst, int16_t *src, uint8_t *nnz )
{
    vec_s16_t tmpv[8];
    vec_s16_t merge[2];
    vec_s16_t permv[2];
    vec_s16_t orv[4];
    vec_s16_t src0v = vec_ld( 0*16, src );
    vec_s16_t src1v = vec_ld( 1*16, src );
    vec_s16_t src2v = vec_ld( 2*16, src );
    vec_s16_t src3v = vec_ld( 3*16, src );
    vec_s16_t src4v = vec_ld( 4*16, src );
    vec_s16_t src5v = vec_ld( 5*16, src );
    vec_s16_t src6v = vec_ld( 6*16, src );
    vec_s16_t src7v = vec_ld( 7*16, src );
    vec_u8_t pack;
    vec_u8_t nnzv = vec_vsx_ld( 0, nnz );
    vec_u8_t shift = vec_splat_u8( 7 );
    LOAD_ZERO;

    const vec_u8_t mask[3] = {
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
        { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F },
        { 0x10, 0x11, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x12, 0x13, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F }
    };

    tmpv[0] = vec_mergeh( src0v, src1v );
    tmpv[1] = vec_mergel( src0v, src1v );

    tmpv[2] = vec_mergeh( src2v, src3v );
    tmpv[3] = vec_mergel( src2v, src3v );

    tmpv[4] = vec_mergeh( src4v, src5v );
    tmpv[5] = vec_mergel( src4v, src5v );

    tmpv[6] = vec_mergeh( src6v, src7v );
    tmpv[7] = vec_mergel( src6v, src7v );

    merge[0] = vec_mergeh( tmpv[0], tmpv[1] );
    merge[1] = vec_mergeh( tmpv[2], tmpv[3] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[1] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 0*16, dst );

    merge[0] = vec_mergeh( tmpv[4], tmpv[5] );
    merge[1] = vec_mergeh( tmpv[6], tmpv[7] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[2] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 1*16, dst );
    vec_st( permv[1], 2*16, dst );
    vec_st( permv[2], 3*16, dst );

    merge[0] = vec_mergel( tmpv[0], tmpv[1] );
    merge[1] = vec_mergel( tmpv[2], tmpv[3] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[1] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 4*16, dst );

    merge[0] = vec_mergel( tmpv[4], tmpv[5] );
    merge[1] = vec_mergel( tmpv[6], tmpv[7] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[2] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 5*16, dst );
    vec_st( permv[1], 6*16, dst );
    vec_st( permv[2], 7*16, dst );

    orv[0] = vec_or( src0v, src1v );
    orv[1] = vec_or( src2v, src3v );
    orv[2] = vec_or( src4v, src5v );
    orv[3] = vec_or( src6v, src7v );

    permv[0] = vec_or( orv[0], orv[1] );
    permv[1] = vec_or( orv[2], orv[3] );
    permv[0] = vec_or( permv[0], permv[1] );

    permv[1] = vec_perm( permv[0], permv[0], mask[1] );
    permv[0] = vec_or( permv[0], permv[1] );

    pack = (vec_u8_t)vec_packs( permv[0], permv[0] );
    pack = (vec_u8_t)vec_cmpeq( pack, zerov );
    pack = vec_nor( pack, zerov );
    pack = vec_sr( pack, shift );
    nnzv = vec_perm( nnzv, pack, mask[2] );
    vec_st( nnzv, 0, nnz );
}
コード例 #24
0
ファイル: SimdVmxReduceGray3x3.cpp プロジェクト: 4144/Simd
 template <> SIMD_INLINE v128_u16 DivideBy16<false>(v128_u16 value)
 {
     return vec_sr(value, K16_0004);
 }
コード例 #25
0
ファイル: SimdVmxReduceGray3x3.cpp プロジェクト: 4144/Simd
 template <> SIMD_INLINE v128_u16 DivideBy16<true>(v128_u16 value)
 {
     return vec_sr(vec_add(value, K16_0008), K16_0004);
 }
コード例 #26
0
ファイル: gmc_altivec.c プロジェクト: venkatarajasekhar/Qt
/* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8
 * to preserve proper dst alignment. */
void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
                     int stride, int h, int x16, int y16, int rounder)
{
    int i;
    const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
    const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] = {
        (16 - x16) * (16 - y16), /* A */
             (x16) * (16 - y16), /* B */
        (16 - x16) * (y16),      /* C */
             (x16) * (y16),      /* D */
        0, 0, 0, 0               /* padding */
    };
    register const vector unsigned char vczero =
        (const vector unsigned char) vec_splat_u8(0);
    register const vector unsigned short vcsr8 =
        (const vector unsigned short) vec_splat_u16(8);
    register vector unsigned char dstv, dstv2, srcvB, srcvC, srcvD;
    register vector unsigned short tempB, tempC, tempD;
    unsigned long dst_odd        = (unsigned long) dst & 0x0000000F;
    unsigned long src_really_odd = (unsigned long) src & 0x0000000F;
    register vector unsigned short tempA =
        vec_ld(0, (const unsigned short *) ABCD);
    register vector unsigned short Av = vec_splat(tempA, 0);
    register vector unsigned short Bv = vec_splat(tempA, 1);
    register vector unsigned short Cv = vec_splat(tempA, 2);
    register vector unsigned short Dv = vec_splat(tempA, 3);
    register vector unsigned short rounderV =
        vec_splat((vec_u16) vec_lde(0, &rounder_a), 0);

    /* we'll be able to pick-up our 9 char elements at src from those
     * 32 bytes we load the first batch here, as inside the loop we can
     * reuse 'src + stride' from one iteration as the 'src' of the next. */
    register vector unsigned char src_0 = vec_ld(0, src);
    register vector unsigned char src_1 = vec_ld(16, src);
    register vector unsigned char srcvA = vec_perm(src_0, src_1,
                                                   vec_lvsl(0, src));

    if (src_really_odd != 0x0000000F)
        /* If (src & 0xF) == 0xF, then (src + 1) is properly aligned
         * on the second vector. */
        srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
    else
        srcvB = src_1;
    srcvA = vec_mergeh(vczero, srcvA);
    srcvB = vec_mergeh(vczero, srcvB);

    for (i = 0; i < h; i++) {
        dst_odd        =   (unsigned long) dst            & 0x0000000F;
        src_really_odd = (((unsigned long) src) + stride) & 0x0000000F;

        dstv = vec_ld(0, dst);

        /* We'll be able to pick-up our 9 char elements at src + stride from
         * those 32 bytes then reuse the resulting 2 vectors srvcC and srcvD
         * as the next srcvA and srcvB. */
        src_0 = vec_ld(stride +  0, src);
        src_1 = vec_ld(stride + 16, src);
        srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

        if (src_really_odd != 0x0000000F)
            /* If (src & 0xF) == 0xF, then (src + 1) is properly aligned
             * on the second vector. */
            srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
        else
            srcvD = src_1;

        srcvC = vec_mergeh(vczero, srcvC);
        srcvD = vec_mergeh(vczero, srcvD);

        /* OK, now we (finally) do the math :-)
         * Those four instructions replace 32 int muls & 32 int adds.
         * Isn't AltiVec nice? */
        tempA = vec_mladd((vector unsigned short) srcvA, Av, rounderV);
        tempB = vec_mladd((vector unsigned short) srcvB, Bv, tempA);
        tempC = vec_mladd((vector unsigned short) srcvC, Cv, tempB);
        tempD = vec_mladd((vector unsigned short) srcvD, Dv, tempC);

        srcvA = srcvC;
        srcvB = srcvD;

        tempD = vec_sr(tempD, vcsr8);

        dstv2 = vec_pack(tempD, (vector unsigned short) vczero);

        if (dst_odd)
            dstv2 = vec_perm(dstv, dstv2, vcprm(0, 1, s0, s1));
        else
            dstv2 = vec_perm(dstv, dstv2, vcprm(s0, s1, 2, 3));

        vec_st(dstv2, 0, dst);

        dst += stride;
        src += stride;
    }
}
コード例 #27
0
 SIMD_INLINE v128_u16 Average16(const v128_u16 & s00, const v128_u16 & s01, const v128_u16 & s10, const v128_u16 & s11)
 {
     return vec_sr(vec_add(vec_add(vec_add(s00, s01), vec_add(s10, s11)), K16_0002), K16_0002); 
 }