Example #1
0
static void yuv2plane1_16_vsx(const int32_t *src, uint16_t *dest, int dstW,
                           int big_endian, int output_bits)
{
    const int dst_u = -(uintptr_t)dest & 7;
    const int shift = 3;
    const int add = (1 << (shift - 1));
    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);
    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);
    vector uint32_t v, v2;
    vector uint16_t vd;
    int i;

    yuv2plane1_16_u(src, dest, dst_u, big_endian, output_bits, 0);

    for (i = dst_u; i < dstW - 7; i += 8) {
        v = vec_vsx_ld(0, (const uint32_t *) &src[i]);
        v = vec_add(v, vadd);
        v = vec_sr(v, vshift);

        v2 = vec_vsx_ld(0, (const uint32_t *) &src[i + 4]);
        v2 = vec_add(v2, vadd);
        v2 = vec_sr(v2, vshift);

        vd = vec_packsu(v, v2);
        vd = vec_rl(vd, vswap);

        vec_st(vd, 0, &dest[i]);
    }

    yuv2plane1_16_u(src, dest, dstW, big_endian, output_bits, i);
}
Example #2
0
static void yuv2plane1_8_vsx(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    const int dst_u = -(uintptr_t)dest & 15;
    int i, j;
    LOCAL_ALIGNED(16, int16_t, val, [16]);
    const vector uint16_t shifts = (vector uint16_t) {7, 7, 7, 7, 7, 7, 7, 7};
    vector int16_t vi, vileft, ditherleft, ditherright;
    vector uint8_t vd;

    for (j = 0; j < 16; j++) {
        val[j] = dither[(dst_u + offset + j) & 7];
    }

    ditherleft = vec_ld(0, val);
    ditherright = vec_ld(0, &val[8]);

    yuv2plane1_8_u(src, dest, dst_u, dither, offset, 0);

    for (i = dst_u; i < dstW - 15; i += 16) {

        vi = vec_vsx_ld(0, &src[i]);
        vi = vec_adds(ditherleft, vi);
        vileft = vec_sra(vi, shifts);

        vi = vec_vsx_ld(0, &src[i + 8]);
        vi = vec_adds(ditherright, vi);
        vi = vec_sra(vi, shifts);

        vd = vec_packsu(vileft, vi);
        vec_st(vd, 0, &dest[i]);
    }

    yuv2plane1_8_u(src, dest, dstW, dither, offset, i);
}
Example #3
0
void intrinsics_rgb2gray(float* ra, float* ga, float* ba, float* gray ) {
    __vector float c1 = vec_splats(0.3f);
    __vector float c2 = vec_splats(0.59f);
    __vector float c3 = vec_splats(0.11f);

    for(int i = 0; i < N; i+=4) {
        __vector float a = vec_vsx_ld(0, ra+i);
        __vector float b = vec_vsx_ld(0, ga+i);
        __vector float c = vec_vsx_ld(0, ba+i);
        __vector float out = c1 * a  + c2 * b  +  c3 * c ;
        vec_vsx_st(out, 0, gray+i);
    }
}
Example #4
0
static void yuv2plane1_nbps_vsx(const int16_t *src, uint16_t *dest, int dstW,
                           int big_endian, int output_bits)
{
    const int dst_u = -(uintptr_t)dest & 7;
    const int shift = 15 - output_bits;
    const int add = (1 << (shift - 1));
    const int clip = (1 << output_bits) - 1;
    const vector uint16_t vadd = (vector uint16_t) {add, add, add, add, add, add, add, add};
    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);
    const vector uint16_t vshift = (vector uint16_t) vec_splat_u16(shift);
    const vector uint16_t vlargest = (vector uint16_t) {clip, clip, clip, clip, clip, clip, clip, clip};
    vector uint16_t v;
    int i;

    yuv2plane1_nbps_u(src, dest, dst_u, big_endian, output_bits, 0);

    for (i = dst_u; i < dstW - 7; i += 8) {
        v = vec_vsx_ld(0, (const uint16_t *) &src[i]);
        v = vec_add(v, vadd);
        v = vec_sr(v, vshift);
        v = vec_min(v, vlargest);
        v = vec_rl(v, vswap);
        vec_st(v, 0, &dest[i]);
    }

    yuv2plane1_nbps_u(src, dest, dstW, big_endian, output_bits, i);
}
Example #5
0
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int i, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
    vector signed int sum;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned pixels. */
        //vector unsigned char pixl = vec_ld(0,  pix);
        //vector unsigned char pixr = vec_ld(15, pix);
        //vector unsigned char pixv = vec_perm(pixl, pixr, perm);
        vector unsigned char pixv = vec_vsx_ld(0,  pix);

        /* Square the values, and add them to our sum. */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }
    /* Sum up the four partial sums, and put the result into s. */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    vec_vsx_st(sum, 0, &s);
    return s;
}
Example #6
0
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    int i, s;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
    vector signed int sumdiffs;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1. */
        //vector unsigned char pixl = vec_ld(0,  pix);
        //vector unsigned char pixr = vec_ld(15, pix);
        //vector unsigned char t1   = vec_perm(pixl, pixr, perm);
        vector unsigned char t1   = vec_vsx_ld(0,  pix);

        /* Add each 4 pixel group together and put 4 results into sad. */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_vsx_st(sumdiffs, 0, &s);
    return s;
}
Example #7
0
void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 )
{
    vec_s16_t diff[2];
    vec_s32_t sum[2];
    vec_s32_t zero32 = vec_splat_s32(0);
    vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };

    pix_diff( &pix1[0], &pix2[0], diff, 0 );
    pix_diff( &pix1[4*FENC_STRIDE], &pix2[4*FDEC_STRIDE], diff, 1 );

    sum[0] = vec_sum4s( diff[0], zero32 );
    sum[1] = vec_sum4s( diff[1], zero32 );
    diff[0] = vec_packs( sum[0], sum[1] );
    sum[0] = vec_sum4s( diff[0], zero32 );
    diff[0] = vec_packs( sum[0], zero32 );

    diff[1] = vec_vsx_ld( 0, dct );
    diff[0] = vec_perm( diff[0], diff[1], mask );

    vec_vsx_st( diff[0], 0, dct );

    /* 2x2 DC transform */
    int d0 = dct[0] + dct[1];
    int d1 = dct[2] + dct[3];
    int d2 = dct[0] - dct[1];
    int d3 = dct[2] - dct[3];
    dct[0] = d0 + d1;
    dct[1] = d0 - d1;
    dct[2] = d2 + d3;
    dct[3] = d2 - d3;
}
Example #8
0
__attribute__((noinline)) void foo ()
{
  int i;
  vector long long va, vb, vc, vd, tmp;
  volatile unsigned long long three = 3;
  vector unsigned long long threes = vec_splats (three);
  for (i = 0; i < N; i+=2) {
    vb = vec_vsx_ld (0, (vector long long *)&cb[i]);
    vc = vec_vsx_ld (0, (vector long long *)&cc[i]);
    vd = vec_vsx_ld (0, (vector long long *)&cd[i]);
    tmp = vec_add (vb, vc);
    tmp = vec_sub (tmp, vd);
    tmp = vec_sra (tmp, threes);
    x = vec_extract (tmp, 0);
    vec_vsx_st (tmp, 0, (vector long long *)&ca[i]);
  }
}
Example #9
0
static void pix_diff( uint8_t *p1, uint8_t *p2, vec_s16_t *diff, int i )
{
    vec_s16_t pix1v, pix2v, tmp[4];
    vec_u8_t pix1v8, pix2v8;
    LOAD_ZERO;

    for( int j = 0; j < 4; j++ )
    {
        pix1v8 = vec_vsx_ld( 0, p1 );
        pix2v8 = vec_vsx_ld( 0, p2 );
        pix1v = vec_u8_to_s16_h( pix1v8 );
        pix2v = vec_u8_to_s16_h( pix2v8 );
        tmp[j] = vec_sub( pix1v, pix2v );
        p1 += FENC_STRIDE;
        p2 += FDEC_STRIDE;
    }
    diff[i] = vec_add( tmp[0], tmp[1] );
    diff[i] = vec_add( diff[i], tmp[2] );
    diff[i] = vec_add( diff[i], tmp[3] );
}
Example #10
0
scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
			 int order)
{
  int i;
  LOAD_ZERO;
  register vec_s16 vec1;
  register vec_s32 res = vec_splat_s32 (0), t;
  signed int ires;

  for (i = 0; i < order; i += 8) {
    vec1 = vec_vsx_ld (0, v1);
    t    = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
    res  = vec_sums (t, res);
    v1  += 8;
    v2  += 8;
  }
  res = vec_splat (res, 3);
  vec_ste (res, 0, &ires);

  return ires;
}
Example #11
0
void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] )
{
    vec_s16_t dcv;
    vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) );
    vec_u16_t v6 = vec_splat_u16( 6 );
    vec_s16_t dctv = vec_vsx_ld( 0, dct );

    dctv = vec_sra( vec_add( dctv, v32 ), v6 );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 0 ), (vec_s32_t)vec_splat( dctv, 1 ) );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );
    idct8_dc_altivec( &p_dst[0], dcv );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 2 ), (vec_s32_t)vec_splat( dctv, 3 ) );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );
    idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dcv );
}
Example #12
0
void test1() {
// CHECK-LABEL: define void @test1
// CHECK-LE-LABEL: define void @test1

  res_vf = vec_abs(vf);
// CHECK: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}})

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_add(vd, vd);
// CHECK: fadd <2 x double>
// CHECK-LE: fadd <2 x double>

  res_vd = vec_and(vbll, vd);
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
// CHECK-LE: and <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  res_vd = vec_and(vd, vbll);
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
// CHECK-LE: and <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  res_vd = vec_and(vd, vd);
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
// CHECK-LE: and <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_andc(vbll, vd);
// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
// CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK-LE: and <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_andc(vd, vbll);
// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
// CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK-LE: and <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_andc(vd, vd);
// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_ceil(vd);
// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})
// CHECK-LE: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})

  res_vf = vec_ceil(vf);
// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})

  res_vbll = vec_cmpeq(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmpeq(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmpge(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmpge(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmpgt(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmpgt(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmple(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmple(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmplt(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmplt(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  /* vec_cpsgn */
  res_vf = vec_cpsgn(vf, vf);
// CHECK: call <4 x float> @llvm.copysign.v4f32(<4 x float> %{{.+}}, <4 x float> %{{.+}})
// CHECK-LE: call <4 x float> @llvm.copysign.v4f32(<4 x float> %{{.+}}, <4 x float> %{{.+}})

  res_vd = vec_cpsgn(vd, vd);
// CHECK: call <2 x double> @llvm.copysign.v2f64(<2 x double> %{{.+}}, <2 x double> %{{.+}})
// CHECK-LE: call <2 x double> @llvm.copysign.v2f64(<2 x double> %{{.+}}, <2 x double> %{{.+}})

  /* vec_div */
  res_vsll = vec_div(vsll, vsll);
// CHECK: sdiv <2 x i64>
// CHECK-LE: sdiv <2 x i64>

  res_vull = vec_div(vull, vull);
// CHECK: udiv <2 x i64>
// CHECK-LE: udiv <2 x i64>

  res_vf = vec_div(vf, vf);
// CHECK: fdiv <4 x float>
// CHECK-LE: fdiv <4 x float>

  res_vd = vec_div(vd, vd);
// CHECK: fdiv <2 x double>
// CHECK-LE: fdiv <2 x double>

  /* vec_max */
  res_vf = vec_max(vf, vf);
// CHECK: @llvm.ppc.vsx.xvmaxsp
// CHECK-LE: @llvm.ppc.vsx.xvmaxsp

  res_vd = vec_max(vd, vd);
// CHECK: @llvm.ppc.vsx.xvmaxdp
// CHECK-LE: @llvm.ppc.vsx.xvmaxdp

  res_vf = vec_vmaxfp(vf, vf);
// CHECK: @llvm.ppc.vsx.xvmaxsp
// CHECK-LE: @llvm.ppc.vsx.xvmaxsp

  /* vec_min */
  res_vf = vec_min(vf, vf);
// CHECK: @llvm.ppc.vsx.xvminsp
// CHECK-LE: @llvm.ppc.vsx.xvminsp

  res_vd = vec_min(vd, vd);
// CHECK: @llvm.ppc.vsx.xvmindp
// CHECK-LE: @llvm.ppc.vsx.xvmindp

  res_vf = vec_vminfp(vf, vf);
// CHECK: @llvm.ppc.vsx.xvminsp
// CHECK-LE: @llvm.ppc.vsx.xvminsp

  res_d = __builtin_vsx_xsmaxdp(d, d);
// CHECK: @llvm.ppc.vsx.xsmaxdp
// CHECK-LE: @llvm.ppc.vsx.xsmaxdp

  res_d = __builtin_vsx_xsmindp(d, d);
// CHECK: @llvm.ppc.vsx.xsmindp
// CHECK-LE: @llvm.ppc.vsx.xsmindp

  /* vec_perm */
  res_vsll = vec_perm(vsll, vsll, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_perm(vull, vull, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vbll = vec_perm(vbll, vbll, vuc);
// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
// CHECK-LE: xor <16 x i8>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>

  res_vf = vec_round(vf);
// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float>
// CHECK-LE: call <4 x float> @llvm.round.v4f32(<4 x float>

  res_vd = vec_round(vd);
// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double>
// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double>

  res_vd = vec_perm(vd, vd, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vd = vec_splat(vd, 1);
// CHECK: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
// CHECK: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
// CHECK-LE: xor <16 x i8>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
// CHECK-LE: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>

  res_vbll = vec_splat(vbll, 1);
// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
// CHECK-LE: xor <16 x i8>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>

  res_vsll =  vec_splat(vsll, 1);
// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
// CHECK-LE: xor <16 x i8>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>

  res_vull =  vec_splat(vull, 1);
// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
// CHECK-LE: xor <16 x i8>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>

  res_vsi = vec_pack(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vui = vec_pack(vull, vull);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vbi = vec_pack(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vsll = vec_vperm(vsll, vsll, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_vperm(vull, vull, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vd = vec_vperm(vd, vd, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  /* vec_vsx_ld */

  res_vsi = vec_vsx_ld(0, &vsi);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vui = vec_vsx_ld(0, &vui);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vf = vec_vsx_ld (0, &vf);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vsll = vec_vsx_ld(0, &vsll);
// CHECK: @llvm.ppc.vsx.lxvd2x
// CHECK-LE: @llvm.ppc.vsx.lxvd2x

  res_vull = vec_vsx_ld(0, &vull);
// CHECK: @llvm.ppc.vsx.lxvd2x
// CHECK-LE: @llvm.ppc.vsx.lxvd2x

  res_vd = vec_vsx_ld(0, &vd);
// CHECK: @llvm.ppc.vsx.lxvd2x
// CHECK-LE: @llvm.ppc.vsx.lxvd2x

  res_vull = vec_vsx_ld(0, &vull);
// CHECK: @llvm.ppc.vsx.lxvd2x
// CHECK-LE: @llvm.ppc.vsx.lxvd2x

  res_vd = vec_vsx_ld(0, &vd);
// CHECK: @llvm.ppc.vsx.lxvd2x
// CHECK-LE: @llvm.ppc.vsx.lxvd2x

  res_vss = vec_vsx_ld(0, &vss);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vss = vec_vsx_ld(0, &ss);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vus = vec_vsx_ld(0, &vus);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vus = vec_vsx_ld(0, &us);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vbc = vec_vsx_ld(0, &vbc);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vsc = vec_vsx_ld(0, &vsc);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vuc = vec_vsx_ld(0, &vuc);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vsc = vec_vsx_ld(0, &sc);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  res_vuc = vec_vsx_ld(0, &uc);
// CHECK: @llvm.ppc.vsx.lxvw4x
// CHECK-LE: @llvm.ppc.vsx.lxvw4x

  /* vec_vsx_st */

  vec_vsx_st(vsi, 0, &res_vsi);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vsi, 0, &res_si);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vui, 0, &res_vui);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vui, 0, &res_ui);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vf, 0, &res_vf);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vsll, 0, &res_vsll);
// CHECK: @llvm.ppc.vsx.stxvd2x
// CHECK-LE: @llvm.ppc.vsx.stxvd2x

  vec_vsx_st(vull, 0, &res_vull);
// CHECK: @llvm.ppc.vsx.stxvd2x
// CHECK-LE: @llvm.ppc.vsx.stxvd2x

  vec_vsx_st(vd, 0, &res_vd);
// CHECK: @llvm.ppc.vsx.stxvd2x
// CHECK-LE: @llvm.ppc.vsx.stxvd2x

  vec_vsx_st(vss, 0, &res_vss);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vss, 0, &res_ss);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vus, 0, &res_vus);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vus, 0, &res_us);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vsc, 0, &res_vsc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vsc, 0, &res_sc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vuc, 0, &res_vuc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vuc, 0, &res_uc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vbc, 0, &res_vbc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vbc, 0, &res_sc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vbc, 0, &res_uc);
// CHECK: @llvm.ppc.vsx.stxvw4x
// CHECK-LE: @llvm.ppc.vsx.stxvw4x

  /* vec_and */
  res_vsll = vec_and(vsll, vsll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vsll = vec_and(vbll, vsll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vsll = vec_and(vsll, vbll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_and(vull, vull);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_and(vbll, vull);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_and(vull, vbll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vbll = vec_and(vbll, vbll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  /* vec_vand */
  res_vsll = vec_vand(vsll, vsll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vsll = vec_vand(vbll, vsll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vsll = vec_vand(vsll, vbll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_vand(vull, vull);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_vand(vbll, vull);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_vand(vull, vbll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vbll = vec_vand(vbll, vbll);
// CHECK: and <2 x i64>
// CHECK-LE: and <2 x i64>

  /* vec_andc */
  res_vsll = vec_andc(vsll, vsll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vsll = vec_andc(vbll, vsll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vsll = vec_andc(vsll, vbll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_andc(vull, vull);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_andc(vbll, vull);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vull = vec_andc(vull, vbll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vbll = vec_andc(vbll, vbll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>
// CHECK-LE: xor <2 x i64>
// CHECK-LE: and <2 x i64>

  res_vf = vec_floor(vf);
// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_floor(vd);
// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})

  res_vf = vec_madd(vf, vf, vf);
// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})

  res_vd = vec_madd(vd, vd, vd);
// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})

  /* vec_mergeh */
  res_vsll = vec_mergeh(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vsll = vec_mergeh(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vsll = vec_mergeh(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_mergeh(vull, vull);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_mergeh(vull, vbll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_mergeh(vbll, vull);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  /* vec_mergel */
  res_vsll = vec_mergel(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vsll = vec_mergel(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vsll = vec_mergel(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_mergel(vull, vull);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_mergel(vull, vbll);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  res_vull = vec_mergel(vbll, vull);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm

  /* vec_msub */
  res_vf = vec_msub(vf, vf, vf);
// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>

  res_vd = vec_msub(vd, vd, vd);
// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
// CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
// CHECK-LE-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>

  res_vsll = vec_mul(vsll, vsll);
// CHECK: mul <2 x i64>
// CHECK-LE: mul <2 x i64>

  res_vull = vec_mul(vull, vull);
// CHECK: mul <2 x i64>
// CHECK-LE: mul <2 x i64>

  res_vf = vec_mul(vf, vf);
// CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}}

  res_vd = vec_mul(vd, vd);
// CHECK: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}

  res_vf = vec_nearbyint(vf);
// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_nearbyint(vd);
// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}})

  res_vf = vec_nmadd(vf, vf, vf);
// CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
// CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]]
// CHECK-LE: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
// CHECK-LE-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]]

  res_vd = vec_nmadd(vd, vd, vd);
// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
// CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
// CHECK-LE-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]

  res_vf = vec_nmsub(vf, vf, vf);
// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}

  res_vd = vec_nmsub(vd, vd, vd);
// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
// CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
// CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
// CHECK-LE-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]

  /* vec_nor */
  res_vsll = vec_nor(vsll, vsll);
// CHECK: or <2 x i64>
// CHECK: xor <2 x i64>
// CHECK-LE: or <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_nor(vull, vull);
// CHECK: or <2 x i64>
// CHECK: xor <2 x i64>
// CHECK-LE: or <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_nor(vbll, vbll);
// CHECK: or <2 x i64>
// CHECK: xor <2 x i64>
// CHECK-LE: or <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vd = vec_nor(vd, vd);
// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>
// CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK-LE: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>

  /* vec_or */
  res_vsll = vec_or(vsll, vsll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vsll = vec_or(vbll, vsll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vsll = vec_or(vsll, vbll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vull = vec_or(vull, vull);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vull = vec_or(vbll, vull);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vull = vec_or(vull, vbll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vbll = vec_or(vbll, vbll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vd = vec_or(vd, vd);
// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK-LE: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}

  res_vd = vec_or(vbll, vd);
// CHECK: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK: [[T2:%.+]] = or <2 x i64> %{{[0-9]+}}, [[T1]]
// CHECK: bitcast <2 x i64> [[T2]] to <2 x double>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK-LE: [[T2:%.+]] = or <2 x i64> %{{[0-9]+}}, [[T1]]
// CHECK-LE: bitcast <2 x i64> [[T2]] to <2 x double>

  res_vd = vec_or(vd, vbll);
// CHECK: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK: [[T2:%.+]] = or <2 x i64> [[T1]], %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[T2]] to <2 x double>
// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK-LE: [[T2:%.+]] = or <2 x i64> [[T1]], %{{[0-9]+}}
// CHECK-LE: bitcast <2 x i64> [[T2]] to <2 x double>

  res_vf = vec_re(vf);
// CHECK: call <4 x float> @llvm.ppc.vsx.xvresp(<4 x float>
// CHECK-LE: call <4 x float> @llvm.ppc.vsx.xvresp(<4 x float>

  res_vd = vec_re(vd);
// CHECK: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double>
// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double>

  res_vf = vec_rint(vf);
// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_rint(vd);
// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})

  res_vf = vec_rsqrte(vf);
// CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})

  res_vd = vec_rsqrte(vd);
// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vf = vec_sel(vd, vd, vbll);
// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64> %{{[0-9]+}},
// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: or <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
// CHECK-LE: and <2 x i64> %{{[0-9]+}},
// CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: or <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_sel(vd, vd, vull);
// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64> %{{[0-9]+}},
// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: or <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
// CHECK-LE: and <2 x i64> %{{[0-9]+}},
// CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: or <2 x i64>
// CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>

  res_vf = vec_sqrt(vf);
// CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_sqrt(vd);
// CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})

  res_vd = vec_sub(vd, vd);
// CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}

  res_vf = vec_trunc(vf);
// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})
// CHECK-LE: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_trunc(vd);
// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})

  /* vec_vor */
  res_vsll = vec_vor(vsll, vsll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vsll = vec_vor(vbll, vsll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vsll = vec_vor(vsll, vbll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vull = vec_vor(vull, vull);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vull = vec_vor(vbll, vull);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vull = vec_vor(vull, vbll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  res_vbll = vec_vor(vbll, vbll);
// CHECK: or <2 x i64>
// CHECK-LE: or <2 x i64>

  /* vec_xor */
  res_vsll = vec_xor(vsll, vsll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vsll = vec_xor(vbll, vsll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vsll = vec_xor(vsll, vbll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_xor(vull, vull);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_xor(vbll, vull);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_xor(vull, vbll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vbll = vec_xor(vbll, vbll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_xor(vd, vd);
// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>
// CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_xor(vd, vbll);
// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>
// CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double>

  dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()

  res_vd = vec_xor(vbll, vd);
// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>
// CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double>

  /* vec_vxor */
  res_vsll = vec_vxor(vsll, vsll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vsll = vec_vxor(vbll, vsll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vsll = vec_vxor(vsll, vbll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_vxor(vull, vull);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_vxor(vbll, vull);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vull = vec_vxor(vull, vbll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vbll = vec_vxor(vbll, vbll);
// CHECK: xor <2 x i64>
// CHECK-LE: xor <2 x i64>

  res_vsll = vec_cts(vd, 0);
// CHECK: fmul <2 x double>
// CHECK: fptosi <2 x double> %{{.*}} to <2 x i64>
// CHECK-LE: fmul <2 x double>
// CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64>

  res_vsll = vec_cts(vd, 31);
// CHECK: fmul <2 x double>
// CHECK: fptosi <2 x double> %{{.*}} to <2 x i64>
// CHECK-LE: fmul <2 x double>
// CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64>

  res_vsll = vec_ctu(vd, 0);
// CHECK: fmul <2 x double>
// CHECK: fptoui <2 x double> %{{.*}} to <2 x i64>
// CHECK-LE: fmul <2 x double>
// CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64>

  res_vsll = vec_ctu(vd, 31);
// CHECK: fmul <2 x double>
// CHECK: fptoui <2 x double> %{{.*}} to <2 x i64>
// CHECK-LE: fmul <2 x double>
// CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64>

  res_vd = vec_ctf(vsll, 0);
// CHECK: sitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK: fmul <2 x double>
// CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK-LE: fmul <2 x double>

  res_vd = vec_ctf(vsll, 31);
// CHECK: sitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK: fmul <2 x double>
// CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK-LE: fmul <2 x double>

  res_vd = vec_ctf(vull, 0);
// CHECK: uitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK: fmul <2 x double>
// CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK-LE: fmul <2 x double>

  res_vd = vec_ctf(vull, 31);
// CHECK: uitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK: fmul <2 x double>
// CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double>
// CHECK-LE: fmul <2 x double>
}
Example #13
0
void f (void *p)
{
  vector unsigned int u32 = vec_vsx_ld (1, (const unsigned int *)p);
  vec_ste (u32, 1, (unsigned int *)p);
}
Example #14
0
void test1() {
// CHECK-LABEL: define void @test1

  res_vd = vec_add(vd, vd);
// CHECK: fadd <2 x double>

  res_vd = vec_and(vbll, vd);
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  res_vd = vec_and(vd, vbll);
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  res_vd = vec_and(vd, vd);
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_andc(vbll, vd);
// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_andc(vd, vbll);
// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_andc(vd, vd);
// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_ceil(vd);
// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})

  res_vf = vec_ceil(vf);
// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})

  res_vbll = vec_cmpeq(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmpeq(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmpge(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmpge(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmpgt(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmpgt(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmple(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmple(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  res_vbll = vec_cmplt(vd, vd);
// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})

  res_vbi = vec_cmplt(vf, vf);
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})

  /* vec_div */
  res_vf = vec_div(vf, vf);
// CHECK: @llvm.ppc.vsx.xvdivsp

  res_vd = vec_div(vd, vd);
// CHECK: @llvm.ppc.vsx.xvdivdp

  /* vec_max */
  res_vf = vec_max(vf, vf);
// CHECK: @llvm.ppc.vsx.xvmaxsp

  res_vd = vec_max(vd, vd);
// CHECK: @llvm.ppc.vsx.xvmaxdp

  res_vf = vec_vmaxfp(vf, vf);
// CHECK: @llvm.ppc.vsx.xvmaxsp

  /* vec_min */
  res_vf = vec_min(vf, vf);
// CHECK: @llvm.ppc.vsx.xvminsp

  res_vd = vec_min(vd, vd);
// CHECK: @llvm.ppc.vsx.xvmindp

  res_vf = vec_vminfp(vf, vf);
// CHECK: @llvm.ppc.vsx.xvminsp

  res_d = __builtin_vsx_xsmaxdp(d, d);
// CHECK: @llvm.ppc.vsx.xsmaxdp

  res_d = __builtin_vsx_xsmindp(d, d);
// CHECK: @llvm.ppc.vsx.xsmindp

  /* vec_perm */
  res_vsll = vec_perm(vsll, vsll, vuc);
// CHECK: @llvm.ppc.altivec.vperm

  res_vull = vec_perm(vull, vull, vuc);
// CHECK: @llvm.ppc.altivec.vperm

  res_vd = vec_perm(vd, vd, vuc);
// CHECK: @llvm.ppc.altivec.vperm

  res_vsll = vec_vperm(vsll, vsll, vuc);
// CHECK: @llvm.ppc.altivec.vperm

  res_vull = vec_vperm(vull, vull, vuc);
// CHECK: @llvm.ppc.altivec.vperm

  res_vd = vec_vperm(vd, vd, vuc);
// CHECK: @llvm.ppc.altivec.vperm

  /* vec_vsx_ld */

  res_vsi = vec_vsx_ld(0, &vsi);
// CHECK: @llvm.ppc.vsx.lxvw4x

  res_vui = vec_vsx_ld(0, &vui);
// CHECK: @llvm.ppc.vsx.lxvw4x

  res_vf = vec_vsx_ld (0, &vf);
// CHECK: @llvm.ppc.vsx.lxvw4x

  res_vsll = vec_vsx_ld(0, &vsll);
// CHECK: @llvm.ppc.vsx.lxvd2x

  res_vull = vec_vsx_ld(0, &vull);
// CHECK: @llvm.ppc.vsx.lxvd2x

  res_vd = vec_vsx_ld(0, &vd);
// CHECK: @llvm.ppc.vsx.lxvd2x

  /* vec_vsx_st */

  vec_vsx_st(vsi, 0, &res_vsi);
// CHECK: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vui, 0, &res_vui);
// CHECK: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vf, 0, &res_vf);
// CHECK: @llvm.ppc.vsx.stxvw4x

  vec_vsx_st(vsll, 0, &res_vsll);
// CHECK: @llvm.ppc.vsx.stxvd2x

  vec_vsx_st(vull, 0, &res_vull);
// CHECK: @llvm.ppc.vsx.stxvd2x

  vec_vsx_st(vd, 0, &res_vd);
// CHECK: @llvm.ppc.vsx.stxvd2x

  /* vec_and */
  res_vsll = vec_and(vsll, vsll);
// CHECK: and <2 x i64>

  res_vsll = vec_and(vbll, vsll);
// CHECK: and <2 x i64>

  res_vsll = vec_and(vsll, vbll);
// CHECK: and <2 x i64>

  res_vull = vec_and(vull, vull);
// CHECK: and <2 x i64>

  res_vull = vec_and(vbll, vull);
// CHECK: and <2 x i64>

  res_vull = vec_and(vull, vbll);
// CHECK: and <2 x i64>

  res_vbll = vec_and(vbll, vbll);
// CHECK: and <2 x i64>

  /* vec_vand */
  res_vsll = vec_vand(vsll, vsll);
// CHECK: and <2 x i64>

  res_vsll = vec_vand(vbll, vsll);
// CHECK: and <2 x i64>

  res_vsll = vec_vand(vsll, vbll);
// CHECK: and <2 x i64>

  res_vull = vec_vand(vull, vull);
// CHECK: and <2 x i64>

  res_vull = vec_vand(vbll, vull);
// CHECK: and <2 x i64>

  res_vull = vec_vand(vull, vbll);
// CHECK: and <2 x i64>

  res_vbll = vec_vand(vbll, vbll);
// CHECK: and <2 x i64>

  /* vec_andc */
  res_vsll = vec_andc(vsll, vsll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vsll = vec_andc(vbll, vsll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vsll = vec_andc(vsll, vbll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vull = vec_andc(vull, vull);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vull = vec_andc(vbll, vull);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vull = vec_andc(vull, vbll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vbll = vec_andc(vbll, vbll);
// CHECK: xor <2 x i64>
// CHECK: and <2 x i64>

  res_vf = vec_floor(vf);
// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_floor(vd);
// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})

  res_vf = vec_madd(vf, vf, vf);
// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})

  res_vd = vec_madd(vd, vd, vd);
// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})

  res_vf = vec_msub(vf, vf, vf);
// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>

  res_vd = vec_msub(vd, vd, vd);
// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>

  res_vf = vec_mul(vf, vf);
// CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}}

  res_vd = vec_mul(vd, vd);
// CHECK: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}

  res_vf = vec_nearbyint(vf);
// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_nearbyint(vd);
// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}})

  res_vf = vec_nmadd(vf, vf, vf);
// CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
// CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]]

  res_vd = vec_nmadd(vd, vd, vd);
// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]

  res_vf = vec_nmsub(vf, vf, vf);
// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}

  res_vd = vec_nmsub(vd, vd, vd);
// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]

  /* vec_nor */
  res_vsll = vec_nor(vsll, vsll);
// CHECK: or <2 x i64>
// CHECK: xor <2 x i64>

  res_vull = vec_nor(vull, vull);
// CHECK: or <2 x i64>
// CHECK: xor <2 x i64>

  res_vull = vec_nor(vbll, vbll);
// CHECK: or <2 x i64>
// CHECK: xor <2 x i64>

  res_vd = vec_nor(vd, vd);
// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>

  /* vec_or */
  res_vsll = vec_or(vsll, vsll);
// CHECK: or <2 x i64>

  res_vsll = vec_or(vbll, vsll);
// CHECK: or <2 x i64>

  res_vsll = vec_or(vsll, vbll);
// CHECK: or <2 x i64>

  res_vull = vec_or(vull, vull);
// CHECK: or <2 x i64>

  res_vull = vec_or(vbll, vull);
// CHECK: or <2 x i64>

  res_vull = vec_or(vull, vbll);
// CHECK: or <2 x i64>

  res_vbll = vec_or(vbll, vbll);
// CHECK: or <2 x i64>

  res_vd = vec_or(vd, vd);
// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
// CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}

  res_vf = vec_rint(vf);
// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_rint(vd);
// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})

  res_vf = vec_rsqrte(vf);
// CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})

  res_vd = vec_rsqrte(vd);
// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})

  dummy();
// CHECK: call void @dummy()

  res_vf = vec_sel(vd, vd, vbll);
// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64> %{{[0-9]+}},
// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: or <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_sel(vd, vd, vull);
// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
// CHECK: and <2 x i64> %{{[0-9]+}},
// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: or <2 x i64>
// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>

  res_vf = vec_sqrt(vf);
// CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_sqrt(vd);
// CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})

  res_vd = vec_sub(vd, vd);
// CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}

  res_vf = vec_trunc(vf);
// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})

  res_vd = vec_trunc(vd);
// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})

  /* vec_vor */
  res_vsll = vec_vor(vsll, vsll);
// CHECK: or <2 x i64>

  res_vsll = vec_vor(vbll, vsll);
// CHECK: or <2 x i64>

  res_vsll = vec_vor(vsll, vbll);
// CHECK: or <2 x i64>

  res_vull = vec_vor(vull, vull);
// CHECK: or <2 x i64>

  res_vull = vec_vor(vbll, vull);
// CHECK: or <2 x i64>

  res_vull = vec_vor(vull, vbll);
// CHECK: or <2 x i64>

  res_vbll = vec_vor(vbll, vbll);
// CHECK: or <2 x i64>

  /* vec_xor */
  res_vsll = vec_xor(vsll, vsll);
// CHECK: xor <2 x i64>

  res_vsll = vec_xor(vbll, vsll);
// CHECK: xor <2 x i64>

  res_vsll = vec_xor(vsll, vbll);
// CHECK: xor <2 x i64>

  res_vull = vec_xor(vull, vull);
// CHECK: xor <2 x i64>

  res_vull = vec_xor(vbll, vull);
// CHECK: xor <2 x i64>

  res_vull = vec_xor(vull, vbll);
// CHECK: xor <2 x i64>

  res_vbll = vec_xor(vbll, vbll);
// CHECK: xor <2 x i64>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_xor(vd, vd);
// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_xor(vd, vbll);
// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>

  dummy();
// CHECK: call void @dummy()

  res_vd = vec_xor(vbll, vd);
// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>

  /* vec_vxor */
  res_vsll = vec_vxor(vsll, vsll);
// CHECK: xor <2 x i64>

  res_vsll = vec_vxor(vbll, vsll);
// CHECK: xor <2 x i64>

  res_vsll = vec_vxor(vsll, vbll);
// CHECK: xor <2 x i64>

  res_vull = vec_vxor(vull, vull);
// CHECK: xor <2 x i64>

  res_vull = vec_vxor(vbll, vull);
// CHECK: xor <2 x i64>

  res_vull = vec_vxor(vull, vbll);
// CHECK: xor <2 x i64>

  res_vbll = vec_vxor(vbll, vbll);
// CHECK: xor <2 x i64>

}
Example #15
0
void x264_zigzag_interleave_8x8_cavlc_altivec( int16_t *dst, int16_t *src, uint8_t *nnz )
{
    vec_s16_t tmpv[8];
    vec_s16_t merge[2];
    vec_s16_t permv[2];
    vec_s16_t orv[4];
    vec_s16_t src0v = vec_ld( 0*16, src );
    vec_s16_t src1v = vec_ld( 1*16, src );
    vec_s16_t src2v = vec_ld( 2*16, src );
    vec_s16_t src3v = vec_ld( 3*16, src );
    vec_s16_t src4v = vec_ld( 4*16, src );
    vec_s16_t src5v = vec_ld( 5*16, src );
    vec_s16_t src6v = vec_ld( 6*16, src );
    vec_s16_t src7v = vec_ld( 7*16, src );
    vec_u8_t pack;
    vec_u8_t nnzv = vec_vsx_ld( 0, nnz );
    vec_u8_t shift = vec_splat_u8( 7 );
    LOAD_ZERO;

    const vec_u8_t mask[3] = {
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
        { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F },
        { 0x10, 0x11, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x12, 0x13, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F }
    };

    tmpv[0] = vec_mergeh( src0v, src1v );
    tmpv[1] = vec_mergel( src0v, src1v );

    tmpv[2] = vec_mergeh( src2v, src3v );
    tmpv[3] = vec_mergel( src2v, src3v );

    tmpv[4] = vec_mergeh( src4v, src5v );
    tmpv[5] = vec_mergel( src4v, src5v );

    tmpv[6] = vec_mergeh( src6v, src7v );
    tmpv[7] = vec_mergel( src6v, src7v );

    merge[0] = vec_mergeh( tmpv[0], tmpv[1] );
    merge[1] = vec_mergeh( tmpv[2], tmpv[3] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[1] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 0*16, dst );

    merge[0] = vec_mergeh( tmpv[4], tmpv[5] );
    merge[1] = vec_mergeh( tmpv[6], tmpv[7] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[2] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 1*16, dst );
    vec_st( permv[1], 2*16, dst );
    vec_st( permv[2], 3*16, dst );

    merge[0] = vec_mergel( tmpv[0], tmpv[1] );
    merge[1] = vec_mergel( tmpv[2], tmpv[3] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[1] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 4*16, dst );

    merge[0] = vec_mergel( tmpv[4], tmpv[5] );
    merge[1] = vec_mergel( tmpv[6], tmpv[7] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[2] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 5*16, dst );
    vec_st( permv[1], 6*16, dst );
    vec_st( permv[2], 7*16, dst );

    orv[0] = vec_or( src0v, src1v );
    orv[1] = vec_or( src2v, src3v );
    orv[2] = vec_or( src4v, src5v );
    orv[3] = vec_or( src6v, src7v );

    permv[0] = vec_or( orv[0], orv[1] );
    permv[1] = vec_or( orv[2], orv[3] );
    permv[0] = vec_or( permv[0], permv[1] );

    permv[1] = vec_perm( permv[0], permv[0], mask[1] );
    permv[0] = vec_or( permv[0], permv[1] );

    pack = (vec_u8_t)vec_packs( permv[0], permv[0] );
    pack = (vec_u8_t)vec_cmpeq( pack, zerov );
    pack = vec_nor( pack, zerov );
    pack = vec_sr( pack, shift );
    nnzv = vec_perm( nnzv, pack, mask[2] );
    vec_st( nnzv, 0, nnz );
}