static void yuv2plane1_16_vsx(const int32_t *src, uint16_t *dest, int dstW,
                              int big_endian, int output_bits)
{
    const int dst_u = -(uintptr_t)dest & 7;
    const int shift = 3;
    const int add = (1 << (shift - 1));
    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);
    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);
    vector uint32_t v, v2;
    vector uint16_t vd;
    int i;

    yuv2plane1_16_u(src, dest, dst_u, big_endian, output_bits, 0);

    for (i = dst_u; i < dstW - 7; i += 8) {
        v = vec_vsx_ld(0, (const uint32_t *) &src[i]);
        v = vec_add(v, vadd);
        v = vec_sr(v, vshift);

        v2 = vec_vsx_ld(0, (const uint32_t *) &src[i + 4]);
        v2 = vec_add(v2, vadd);
        v2 = vec_sr(v2, vshift);

        vd = vec_packsu(v, v2);
        vd = vec_rl(vd, vswap);

        vec_st(vd, 0, &dest[i]);
    }

    yuv2plane1_16_u(src, dest, dstW, big_endian, output_bits, i);
}
static void yuv2plane1_8_vsx(const int16_t *src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    const int dst_u = -(uintptr_t)dest & 15;
    int i, j;
    LOCAL_ALIGNED(16, int16_t, val, [16]);
    const vector uint16_t shifts = (vector uint16_t) {7, 7, 7, 7, 7, 7, 7, 7};
    vector int16_t vi, vileft, ditherleft, ditherright;
    vector uint8_t vd;

    for (j = 0; j < 16; j++) {
        val[j] = dither[(dst_u + offset + j) & 7];
    }

    ditherleft = vec_ld(0, val);
    ditherright = vec_ld(0, &val[8]);

    yuv2plane1_8_u(src, dest, dst_u, dither, offset, 0);

    for (i = dst_u; i < dstW - 15; i += 16) {
        vi = vec_vsx_ld(0, &src[i]);
        vi = vec_adds(ditherleft, vi);
        vileft = vec_sra(vi, shifts);

        vi = vec_vsx_ld(0, &src[i + 8]);
        vi = vec_adds(ditherright, vi);
        vi = vec_sra(vi, shifts);

        vd = vec_packsu(vileft, vi);
        vec_st(vd, 0, &dest[i]);
    }

    yuv2plane1_8_u(src, dest, dstW, dither, offset, i);
}
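Per output pixel, the vector loop above adds the 8-entry dither, shifts down by 7 and clamps to a byte (vec_packsu does the clamp). A hedged scalar sketch of the same per-pixel operation; the helper name is mine, and the vector version's saturating 16-bit add is approximated by a plain add:

/* Scalar sketch of the dither + shift + clamp done by yuv2plane1_8_vsx. */
static void yuv2plane1_8_scalar_ref(const int16_t *src, uint8_t *dest, int dstW,
                                    const uint8_t *dither, int offset)
{
    for (int i = 0; i < dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i] = val < 0 ? 0 : (val > 255 ? 255 : val);
    }
}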
void intrinsics_rgb2gray(float *ra, float *ga, float *ba, float *gray)
{
    __vector float c1 = vec_splats(0.3f);
    __vector float c2 = vec_splats(0.59f);
    __vector float c3 = vec_splats(0.11f);

    for (int i = 0; i < N; i += 4) {
        __vector float a = vec_vsx_ld(0, ra + i);
        __vector float b = vec_vsx_ld(0, ga + i);
        __vector float c = vec_vsx_ld(0, ba + i);
        __vector float out = c1 * a + c2 * b + c3 * c;
        vec_vsx_st(out, 0, gray + i);
    }
}
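The loop above processes four floats per iteration with the standard 0.3/0.59/0.11 luma weights. A scalar sketch of the same computation; the function name is mine, and as in the vector version N is assumed to be a multiple of 4:

/* Scalar reference for the weighted RGB-to-gray conversion above. */
void scalar_rgb2gray(float *ra, float *ga, float *ba, float *gray)
{
    for (int i = 0; i < N; i++)
        gray[i] = 0.3f * ra[i] + 0.59f * ga[i] + 0.11f * ba[i];
}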
static void yuv2plane1_nbps_vsx(const int16_t *src, uint16_t *dest, int dstW,
                                int big_endian, int output_bits)
{
    const int dst_u = -(uintptr_t)dest & 7;
    const int shift = 15 - output_bits;
    const int add = (1 << (shift - 1));
    const int clip = (1 << output_bits) - 1;
    const vector uint16_t vadd = (vector uint16_t) {add, add, add, add, add, add, add, add};
    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);
    const vector uint16_t vshift = (vector uint16_t) vec_splat_u16(shift);
    const vector uint16_t vlargest = (vector uint16_t) {clip, clip, clip, clip, clip, clip, clip, clip};
    vector uint16_t v;
    int i;

    yuv2plane1_nbps_u(src, dest, dst_u, big_endian, output_bits, 0);

    for (i = dst_u; i < dstW - 7; i += 8) {
        v = vec_vsx_ld(0, (const uint16_t *) &src[i]);
        v = vec_add(v, vadd);
        v = vec_sr(v, vshift);
        v = vec_min(v, vlargest);
        v = vec_rl(v, vswap);
        vec_st(v, 0, &dest[i]);
    }

    yuv2plane1_nbps_u(src, dest, dstW, big_endian, output_bits, i);
}
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int i, s = 0;
    const vector unsigned int zero = (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
    vector signed int sum;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned pixels. */
        //vector unsigned char pixl = vec_ld(0, pix);
        //vector unsigned char pixr = vec_ld(15, pix);
        //vector unsigned char pixv = vec_perm(pixl, pixr, perm);
        vector unsigned char pixv = vec_vsx_ld(0, pix);

        /* Square the values, and add them to our sum. */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }
    /* Sum up the four partial sums, and put the result into s. */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    /* Store a single 32-bit element; a full 16-byte vector store here
     * would write past the int. */
    vec_ste(sum, 0, &s);

    return s;
}
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    int i, s;
    const vector unsigned int zero = (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
    vector signed int sumdiffs;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1. */
        //vector unsigned char pixl = vec_ld(0, pix);
        //vector unsigned char pixr = vec_ld(15, pix);
        //vector unsigned char t1 = vec_perm(pixl, pixr, perm);
        vector unsigned char t1 = vec_vsx_ld(0, pix);

        /* Add each 4 pixel group together and put 4 results into sad. */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    /* Store a single 32-bit element; a full vector store would overrun s. */
    vec_ste(sumdiffs, 0, &s);

    return s;
}
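Both functions above use the same reduction idiom: accumulate per-word partial sums inside the loop (vec_msum or vec_sum4s), collapse them with vec_sums, splat the total across the vector, and store a single element. For orientation, a scalar sketch of what pix_sum computes; the name is mine:

/* Scalar equivalent of pix_sum_altivec: sum of a 16x16 block of bytes. */
static int pix_sum_scalar(const uint8_t *pix, int line_size)
{
    int s = 0;
    for (int y = 0; y < 16; y++) {
        for (int x = 0; x < 16; x++)
            s += pix[x];
        pix += line_size;
    }
    return s;
}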
void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 )
{
    vec_s16_t diff[2];
    vec_s32_t sum[2];
    vec_s32_t zero32 = vec_splat_s32(0);
    vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };

    pix_diff( &pix1[0], &pix2[0], diff, 0 );
    pix_diff( &pix1[4*FENC_STRIDE], &pix2[4*FDEC_STRIDE], diff, 1 );

    sum[0] = vec_sum4s( diff[0], zero32 );
    sum[1] = vec_sum4s( diff[1], zero32 );
    diff[0] = vec_packs( sum[0], sum[1] );
    sum[0] = vec_sum4s( diff[0], zero32 );
    diff[0] = vec_packs( sum[0], zero32 );

    diff[1] = vec_vsx_ld( 0, dct );
    diff[0] = vec_perm( diff[0], diff[1], mask );
    vec_vsx_st( diff[0], 0, dct );

    /* 2x2 DC transform */
    int d0 = dct[0] + dct[1];
    int d1 = dct[2] + dct[3];
    int d2 = dct[0] - dct[1];
    int d3 = dct[2] - dct[3];
    dct[0] = d0 + d1;
    dct[1] = d0 - d1;
    dct[2] = d2 + d3;
    dct[3] = d2 - d3;
}
__attribute__((noinline)) void
foo ()
{
  int i;
  vector long long va, vb, vc, vd, tmp;
  volatile unsigned long long three = 3;
  vector unsigned long long threes = vec_splats (three);

  for (i = 0; i < N; i += 2)
    {
      vb = vec_vsx_ld (0, (vector long long *)&cb[i]);
      vc = vec_vsx_ld (0, (vector long long *)&cc[i]);
      vd = vec_vsx_ld (0, (vector long long *)&cd[i]);
      tmp = vec_add (vb, vc);
      tmp = vec_sub (tmp, vd);
      tmp = vec_sra (tmp, threes);
      x = vec_extract (tmp, 0);
      vec_vsx_st (tmp, 0, (vector long long *)&ca[i]);
    }
}
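Element-wise, the loop computes (cb[i] + cc[i] - cd[i]) >> 3 into ca[i], two long long lanes per iteration, and keeps lane 0 of each vector in the global x. A scalar sketch under the same assumptions about the globals of the surrounding test case:

/* Scalar sketch of foo(); ca, cb, cc, cd, N and x are assumed to be the
 * globals declared by the test. */
__attribute__((noinline)) void
foo_scalar ()
{
  for (int i = 0; i < N; i++)
    ca[i] = (cb[i] + cc[i] - cd[i]) >> 3;
  /* foo () additionally leaves lane 0 of the last vector in x. */
}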
static void pix_diff( uint8_t *p1, uint8_t *p2, vec_s16_t *diff, int i )
{
    vec_s16_t pix1v, pix2v, tmp[4];
    vec_u8_t pix1v8, pix2v8;
    LOAD_ZERO;

    for( int j = 0; j < 4; j++ )
    {
        pix1v8 = vec_vsx_ld( 0, p1 );
        pix2v8 = vec_vsx_ld( 0, p2 );
        pix1v = vec_u8_to_s16_h( pix1v8 );
        pix2v = vec_u8_to_s16_h( pix2v8 );
        tmp[j] = vec_sub( pix1v, pix2v );
        p1 += FENC_STRIDE;
        p2 += FDEC_STRIDE;
    }

    diff[i] = vec_add( tmp[0], tmp[1] );
    diff[i] = vec_add( diff[i], tmp[2] );
    diff[i] = vec_add( diff[i], tmp[3] );
}
static int scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
                                    int order)
{
    int i;
    LOAD_ZERO;
    register vec_s16 vec1;
    register vec_s32 res = vec_splat_s32 (0), t;
    signed int ires;

    for (i = 0; i < order; i += 8) {
        vec1 = vec_vsx_ld (0, v1);
        t = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
        res = vec_sums (t, res);
        v1 += 8;
        v2 += 8;
    }

    res = vec_splat (res, 3);
    vec_ste (res, 0, &ires);

    return ires;
}
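vec_msum performs the widening multiply-accumulate and vec_sums the horizontal reduction, so the function is an ordinary int16 dot product. A scalar sketch; the name is mine, and order is assumed to be a multiple of 8, as the vector loop requires:

/* Scalar reference for the dot product computed above. */
static int scalarproduct_int16_scalar (const signed short *v1,
                                       const signed short *v2, int order)
{
    int sum = 0;
    for (int i = 0; i < order; i++)
        sum += v1[i] * v2[i];
    return sum;
}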
void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] )
{
    vec_s16_t dcv;
    vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) );
    vec_u16_t v6 = vec_splat_u16( 6 );
    vec_s16_t dctv = vec_vsx_ld( 0, dct );

    dctv = vec_sra( vec_add( dctv, v32 ), v6 );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 0 ),
                                 (vec_s32_t)vec_splat( dctv, 1 ) );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );
    idct8_dc_altivec( &p_dst[0], dcv );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 2 ),
                                 (vec_s32_t)vec_splat( dctv, 3 ) );
    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );
    idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dcv );
}
void test1() { // CHECK-LABEL: define void @test1 // CHECK-LE-LABEL: define void @test1 res_vf = vec_abs(vf); // CHECK: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}}) dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_add(vd, vd); // CHECK: fadd <2 x double> // CHECK-LE: fadd <2 x double> res_vd = vec_and(vbll, vd); // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> res_vd = vec_and(vd, vbll); // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> res_vd = vec_and(vd, vd); // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_andc(vbll, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_andc(vd, vbll); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_andc(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_ceil(vd); // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}}) // CHECK-LE: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}}) res_vf = vec_ceil(vf); // CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}}) res_vbll = vec_cmpeq(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmpeq(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmpge(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmpge(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) 
res_vbll = vec_cmpgt(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmpgt(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmple(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmple(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmplt(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmplt(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) /* vec_cpsgn */ res_vf = vec_cpsgn(vf, vf); // CHECK: call <4 x float> @llvm.copysign.v4f32(<4 x float> %{{.+}}, <4 x float> %{{.+}}) // CHECK-LE: call <4 x float> @llvm.copysign.v4f32(<4 x float> %{{.+}}, <4 x float> %{{.+}}) res_vd = vec_cpsgn(vd, vd); // CHECK: call <2 x double> @llvm.copysign.v2f64(<2 x double> %{{.+}}, <2 x double> %{{.+}}) // CHECK-LE: call <2 x double> @llvm.copysign.v2f64(<2 x double> %{{.+}}, <2 x double> %{{.+}}) /* vec_div */ res_vsll = vec_div(vsll, vsll); // CHECK: sdiv <2 x i64> // CHECK-LE: sdiv <2 x i64> res_vull = vec_div(vull, vull); // CHECK: udiv <2 x i64> // CHECK-LE: udiv <2 x i64> res_vf = vec_div(vf, vf); // CHECK: fdiv <4 x float> // CHECK-LE: fdiv <4 x float> res_vd = vec_div(vd, vd); // CHECK: fdiv <2 x double> // CHECK-LE: fdiv <2 x double> /* vec_max */ res_vf = vec_max(vf, vf); // CHECK: @llvm.ppc.vsx.xvmaxsp // CHECK-LE: @llvm.ppc.vsx.xvmaxsp res_vd = vec_max(vd, vd); // CHECK: @llvm.ppc.vsx.xvmaxdp // CHECK-LE: @llvm.ppc.vsx.xvmaxdp res_vf = vec_vmaxfp(vf, vf); // CHECK: @llvm.ppc.vsx.xvmaxsp // CHECK-LE: @llvm.ppc.vsx.xvmaxsp /* vec_min */ res_vf = vec_min(vf, vf); // CHECK: @llvm.ppc.vsx.xvminsp // CHECK-LE: @llvm.ppc.vsx.xvminsp res_vd = vec_min(vd, vd); // CHECK: @llvm.ppc.vsx.xvmindp // CHECK-LE: @llvm.ppc.vsx.xvmindp res_vf = vec_vminfp(vf, vf); // CHECK: @llvm.ppc.vsx.xvminsp // CHECK-LE: @llvm.ppc.vsx.xvminsp res_d = __builtin_vsx_xsmaxdp(d, d); // CHECK: @llvm.ppc.vsx.xsmaxdp // CHECK-LE: @llvm.ppc.vsx.xsmaxdp res_d = __builtin_vsx_xsmindp(d, d); // CHECK: @llvm.ppc.vsx.xsmindp // CHECK-LE: @llvm.ppc.vsx.xsmindp /* vec_perm */ res_vsll = vec_perm(vsll, vsll, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_perm(vull, vull, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vbll = vec_perm(vbll, vbll, vuc); // CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> // CHECK-LE: xor <16 x i8> // CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> 
{{.+}} to <4 x i32> // CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> res_vf = vec_round(vf); // CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> // CHECK-LE: call <4 x float> @llvm.round.v4f32(<4 x float> res_vd = vec_round(vd); // CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> // CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double> res_vd = vec_perm(vd, vd, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vd = vec_splat(vd, 1); // CHECK: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> // CHECK: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> // CHECK-LE: xor <16 x i8> // CHECK-LE: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> // CHECK-LE: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> res_vbll = vec_splat(vbll, 1); // CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> // CHECK-LE: xor <16 x i8> // CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> res_vsll = vec_splat(vsll, 1); // CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> // CHECK-LE: xor <16 x i8> // CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> res_vull = vec_splat(vull, 1); // CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> // CHECK-LE: xor <16 x i8> // CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> res_vsi = vec_pack(vsll, vsll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vui = vec_pack(vull, vull); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vbi = vec_pack(vbll, vbll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_vperm(vsll, vsll, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_vperm(vull, vull, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vd = vec_vperm(vd, vd, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_vsx_ld */ res_vsi = vec_vsx_ld(0, &vsi); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vui = vec_vsx_ld(0, &vui); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vf = vec_vsx_ld (0, &vf); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vsll = vec_vsx_ld(0, &vsll); 
// CHECK: @llvm.ppc.vsx.lxvd2x // CHECK-LE: @llvm.ppc.vsx.lxvd2x res_vull = vec_vsx_ld(0, &vull); // CHECK: @llvm.ppc.vsx.lxvd2x // CHECK-LE: @llvm.ppc.vsx.lxvd2x res_vd = vec_vsx_ld(0, &vd); // CHECK: @llvm.ppc.vsx.lxvd2x // CHECK-LE: @llvm.ppc.vsx.lxvd2x res_vull = vec_vsx_ld(0, &vull); // CHECK: @llvm.ppc.vsx.lxvd2x // CHECK-LE: @llvm.ppc.vsx.lxvd2x res_vd = vec_vsx_ld(0, &vd); // CHECK: @llvm.ppc.vsx.lxvd2x // CHECK-LE: @llvm.ppc.vsx.lxvd2x res_vss = vec_vsx_ld(0, &vss); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vss = vec_vsx_ld(0, &ss); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vus = vec_vsx_ld(0, &vus); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vus = vec_vsx_ld(0, &us); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vbc = vec_vsx_ld(0, &vbc); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vsc = vec_vsx_ld(0, &vsc); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vuc = vec_vsx_ld(0, &vuc); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vsc = vec_vsx_ld(0, &sc); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x res_vuc = vec_vsx_ld(0, &uc); // CHECK: @llvm.ppc.vsx.lxvw4x // CHECK-LE: @llvm.ppc.vsx.lxvw4x /* vec_vsx_st */ vec_vsx_st(vsi, 0, &res_vsi); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vsi, 0, &res_si); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vui, 0, &res_vui); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vui, 0, &res_ui); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vf, 0, &res_vf); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vsll, 0, &res_vsll); // CHECK: @llvm.ppc.vsx.stxvd2x // CHECK-LE: @llvm.ppc.vsx.stxvd2x vec_vsx_st(vull, 0, &res_vull); // CHECK: @llvm.ppc.vsx.stxvd2x // CHECK-LE: @llvm.ppc.vsx.stxvd2x vec_vsx_st(vd, 0, &res_vd); // CHECK: @llvm.ppc.vsx.stxvd2x // CHECK-LE: @llvm.ppc.vsx.stxvd2x vec_vsx_st(vss, 0, &res_vss); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vss, 0, &res_ss); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vus, 0, &res_vus); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vus, 0, &res_us); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vsc, 0, &res_vsc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vsc, 0, &res_sc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vuc, 0, &res_vuc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vuc, 0, &res_uc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vbc, 0, &res_vbc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vbc, 0, &res_sc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vbc, 0, &res_uc); // CHECK: @llvm.ppc.vsx.stxvw4x // CHECK-LE: @llvm.ppc.vsx.stxvw4x /* vec_and */ res_vsll = vec_and(vsll, vsll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vsll = vec_and(vbll, vsll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vsll = vec_and(vsll, vbll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_and(vull, vull); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_and(vbll, vull); // CHECK: and 
<2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_and(vull, vbll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vbll = vec_and(vbll, vbll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> /* vec_vand */ res_vsll = vec_vand(vsll, vsll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vsll = vec_vand(vbll, vsll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vsll = vec_vand(vsll, vbll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_vand(vull, vull); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_vand(vbll, vull); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_vand(vull, vbll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> res_vbll = vec_vand(vbll, vbll); // CHECK: and <2 x i64> // CHECK-LE: and <2 x i64> /* vec_andc */ res_vsll = vec_andc(vsll, vsll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vsll = vec_andc(vbll, vsll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vsll = vec_andc(vsll, vbll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_andc(vull, vull); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_andc(vbll, vull); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vull = vec_andc(vull, vbll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vbll = vec_andc(vbll, vbll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> // CHECK-LE: xor <2 x i64> // CHECK-LE: and <2 x i64> res_vf = vec_floor(vf); // CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_floor(vd); // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_madd(vf, vf, vf); // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) res_vd = vec_madd(vd, vd, vd); // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) /* vec_mergeh */ res_vsll = vec_mergeh(vsll, vsll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergeh(vsll, vbll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergeh(vbll, vsll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergeh(vull, vull); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergeh(vull, vbll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergeh(vbll, vull); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_mergel */ res_vsll = vec_mergel(vsll, vsll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergel(vsll, vbll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = 
vec_mergel(vbll, vsll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergel(vull, vull); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergel(vull, vbll); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergel(vbll, vull); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_msub */ res_vf = vec_msub(vf, vf, vf); // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> // CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> res_vd = vec_msub(vd, vd, vd); // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> // CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}} // CHECK-LE-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> res_vsll = vec_mul(vsll, vsll); // CHECK: mul <2 x i64> // CHECK-LE: mul <2 x i64> res_vull = vec_mul(vull, vull); // CHECK: mul <2 x i64> // CHECK-LE: mul <2 x i64> res_vf = vec_mul(vf, vf); // CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}} res_vd = vec_mul(vd, vd); // CHECK: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}} res_vf = vec_nearbyint(vf); // CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_nearbyint(vd); // CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_nmadd(vf, vf, vf); // CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) // CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]] // CHECK-LE: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) // CHECK-LE-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]] res_vd = vec_nmadd(vd, vd, vd); // CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) // CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]] // CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) // CHECK-LE-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]] res_vf = vec_nmsub(vf, vf, vf); // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float 
-0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> // CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} res_vd = vec_nmsub(vd, vd, vd); // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> // CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]] // CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}} // CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> // CHECK-LE-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]] /* vec_nor */ res_vsll = vec_nor(vsll, vsll); // CHECK: or <2 x i64> // CHECK: xor <2 x i64> // CHECK-LE: or <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_nor(vull, vull); // CHECK: or <2 x i64> // CHECK: xor <2 x i64> // CHECK-LE: or <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_nor(vbll, vbll); // CHECK: or <2 x i64> // CHECK: xor <2 x i64> // CHECK-LE: or <2 x i64> // CHECK-LE: xor <2 x i64> res_vd = vec_nor(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1> // CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK-LE: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1> /* vec_or */ res_vsll = vec_or(vsll, vsll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vsll = vec_or(vbll, vsll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vsll = vec_or(vsll, vbll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vull = vec_or(vull, vull); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vull = vec_or(vbll, vull); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vull = vec_or(vull, vbll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vbll = vec_or(vbll, vbll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vd = vec_or(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK-LE: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} res_vd = vec_or(vbll, vd); // CHECK: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: [[T2:%.+]] = or <2 x i64> %{{[0-9]+}}, [[T1]] // CHECK: bitcast <2 x i64> [[T2]] to <2 x double> // CHECK-LE: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK-LE: [[T2:%.+]] = or <2 x i64> %{{[0-9]+}}, [[T1]] // CHECK-LE: bitcast <2 x i64> [[T2]] to <2 x double> res_vd = vec_or(vd, vbll); // CHECK: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: [[T2:%.+]] = or <2 x i64> [[T1]], %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[T2]] to <2 x double> // CHECK-LE: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK-LE: [[T2:%.+]] = or <2 x i64> [[T1]], %{{[0-9]+}} // CHECK-LE: bitcast <2 x i64> [[T2]] to <2 x double> res_vf = vec_re(vf); // CHECK: call <4 x float> @llvm.ppc.vsx.xvresp(<4 x float> // CHECK-LE: call <4 x float> 
@llvm.ppc.vsx.xvresp(<4 x float> res_vd = vec_re(vd); // CHECK: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double> // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double> res_vf = vec_rint(vf); // CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_rint(vd); // CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_rsqrte(vf); // CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}}) res_vd = vec_rsqrte(vd); // CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vf = vec_sel(vd, vd, vbll); // CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> // CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1> // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_sel(vd, vd, vull); // CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> // CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1> // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> res_vf = vec_sqrt(vf); // CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_sqrt(vd); // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}}) res_vd = vec_sub(vd, vd); // CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}} res_vf = vec_trunc(vf); // CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}}) // CHECK-LE: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_trunc(vd); // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}}) /* vec_vor */ res_vsll = vec_vor(vsll, vsll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vsll = vec_vor(vbll, vsll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vsll = vec_vor(vsll, vbll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vull = vec_vor(vull, vull); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vull = vec_vor(vbll, vull); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vull = vec_vor(vull, vbll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> res_vbll = vec_vor(vbll, vbll); // CHECK: or <2 x i64> // CHECK-LE: or <2 x i64> /* vec_xor */ res_vsll = vec_xor(vsll, vsll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vsll = vec_xor(vbll, vsll); // CHECK: xor <2 x i64> // 
CHECK-LE: xor <2 x i64> res_vsll = vec_xor(vsll, vbll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_xor(vull, vull); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_xor(vbll, vull); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_xor(vull, vbll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vbll = vec_xor(vbll, vbll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_xor(vd, vd); // CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[X1]] to <2 x double> // CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_xor(vd, vbll); // CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[X1]] to <2 x double> // CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vd = vec_xor(vbll, vd); // CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[X1]] to <2 x double> // CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double> /* vec_vxor */ res_vsll = vec_vxor(vsll, vsll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vsll = vec_vxor(vbll, vsll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vsll = vec_vxor(vsll, vbll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_vxor(vull, vull); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_vxor(vbll, vull); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vull = vec_vxor(vull, vbll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vbll = vec_vxor(vbll, vbll); // CHECK: xor <2 x i64> // CHECK-LE: xor <2 x i64> res_vsll = vec_cts(vd, 0); // CHECK: fmul <2 x double> // CHECK: fptosi <2 x double> %{{.*}} to <2 x i64> // CHECK-LE: fmul <2 x double> // CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64> res_vsll = vec_cts(vd, 31); // CHECK: fmul <2 x double> // CHECK: fptosi <2 x double> %{{.*}} to <2 x i64> // CHECK-LE: fmul <2 x double> // CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64> res_vsll = vec_ctu(vd, 0); // CHECK: fmul <2 x double> // CHECK: fptoui <2 x double> %{{.*}} to <2 x i64> // CHECK-LE: fmul <2 x double> // CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64> res_vsll = vec_ctu(vd, 31); // CHECK: fmul <2 x double> // CHECK: fptoui <2 x double> %{{.*}} to <2 x i64> // CHECK-LE: fmul <2 x double> // CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64> res_vd = vec_ctf(vsll, 0); // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> // CHECK: fmul <2 x double> // CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double> // CHECK-LE: fmul <2 x double> res_vd = vec_ctf(vsll, 31); // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> // CHECK: fmul <2 x double> // CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double> // CHECK-LE: fmul <2 x double> res_vd = vec_ctf(vull, 0); // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> // CHECK: fmul <2 x double> // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double> // CHECK-LE: fmul <2 x double> res_vd = vec_ctf(vull, 31); // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> // CHECK: fmul <2 x double> // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double> // CHECK-LE: fmul <2 x 
double> }
void
f (void *p)
{
  vector unsigned int u32 = vec_vsx_ld (1, (const unsigned int *)p);
  vec_ste (u32, 1, (unsigned int *)p);
}
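The test relies on vec_vsx_ld accepting an arbitrary byte offset, unlike vec_ld/vec_st, which force the effective address down to a 16-byte boundary. A minimal sketch of that difference; the buffer and function name are mine:

#include <altivec.h>

/* Hypothetical demo: buf is 16-byte aligned, but the load starts one byte
 * into it.  vec_vsx_ld honours the byte offset; vec_ld with the same
 * arguments would mask the low four address bits and return the aligned
 * block instead. */
static unsigned int buf[8] __attribute__((aligned(16))) = {0, 1, 2, 3, 4, 5, 6, 7};

vector unsigned int
load_off_by_one (void)
{
  return vec_vsx_ld (1, (const unsigned int *) buf);
}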
void test1() { // CHECK-LABEL: define void @test1 res_vd = vec_add(vd, vd); // CHECK: fadd <2 x double> res_vd = vec_and(vbll, vd); // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> res_vd = vec_and(vd, vbll); // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> res_vd = vec_and(vd, vd); // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_andc(vbll, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_andc(vd, vbll); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_andc(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> // CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_ceil(vd); // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}}) res_vf = vec_ceil(vf); // CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}}) res_vbll = vec_cmpeq(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmpeq(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmpge(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmpge(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmpgt(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmpgt(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmple(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmple(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) res_vbll = vec_cmplt(vd, vd); // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}}) res_vbi = vec_cmplt(vf, vf); // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}}) /* vec_div */ res_vf = vec_div(vf, vf); // CHECK: @llvm.ppc.vsx.xvdivsp res_vd = vec_div(vd, vd); // CHECK: @llvm.ppc.vsx.xvdivdp /* vec_max */ res_vf = vec_max(vf, vf); // CHECK: @llvm.ppc.vsx.xvmaxsp res_vd = vec_max(vd, vd); // CHECK: @llvm.ppc.vsx.xvmaxdp res_vf = vec_vmaxfp(vf, vf); // CHECK: @llvm.ppc.vsx.xvmaxsp /* vec_min */ res_vf = vec_min(vf, vf); // CHECK: @llvm.ppc.vsx.xvminsp res_vd = vec_min(vd, vd); // CHECK: @llvm.ppc.vsx.xvmindp res_vf = vec_vminfp(vf, vf); // CHECK: @llvm.ppc.vsx.xvminsp res_d = __builtin_vsx_xsmaxdp(d, d); // CHECK: @llvm.ppc.vsx.xsmaxdp res_d = __builtin_vsx_xsmindp(d, d); // CHECK: @llvm.ppc.vsx.xsmindp /* vec_perm */ res_vsll = vec_perm(vsll, vsll, vuc); // CHECK: @llvm.ppc.altivec.vperm res_vull = vec_perm(vull, vull, vuc); // CHECK: @llvm.ppc.altivec.vperm res_vd = vec_perm(vd, vd, vuc); 
// CHECK: @llvm.ppc.altivec.vperm res_vsll = vec_vperm(vsll, vsll, vuc); // CHECK: @llvm.ppc.altivec.vperm res_vull = vec_vperm(vull, vull, vuc); // CHECK: @llvm.ppc.altivec.vperm res_vd = vec_vperm(vd, vd, vuc); // CHECK: @llvm.ppc.altivec.vperm /* vec_vsx_ld */ res_vsi = vec_vsx_ld(0, &vsi); // CHECK: @llvm.ppc.vsx.lxvw4x res_vui = vec_vsx_ld(0, &vui); // CHECK: @llvm.ppc.vsx.lxvw4x res_vf = vec_vsx_ld (0, &vf); // CHECK: @llvm.ppc.vsx.lxvw4x res_vsll = vec_vsx_ld(0, &vsll); // CHECK: @llvm.ppc.vsx.lxvd2x res_vull = vec_vsx_ld(0, &vull); // CHECK: @llvm.ppc.vsx.lxvd2x res_vd = vec_vsx_ld(0, &vd); // CHECK: @llvm.ppc.vsx.lxvd2x /* vec_vsx_st */ vec_vsx_st(vsi, 0, &res_vsi); // CHECK: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vui, 0, &res_vui); // CHECK: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vf, 0, &res_vf); // CHECK: @llvm.ppc.vsx.stxvw4x vec_vsx_st(vsll, 0, &res_vsll); // CHECK: @llvm.ppc.vsx.stxvd2x vec_vsx_st(vull, 0, &res_vull); // CHECK: @llvm.ppc.vsx.stxvd2x vec_vsx_st(vd, 0, &res_vd); // CHECK: @llvm.ppc.vsx.stxvd2x /* vec_and */ res_vsll = vec_and(vsll, vsll); // CHECK: and <2 x i64> res_vsll = vec_and(vbll, vsll); // CHECK: and <2 x i64> res_vsll = vec_and(vsll, vbll); // CHECK: and <2 x i64> res_vull = vec_and(vull, vull); // CHECK: and <2 x i64> res_vull = vec_and(vbll, vull); // CHECK: and <2 x i64> res_vull = vec_and(vull, vbll); // CHECK: and <2 x i64> res_vbll = vec_and(vbll, vbll); // CHECK: and <2 x i64> /* vec_vand */ res_vsll = vec_vand(vsll, vsll); // CHECK: and <2 x i64> res_vsll = vec_vand(vbll, vsll); // CHECK: and <2 x i64> res_vsll = vec_vand(vsll, vbll); // CHECK: and <2 x i64> res_vull = vec_vand(vull, vull); // CHECK: and <2 x i64> res_vull = vec_vand(vbll, vull); // CHECK: and <2 x i64> res_vull = vec_vand(vull, vbll); // CHECK: and <2 x i64> res_vbll = vec_vand(vbll, vbll); // CHECK: and <2 x i64> /* vec_andc */ res_vsll = vec_andc(vsll, vsll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vsll = vec_andc(vbll, vsll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vsll = vec_andc(vsll, vbll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vull = vec_andc(vull, vull); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vull = vec_andc(vbll, vull); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vull = vec_andc(vull, vbll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vbll = vec_andc(vbll, vbll); // CHECK: xor <2 x i64> // CHECK: and <2 x i64> res_vf = vec_floor(vf); // CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_floor(vd); // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_madd(vf, vf, vf); // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) res_vd = vec_madd(vd, vd, vd); // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) res_vf = vec_msub(vf, vf, vf); // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> res_vd = vec_msub(vd, vd, vd); // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> res_vf = vec_mul(vf, vf); // CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}} res_vd = vec_mul(vd, vd); // CHECK: fmul <2 x double> 
%{{[0-9]+}}, %{{[0-9]+}} res_vf = vec_nearbyint(vf); // CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_nearbyint(vd); // CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_nmadd(vf, vf, vf); // CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) // CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]] res_vd = vec_nmadd(vd, vd, vd); // CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) // CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]] res_vf = vec_nmsub(vf, vf, vf); // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> // CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}} res_vd = vec_nmsub(vd, vd, vd); // CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}} // CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> // CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]] /* vec_nor */ res_vsll = vec_nor(vsll, vsll); // CHECK: or <2 x i64> // CHECK: xor <2 x i64> res_vull = vec_nor(vull, vull); // CHECK: or <2 x i64> // CHECK: xor <2 x i64> res_vull = vec_nor(vbll, vbll); // CHECK: or <2 x i64> // CHECK: xor <2 x i64> res_vd = vec_nor(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1> /* vec_or */ res_vsll = vec_or(vsll, vsll); // CHECK: or <2 x i64> res_vsll = vec_or(vbll, vsll); // CHECK: or <2 x i64> res_vsll = vec_or(vsll, vbll); // CHECK: or <2 x i64> res_vull = vec_or(vull, vull); // CHECK: or <2 x i64> res_vull = vec_or(vbll, vull); // CHECK: or <2 x i64> res_vull = vec_or(vull, vbll); // CHECK: or <2 x i64> res_vbll = vec_or(vbll, vbll); // CHECK: or <2 x i64> res_vd = vec_or(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} res_vf = vec_rint(vf); // CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_rint(vd); // CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_rsqrte(vf); // CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}}) res_vd = vec_rsqrte(vd); // CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) dummy(); // CHECK: call void @dummy() res_vf = vec_sel(vd, vd, vbll); // CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_sel(vd, vd, vull); // CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1> // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> res_vf = vec_sqrt(vf); // CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}}) res_vd = 
vec_sqrt(vd); // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}}) res_vd = vec_sub(vd, vd); // CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}} res_vf = vec_trunc(vf); // CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_trunc(vd); // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}}) /* vec_vor */ res_vsll = vec_vor(vsll, vsll); // CHECK: or <2 x i64> res_vsll = vec_vor(vbll, vsll); // CHECK: or <2 x i64> res_vsll = vec_vor(vsll, vbll); // CHECK: or <2 x i64> res_vull = vec_vor(vull, vull); // CHECK: or <2 x i64> res_vull = vec_vor(vbll, vull); // CHECK: or <2 x i64> res_vull = vec_vor(vull, vbll); // CHECK: or <2 x i64> res_vbll = vec_vor(vbll, vbll); // CHECK: or <2 x i64> /* vec_xor */ res_vsll = vec_xor(vsll, vsll); // CHECK: xor <2 x i64> res_vsll = vec_xor(vbll, vsll); // CHECK: xor <2 x i64> res_vsll = vec_xor(vsll, vbll); // CHECK: xor <2 x i64> res_vull = vec_xor(vull, vull); // CHECK: xor <2 x i64> res_vull = vec_xor(vbll, vull); // CHECK: xor <2 x i64> res_vull = vec_xor(vull, vbll); // CHECK: xor <2 x i64> res_vbll = vec_xor(vbll, vbll); // CHECK: xor <2 x i64> dummy(); // CHECK: call void @dummy() res_vd = vec_xor(vd, vd); // CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[X1]] to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_xor(vd, vbll); // CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[X1]] to <2 x double> dummy(); // CHECK: call void @dummy() res_vd = vec_xor(vbll, vd); // CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: bitcast <2 x i64> [[X1]] to <2 x double> /* vec_vxor */ res_vsll = vec_vxor(vsll, vsll); // CHECK: xor <2 x i64> res_vsll = vec_vxor(vbll, vsll); // CHECK: xor <2 x i64> res_vsll = vec_vxor(vsll, vbll); // CHECK: xor <2 x i64> res_vull = vec_vxor(vull, vull); // CHECK: xor <2 x i64> res_vull = vec_vxor(vbll, vull); // CHECK: xor <2 x i64> res_vull = vec_vxor(vull, vbll); // CHECK: xor <2 x i64> res_vbll = vec_vxor(vbll, vbll); // CHECK: xor <2 x i64> }
void x264_zigzag_interleave_8x8_cavlc_altivec( int16_t *dst, int16_t *src, uint8_t *nnz )
{
    vec_s16_t tmpv[8];
    vec_s16_t merge[2];
    vec_s16_t permv[3]; /* permv[2] is written and stored below, so three slots are needed */
    vec_s16_t orv[4];
    vec_s16_t src0v = vec_ld( 0*16, src );
    vec_s16_t src1v = vec_ld( 1*16, src );
    vec_s16_t src2v = vec_ld( 2*16, src );
    vec_s16_t src3v = vec_ld( 3*16, src );
    vec_s16_t src4v = vec_ld( 4*16, src );
    vec_s16_t src5v = vec_ld( 5*16, src );
    vec_s16_t src6v = vec_ld( 6*16, src );
    vec_s16_t src7v = vec_ld( 7*16, src );
    vec_u8_t pack;
    vec_u8_t nnzv = vec_vsx_ld( 0, nnz );
    vec_u8_t shift = vec_splat_u8( 7 );
    LOAD_ZERO;
    const vec_u8_t mask[3] = {
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
        { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F },
        { 0x10, 0x11, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x12, 0x13, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F }
    };

    tmpv[0] = vec_mergeh( src0v, src1v );
    tmpv[1] = vec_mergel( src0v, src1v );
    tmpv[2] = vec_mergeh( src2v, src3v );
    tmpv[3] = vec_mergel( src2v, src3v );
    tmpv[4] = vec_mergeh( src4v, src5v );
    tmpv[5] = vec_mergel( src4v, src5v );
    tmpv[6] = vec_mergeh( src6v, src7v );
    tmpv[7] = vec_mergel( src6v, src7v );

    merge[0] = vec_mergeh( tmpv[0], tmpv[1] );
    merge[1] = vec_mergeh( tmpv[2], tmpv[3] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[1] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 0*16, dst );

    merge[0] = vec_mergeh( tmpv[4], tmpv[5] );
    merge[1] = vec_mergeh( tmpv[6], tmpv[7] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[2] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 1*16, dst );
    vec_st( permv[1], 2*16, dst );
    vec_st( permv[2], 3*16, dst );

    merge[0] = vec_mergel( tmpv[0], tmpv[1] );
    merge[1] = vec_mergel( tmpv[2], tmpv[3] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[1] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 4*16, dst );

    merge[0] = vec_mergel( tmpv[4], tmpv[5] );
    merge[1] = vec_mergel( tmpv[6], tmpv[7] );
    permv[0] = vec_perm( merge[0], merge[1], mask[0] );
    permv[2] = vec_perm( merge[0], merge[1], mask[1] );
    vec_st( permv[0], 5*16, dst );
    vec_st( permv[1], 6*16, dst );
    vec_st( permv[2], 7*16, dst );

    orv[0] = vec_or( src0v, src1v );
    orv[1] = vec_or( src2v, src3v );
    orv[2] = vec_or( src4v, src5v );
    orv[3] = vec_or( src6v, src7v );
    permv[0] = vec_or( orv[0], orv[1] );
    permv[1] = vec_or( orv[2], orv[3] );
    permv[0] = vec_or( permv[0], permv[1] );
    permv[1] = vec_perm( permv[0], permv[0], mask[1] );
    permv[0] = vec_or( permv[0], permv[1] );

    pack = (vec_u8_t)vec_packs( permv[0], permv[0] );
    pack = (vec_u8_t)vec_cmpeq( pack, zerov );
    pack = vec_nor( pack, zerov );
    pack = vec_sr( pack, shift );
    nnzv = vec_perm( nnzv, pack, mask[2] );
    vec_st( nnzv, 0, nnz );
}
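For orientation, the merge/perm sequence above gathers four 4x4 coefficient blocks out of the column-interleaved src and sets one non-zero flag per gathered block. A hedged scalar sketch of the same data movement (a reconstruction for illustration, not the upstream reference implementation):

/* Hedged scalar sketch: coefficient j of gathered block i comes from
 * src[i + j*4]; nnz records whether block i has any nonzero coefficient. */
static void zigzag_interleave_8x8_cavlc_ref( int16_t *dst, int16_t *src, uint8_t *nnz )
{
    for( int i = 0; i < 4; i++ )
    {
        int nz = 0;
        for( int j = 0; j < 16; j++ )
        {
            nz |= src[i + j*4];
            dst[i*16 + j] = src[i + j*4];
        }
        nnz[(i&1) + (i>>1)*8] = !!nz;
    }
}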