/* Permute the concatenation of *x and *y with a constant selector and
   return lane 2 of the permuted vector.  Assuming v4si has four lanes,
   selector value 6 refers to element 2 of *y, so that is the value
   returned.  */
int
test (v4si *x, v4si *y)
{
  v4si selector = { 2, 3, 6, 5 };

  return __builtin_shuffle (*x, *y, selector)[2];
}
/* { dg-do compile { target { ! { ia32 } } } } */ /* { dg-options "-mno-sse -Wvector-operation-performance" } */ #define vector(elcount, type) \ __attribute__((vector_size((elcount)*sizeof(type)))) type int main (int argc, char *argv[]) { vector (4, int) v0 = {argc, 1, 15, 38}; vector (4, int) v1 = {-4, argc, 2, 11}; vector (4, int) res[] = { v0 + v1, /* { dg-warning "expanded piecewise" } */ v0 - v1, /* { dg-warning "expanded piecewise" } */ v0 > v1, /* { dg-warning "expanded piecewise" } */ v0 & v1, /* { dg-warning "expanded in parallel" } */ __builtin_shuffle (v0, v1), /* { dg-warning "expanded piecewise" } */ __builtin_shuffle (v0, v1, v1) /* { dg-warning "expanded piecewise" } */ }; return res[argc][argc]; }
/* Two-operand shuffle of a and b with the constant selector {0, 4, 1, 5}.
   Assuming v4si has four lanes, the result is { a[0], b[0], a[1], b[1] },
   i.e. the low halves of a and b interleaved.  */
v4si
vs (v4si a, v4si b)
{
  v4si selector = {0, 4, 1, 5};
  v4si interleaved = __builtin_shuffle (a, b, selector);

  return interleaved;
}
/*
 * Scaler_DrawLine - stretch one source line horizontally by 2.5x and emit it
 * on `repeat` consecutive destination rows.
 *
 * Every group of 8 source pixels (two v4hi vectors) becomes 20 output pixels
 * (five v4hi vectors) using the replication pattern
 *
 *     76543210 -> 77666554443322211000
 *
 * The stretched line is staged in a local buffer and then copied to each
 * destination row.
 *
 * dst    : first destination row.
 * src    : source pixels, read as v4hi vectors two at a time.
 *          NOTE(review): a trailing group is always read as a full 8 pixels,
 *          even when ww is not a multiple of 8 - confirm callers pad src.
 * ww     : source width in pixels.  At most 640 pixels are processed; the
 *          staging buffer cannot hold more (see the clamp below).
 * repeat : number of destination rows to produce; <= 0 is a no-op.
 * pitch  : distance between destination rows in bytes (it is divided by
 *          sizeof(v4hi) before being added to a v4hi pointer).
 *
 * When bFullScan is set, or repeat < 2, every row receives the stretched
 * line.  Otherwise the first repeat - 1 rows receive it and the last row is
 * filled with the constant `bb` (presumably an opaque-black pixel, giving a
 * scanline gap - confirm against the pixel format).
 */
static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
{
    /* Staging-buffer geometry: 80 groups of 8 source pixels, each group
     * expanding to 5 output vectors. */
    enum { MAX_GROUPS = 80, SRC_PIXELS_PER_GROUP = 8, OUT_VECS_PER_GROUP = 5 };

    v4hi r3v[OUT_VECS_PER_GROUP * MAX_GROUPS];
    v4hi *in;
    v4hi *row;
    v4hi r1, r2;
    int full_rows;
    int pitch2;
    int ip;
    int xx;
    int yy;
#if AG_BIG_ENDIAN != 1
    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
#else
    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
#endif
    /* Selector over the pair {r1, r2}: r1[3], r1[3], r2[0], r2[0]. */
    const v4ui order3 = (v4ui){3, 3, 4, 4};

    if (repeat <= 0) {
        return;
    }

    /* The expansion loop writes 5 vectors per 8 source pixels into r3v with
     * no other bound; without this clamp a line wider than 640 pixels would
     * overrun the stack buffer (and the memcpy below would read past it). */
    if (ww > SRC_PIXELS_PER_GROUP * MAX_GROUPS) {
        ww = SRC_PIXELS_PER_GROUP * MAX_GROUPS;
    }

    in = (v4hi *)src;
    row = dst;
    pitch2 = pitch / sizeof(v4hi);
    _prefetch_data_write_l1((void *)r3v, sizeof(r3v));
    full_rows = (bFullScan) || (repeat < 2);

    /* Build the stretched line once; it is identical for every output row.
     * The pshufd immediate holds four 2-bit source-lane indices, lowest
     * destination lane first. */
    ip = 0;
    for (xx = 0; xx < ww; xx += SRC_PIXELS_PER_GROUP) {
        r1 = in[0];
        r2 = in[1];
        r3v[ip + 0].uv = __builtin_ia32_pshufd(r1.uv, 0b01000000);        /* 0 0 0 1 */
        r3v[ip + 1].uv = __builtin_ia32_pshufd(r1.uv, 0b10101001);        /* 1 2 2 2 */
        r3v[ip + 2] = (v4hi)__builtin_shuffle(r1.uv, r2.uv, order3);      /* 3 3 4 4 */
        r3v[ip + 3].uv = __builtin_ia32_pshufd(r2.uv, 0b10010100);        /* 4 5 5 6 */
        r3v[ip + 4].uv = __builtin_ia32_pshufd(r2.uv, 0b11111010);        /* 6 6 7 7 */
        ip += OUT_VECS_PER_GROUP;
        in += 2;
    }

    if (__builtin_expect(full_rows, 1)) {
        /* Every requested row gets the stretched line. */
        for (yy = 0; yy < repeat; yy++) {
            memcpy((void *)row, (void *)r3v, sizeof(v4hi) * ip);
            row += pitch2;
        }
    } else {
        /* All rows but the last get the stretched line ... */
        for (yy = 0; yy < repeat - 1; yy++) {
            memcpy((void *)row, (void *)r3v, sizeof(v4hi) * ip);
            row += pitch2;
        }
        /* ... and the last row is filled with the constant bb. */
        for (xx = 0; xx < ip; xx++) {
            row[xx].uv = bb;
        }
    }
}
long f (long d, long e) { vec x = { d, e }; vec m = { 1, 0 }; return __builtin_shuffle (x, m) [1]; }
/* Combine the element-wise difference and sum of x and y with a constant
   two-operand shuffle.  Assuming v2df has two lanes, selector {0, 3}
   yields { x[0] - y[0], x[1] + y[1] }.  */
v2df
foo1 (v2df x, v2df y)
{
  v2df difference = x - y;
  v2df sum = x + y;
  v2di selector = { 0, 3 };

  return __builtin_shuffle (difference, sum, selector);
}
/* Combine the element-wise sum and difference of x and y with a constant
   two-operand shuffle.  The sum is the first shuffle operand and the
   difference the second; assuming v2df has two lanes, selector {2, 1}
   yields { x[0] - y[0], y[1] + x[1] }.  */
v2df
foo4 (v2df x, v2df y)
{
  v2df sum = y + x;
  v2df difference = x - y;
  v2di selector = { 2, 1 };

  return __builtin_shuffle (sum, difference, selector);
}