Пример #1
0
/* Shuffle the two vectors pointed to by x and y with a fixed selector and
   return lane 2 of the result.  Selector indices 0..3 pick from *x and
   4..7 pick from *y, so lane 2 (index 6) is taken from (*y)[2].  */
int
test (v4si *x, v4si *y)
{
  v4si first = *x;
  v4si second = *y;
  v4si selector = { 2, 3, 6, 5 };
  v4si shuffled;

  shuffled = __builtin_shuffle (first, second, selector);
  return shuffled[2];
}
Пример #2
0
/* { dg-do compile { target { ! { ia32 } } } }  */
/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
/* vector(elcount, type): expands to a GCC vector_size type whose total size
   is elcount * sizeof(type) bytes, with `type` as the element type.
   Usage: `vector (4, int) v;`.  */
#define vector(elcount, type)  \
__attribute__((vector_size((elcount)*sizeof(type)))) type

/* Compile-only diagnostic test: builds two 4-lane int vectors and an array
   holding the results of several vector operations.  Each initializer line
   carries a dg-warning directive naming the diagnostic expected on that
   exact line, so the directives must stay on the same line as their
   expression.  */
int main (int argc, char *argv[])
{
  /* argc appears in both operands; presumably this keeps the vectors from
     being compile-time constants -- confirm against the test's intent.  */
  vector (4, int) v0 = {argc, 1, 15, 38};
  vector (4, int) v1 = {-4, argc, 2, 11};
  vector (4, int) res[] = 
  {
    /* Arithmetic and comparison.  */
    v0 + v1,	  /* { dg-warning "expanded piecewise" }  */
    v0 - v1,	  /* { dg-warning "expanded piecewise" }  */
    v0 > v1,	  /* { dg-warning "expanded piecewise" }  */
    /* Bitwise AND is the only entry expecting the "in parallel" wording.  */
    v0 & v1,	  /* { dg-warning "expanded in parallel" }  */
    /* Two-argument shuffle: v1 is the selector applied to v0.  */
    __builtin_shuffle (v0, v1),	    /* { dg-warning "expanded piecewise" }  */
    /* Three-argument shuffle: v1 is both the second input and the selector.  */
    __builtin_shuffle (v0, v1, v1)  /* { dg-warning "expanded piecewise" }  */  
  };

  /* Reading a result keeps the initializers live; the indices are not
     range-checked (the file is marked dg-do compile, not run).  */
  return res[argc][argc];
}
Пример #3
0
/* Interleave the two low lanes of a and b: the result is
   { a[0], b[0], a[1], b[1] } (selector indices 4..7 address b).  */
v4si vs (v4si a, v4si b)
{
  v4si interleave_low = { 0, 4, 1, 5 };
  v4si merged = __builtin_shuffle (a, b, interleave_low);

  return merged;
}
Пример #4
0
/*
 * Expand one source scanline horizontally by 2.5x and write it to `repeat`
 * consecutive destination rows.
 *
 * Every group of 8 source pixels (two 4-lane vectors) becomes 20 output
 * pixels (five vectors) following the pattern
 *    76543210 -> 77666554443322211000
 * The expanded line is staged in a local buffer and then copied row by row.
 *
 * dst    : first destination row; rows are `pitch` bytes apart.
 * src    : source pixels, read two vectors (8 pixels) at a time.
 *          NOTE(review): src is accessed through a v4hi pointer, so it is
 *          presumably vector-aligned and padded to a multiple of 8 pixels --
 *          confirm against callers.
 * ww     : source width in pixels.  At most STAGE_GROUPS * 8 pixels fit in
 *          the staging buffer; anything beyond that is not expanded (the
 *          original code overran the buffer in that case).
 * repeat : number of destination rows to produce; <= 0 does nothing.
 * pitch  : destination row stride in bytes.
 *
 * When bFullScan is set or repeat < 2, all `repeat` rows receive the pixel
 * data.  Otherwise the first repeat-1 rows receive the pixel data and the
 * last row is filled with the constant `bb`.
 */
static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
{
   enum {
      STAGE_GROUPS    = 80,                          /* 8-pixel groups that fit */
      VECS_PER_GROUP  = 5,                           /* output vectors per group */
      STAGE_VECS      = STAGE_GROUPS * VECS_PER_GROUP
   };
   v4hi r3v[STAGE_VECS];
   v4hi *b;
   v4hi *b2p;
   v4hi r1, r2;
   int xx;
   int yy;
   int ip;
   int pitch2;
   int copy_rows;
#if AG_BIG_ENDIAN != 1
   const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
#else
   const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
#endif
   /* Lanes 3,3 from the first vector and 0,0 from the second (pixels 3,3,4,4). */
   const v4ui order3 = (v4ui){3, 3, 4, 4};

   if(repeat <= 0) return;

   b = (v4hi *)src;
   pitch2 = pitch / sizeof(v4hi);
   _prefetch_data_write_l1((void *)r3v, sizeof(r3v));

   /* Stage the expanded line once; it is identical for every output row.
    * The second condition keeps the writes inside r3v. */
   ip = 0;
   for(xx = 0; (xx < ww) && (ip + VECS_PER_GROUP <= STAGE_VECS); xx += 8) {
      r1 = b[0];
      r2 = b[1];
      // 76543210 -> 77666554443322211000
      r3v[ip + 0].uv = __builtin_ia32_pshufd(r1.uv, 0b01000000); /* 0,0,0,1 */
      r3v[ip + 1].uv = __builtin_ia32_pshufd(r1.uv, 0b10101001); /* 1,2,2,2 */
      r3v[ip + 2] = (v4hi)__builtin_shuffle(r1.uv, r2.uv, order3); /* 3,3,4,4 */
      r3v[ip + 3].uv = __builtin_ia32_pshufd(r2.uv, 0b10010100); /* 4,5,5,6 */
      r3v[ip + 4].uv = __builtin_ia32_pshufd(r2.uv, 0b11111010); /* 6,6,7,7 */
      ip += VECS_PER_GROUP;
      b += 2;
   }

   /* Decide how many rows get pixel data; any remaining row gets `bb`. */
   if(__builtin_expect(((bFullScan) || (repeat < 2)), 1)) {
      copy_rows = repeat;
   } else {
      copy_rows = repeat - 1;
   }

   b2p = dst;
   for(yy = 0; yy < copy_rows; yy++) {
      memcpy((void *)b2p, (void *)r3v, sizeof(v4hi) * ip);
      b2p = b2p + pitch2;
   }

   if(copy_rows < repeat) {
      /* Non-full-scan mode: the final row is filled with the constant. */
      for(xx = 0; xx < ip; xx++) b2p[xx].uv = bb;
   }
}
Пример #5
0
long f (long d, long e)
{
  vec x = { d, e };
  vec m = { 1, 0 };
  return __builtin_shuffle (x, m) [1];
}
Пример #6
0
/* Returns { x[0] - y[0], x[1] + y[1] }: lane 0 comes from the difference
   and lane 1 (selector index 3) from the sum.  */
v2df foo1 (v2df x, v2df y)
{
  v2df difference = x - y;
  v2df sum = x + y;
  v2di pick = { 0, 3 };
  v2df blended = __builtin_shuffle (difference, sum, pick);

  return blended;
}
Пример #7
0
/* Returns { x[0] - y[0], y[1] + x[1] }: lane 0 (selector index 2) comes
   from the difference and lane 1 from the sum.  */
v2df foo4 (v2df x, v2df y)
{
  v2df sum = y + x;
  v2df difference = x - y;
  v2di pick = { 2, 1 };
  v2df blended = __builtin_shuffle (sum, difference, pick);

  return blended;
}