Example #1
#include <arm_neon.h>
#include <stdlib.h>

int main(void)
{
    uint8_t v1_init[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    uint8_t v2_init[8] = {2, 2, 2, 2, 2, 2, 2, 2};
    uint8x8_t v1 = vld1_u8 (v1_init);
    uint8x8_t v2 = vld1_u8 (v2_init);
    uint8x8x2_t vd1, vd2;
    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
    int i;
    uint8_t odd, even;

    /* Interleave each vector with zeroes: on a little-endian target the
       data bytes land at even indices and the zero bytes at odd indices;
       big-endian is the reverse, hence the check below. */
    vd1 = vzip_u8(v1, vdup_n_u8(0));
    vd2 = vzip_u8(v2, vdup_n_u8(0));

    vst1_u8(d1.buf, vd1.val[0]);
    vst1_u8(d2.buf, vd1.val[1]);
    vst1_u8(d3.buf, vd2.val[0]);
    vst1_u8(d4.buf, vd2.val[1]);

#ifdef __ARMEL__
    odd = 1;
    even = 0;
#else
    odd = 0;
    even = 1;
#endif

    for (i = 0; i < 8; i++)
      if ((i % 2 == even && d4.buf[i] != 2)
          || (i % 2 == odd && d4.buf[i] != 0))
         abort ();

    return 0;
}
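
The test above interleaves each vector with zeroes and then verifies, with the endianness-aware check, that data bytes and zero bytes alternate as expected. Zero-extending bytes to 16-bit lanes is also available as a single intrinsic; a minimal sketch, assuming only <arm_neon.h> and a little-endian target:

#include <arm_neon.h>

/* Widen eight bytes to eight 16-bit lanes; on a little-endian target the
   in-memory byte pattern matches vzip_u8(v, vdup_n_u8(0)). */
uint16x8_t widen_bytes(uint8x8_t v)
{
    return vmovl_u8(v);
}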
Example #2
File: vzipu8.c Project: 0day-ci/gcc
/* Compile-only intrinsics test: the arguments are left uninitialized
   because only the code generated for vzip_u8 is being exercised. */
#include <arm_neon.h>

void test_vzipu8 (void)
{
  uint8x8x2_t out_uint8x8x2_t;
  uint8x8_t arg0_uint8x8_t;
  uint8x8_t arg1_uint8x8_t;

  out_uint8x8x2_t = vzip_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
}
Example #3
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  // CHECK-LABEL: test_vzip_u8
  return vzip_u8(a, b);
  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
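
On AArch64, vzip_u8 lowers to the zip1/zip2 pair checked above: zip1 produces the interleaved low halves (val[0]) and zip2 the interleaved high halves (val[1]). A minimal sketch of the resulting lane layout, assuming a little-endian target with <arm_neon.h> available:

#include <arm_neon.h>
#include <stdio.h>

int main(void)
{
    uint8_t a_init[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    uint8_t b_init[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    uint8x8x2_t z = vzip_u8(vld1_u8(a_init), vld1_u8(b_init));
    uint8_t lo[8], hi[8];

    vst1_u8(lo, z.val[0]);  /* zip1 result: 0 10 1 11 2 12 3 13 */
    vst1_u8(hi, z.val[1]);  /* zip2 result: 4 14 5 15 6 16 7 17 */

    for (int i = 0; i < 8; i++)
        printf("%u %u\n", lo[i], hi[i]);
    return 0;
}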
Example #4
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

/* Routine optimized for shuffling a buffer for a type size of 8 bytes. */
static void shuffle8_neon(uint8_t* const dest, const uint8_t* const src,
                          const size_t vectorizable_elements,
                          const size_t total_elements)
{
  size_t i, j, k, l;
  static const size_t bytesoftype = 8;
  uint8x8x2_t r0[4];
  uint16x4x2_t r1[4];
  uint32x2x2_t r2[4];

  for (i = 0, k = 0; i < vectorizable_elements * bytesoftype; i += 64, k++) {
    /* Load and interleave groups of 8 bytes (64 bytes) to the structure r0 */
    for (j = 0; j < 4; j++) {
      r0[j] = vzip_u8(vld1_u8(src + i + (2 * j) * 8),
                      vld1_u8(src + i + (2 * j + 1) * 8));
    }
    /* Interleave 16 bytes */
    for (j = 0; j < 2; j++) {
      for (l = 0; l < 2; l++) {
        r1[j * 2 + l] = vzip_u16(vreinterpret_u16_u8(r0[j * 2].val[l]),
                                 vreinterpret_u16_u8(r0[j * 2 + 1].val[l]));
      }
    }
    /* Interleave 32 bytes */
    for (j = 0; j < 2; j++) {
      for (l = 0; l < 2; l++) {
        r2[j * 2 + l] = vzip_u32(vreinterpret_u32_u16(r1[j].val[l]),
                                 vreinterpret_u32_u16(r1[j + 2].val[l]));
      }
    }
    /* Store the results in the destination vector */
    for (j = 0; j < 4; j++) {
      for (l = 0; l < 2; l++) {
        vst1_u8(dest + k * 8 + (j * 2 + l) * total_elements,
                vreinterpret_u8_u32(r2[j].val[l]));
      }
    }
  }
}
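
The routine is a NEON byte transpose: byte b of element j ends up in output plane b, at offset j within the plane. For reference, a scalar sketch of the same mapping under the same argument conventions (shuffle8_scalar is a hypothetical name, not part of the project):

#include <stddef.h>
#include <stdint.h>

static void shuffle8_scalar(uint8_t* const dest, const uint8_t* const src,
                            const size_t vectorizable_elements,
                            const size_t total_elements)
{
  size_t j, b;
  /* Byte b of element j goes to plane b, at offset j within the plane. */
  for (j = 0; j < vectorizable_elements; j++)
    for (b = 0; b < 8; b++)
      dest[b * total_elements + j] = src[j * 8 + b];
}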
Example #5
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

/* Routine optimized for unshuffling a buffer for a type size of 16 bytes. */
static void unshuffle16_neon(uint8_t* const dest, const uint8_t* const src,
                             const size_t vectorizable_elements,
                             const size_t total_elements)
{
  size_t i, j, k, l, m;
  static const size_t bytesoftype = 16;
  uint8x8x2_t r0[8];
  uint16x4x2_t r1[8];
  uint32x2x2_t r2[8];

  for (i = 0, k = 0; i < vectorizable_elements * bytesoftype; i += 128, k++) {
    /* Load and interleave groups of 16 bytes (128 bytes) to the structure r0 */
    for (j = 0; j < 8; j++) {
      r0[j] = vzip_u8(vld1_u8(src + (2 * j) * total_elements + k * 8),
                      vld1_u8(src + (2 * j + 1) * total_elements + k * 8));
    }
    /* Interleave 16 bytes */
    for (j = 0; j < 4; j++) {
      for (l = 0; l < 2; l++) {
        r1[2 * j + l] = vzip_u16(vreinterpret_u16_u8(r0[2 * j].val[l]),
                                 vreinterpret_u16_u8(r0[2 * j + 1].val[l]));
      }
    }
    /* Interleave 32 bytes */
    for (j = 0; j < 2; j++) {
      for (l = 0; l < 2; l++) {
        for (m = 0; m < 2; m++) {
          r2[j * 2 + l + 4 * m] =
              vzip_u32(vreinterpret_u32_u16(r1[j + 4 * m].val[l]),
                       vreinterpret_u32_u16(r1[j + 2 + 4 * m].val[l]));
        }
      }
    }
    /* Store the results in the destination vector */
    for (j = 0; j < 4; j++) {
      for (l = 0; l < 2; l++) {
        for (m = 0; m < 2; m++) {
          vst1_u8(dest + i + (4 * j + m + 2 * l) * 8,
                  vreinterpret_u8_u32(r2[j + 4 * m].val[l]));
        }
      }
    }
  }
}
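
Unshuffling is the inverse transpose: it reassembles each element by gathering its byte b back from plane b. A scalar sketch of the same mapping (unshuffle16_scalar is a hypothetical name, not part of the project):

#include <stddef.h>
#include <stdint.h>

static void unshuffle16_scalar(uint8_t* const dest, const uint8_t* const src,
                               const size_t vectorizable_elements,
                               const size_t total_elements)
{
  size_t j, b;
  /* Reassemble element j by collecting its byte b from plane b. */
  for (j = 0; j < vectorizable_elements; j++)
    for (b = 0; b < 16; b++)
      dest[j * 16 + b] = src[b * total_elements + j];
}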