C++ (Cpp) vzip_u32 예제들

프로그래밍 언어: C++ (Cpp)

메소드/함수: vzip_u32

hotexamples.com에서의 예제들: 4

C++ (Cpp) vzip_u32 - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 C++ (Cpp) vzip_u32 사용 예제 중 실제 코드에서 가장 높은 평가를 받은 것들입니다. 각 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

/* Exercises the vzip_u32 intrinsic on two 2-lane 32-bit unsigned vectors.
 * NOTE(review): both operands are read without ever being assigned, and the
 * zipped result is never consumed; presumably this is a compile-only check
 * of the intrinsic -- confirm before reusing it as runnable code. */
void test_vzipu32 (void)
{
    uint32x2_t lhs;
    uint32x2_t rhs;
    uint32x2x2_t zipped;

    zipped = vzip_u32 (lhs, rhs);
}

예제 #2

파일 보기

파일: aarch64-neon-perm.c 프로젝트: Bigcheese/clang

// Returns the result of vzip_u32 applied to the two 2-lane 32-bit unsigned
// vectors `a` and `b`.
// NOTE(review): the comment lines inside the body appear to be pattern
// directives matched against the compiler's assembly output (presumably LLVM
// FileCheck -- confirm); they must stay byte-identical and in this order.
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  // CHECK-LABEL: test_vzip_u32
  return vzip_u32(a, b);
  // CHECK: {{ins v[0-9]+.s\[1\], v[0-9]+.s\[0\]|zip1 v2.2s, v0.2s, v1.2s}}
  // CHECK: {{ins v[0-9]+.s\[0\], v[0-9]+.s\[1\]|zip2 v1.2s, v0.2s, v1.2s}}
}

예제 #3

파일 보기

파일: shuffle-neon.c 프로젝트: BillTheBest/c-blosc2

/*
 * Routine optimized for shuffling a buffer for a type size of 8 bytes.
 *
 * Every outer iteration reads 64 consecutive bytes from `src` (eight 8-byte
 * elements) and writes eight 8-byte groups to `dest`: group p (0..7) is
 * stored at `dest + k*8 + p*total_elements`, where k is the iteration index.
 *
 * dest                  - output buffer.
 * src                   - input buffer.
 * vectorizable_elements - number of elements to process; the loop runs while
 *                         the byte offset is below vectorizable_elements * 8.
 * total_elements        - distance, in bytes, between consecutive output
 *                         groups in `dest`.
 *
 * The loop always advances by 64 bytes, so a count that is not a multiple of
 * 8 still makes the last iteration load and store full 64-byte blocks.
 * NOTE(review): presumably callers only pass multiples of 8 -- confirm.
 *
 * The return type is spelled out as `void`: the function never returns a
 * value, and an omitted return type (implicit int) is not valid since C99.
 */
void shuffle8_neon(uint8_t* const dest,
                   const uint8_t* const src,
                   const size_t vectorizable_elements,
                   const size_t total_elements)
{
  size_t i, j, k, l;
  static const size_t bytesoftype = 8;
  uint8x8x2_t r0[4];
  uint16x4x2_t r1[4];
  uint32x2x2_t r2[4];

  for (i = 0, k = 0; i < vectorizable_elements * bytesoftype; i += 64, k++) {
    /* Load 64 bytes as eight 8-byte vectors and zip neighbouring pairs
       byte by byte into r0. */
    for (j = 0; j < 4; j++) {
      r0[j] = vzip_u8(vld1_u8(src + i + (2 * j) * 8),
                      vld1_u8(src + i + (2 * j + 1) * 8));
    }
    /* Zip the halves of neighbouring r0 entries on 16-bit lanes. */
    for (j = 0; j < 2; j++) {
      for (l = 0; l < 2; l++) {
        r1[j * 2 + l] = vzip_u16(vreinterpret_u16_u8(r0[j * 2].val[l]),
                                 vreinterpret_u16_u8(r0[j * 2 + 1].val[l]));
      }
    }
    /* Zip r1 entries that are two apart on 32-bit lanes. */
    for (j = 0; j < 2; j++) {
      for (l = 0; l < 2; l++) {
        r2[j * 2 + l] = vzip_u32(vreinterpret_u32_u16(r1[j].val[l]),
                                 vreinterpret_u32_u16(r1[j + 2].val[l]));
      }
    }
    /* Store the results in the destination vector: each half of each r2
       entry goes to its own group, total_elements bytes apart. */
    for (j = 0; j < 4; j++) {
      for (l = 0; l < 2; l++) {
        vst1_u8(dest + k * 8 + (j * 2 + l) * total_elements,
                vreinterpret_u8_u32(r2[j].val[l]));
      }
    }
  }
}

예제 #4

파일 보기

파일: shuffle-neon.c 프로젝트: BillTheBest/c-blosc2

/*
 * Routine optimized for unshuffling a buffer for a type size of 16 bytes.
 *
 * Every outer iteration reads sixteen 8-byte groups from `src` -- group p
 * (0..15) is loaded from `src + p*total_elements + k*8`, where k is the
 * iteration index -- and writes 128 consecutive bytes starting at `dest + i`,
 * where i is the running byte offset (128 * k).
 *
 * dest                  - output buffer.
 * src                   - input buffer.
 * vectorizable_elements - number of elements to process; the loop runs while
 *                         the byte offset is below vectorizable_elements * 16.
 * total_elements        - distance, in bytes, between consecutive input
 *                         groups in `src`.
 *
 * The loop always advances by 128 bytes, so a count that is not a multiple
 * of 8 still makes the last iteration load and store full blocks.
 * NOTE(review): presumably callers only pass multiples of 8 -- confirm.
 *
 * The return type is spelled out as `void`: the function never returns a
 * value, and an omitted return type (implicit int) is not valid since C99.
 */
void unshuffle16_neon(uint8_t* const dest,
                      const uint8_t* const src,
                      const size_t vectorizable_elements,
                      const size_t total_elements)
{
  size_t i, j, k, l, m;
  static const size_t bytesoftype = 16;
  uint8x8x2_t r0[8];
  uint16x4x2_t r1[8];
  uint32x2x2_t r2[8];

  for (i = 0, k = 0; i < vectorizable_elements * bytesoftype; i += 128, k++) {
    /* Load 8 bytes from each of the 16 input groups and zip neighbouring
       groups byte by byte into r0. */
    for (j = 0; j < 8; j++) {
      r0[j] = vzip_u8(vld1_u8(src + (2 * j) * total_elements + k * 8),
                      vld1_u8(src + (2 * j + 1) * total_elements + k * 8));
    }
    /* Zip the halves of neighbouring r0 entries on 16-bit lanes. */
    for (j = 0; j < 4; j++) {
      for (l = 0; l < 2; l++) {
        r1[2 * j + l] = vzip_u16(vreinterpret_u16_u8(r0[2 * j].val[l]),
                                 vreinterpret_u16_u8(r0[2 * j + 1].val[l]));
      }
    }
    /* Zip r1 entries that are two apart on 32-bit lanes; m selects the
       lower (0..3) or upper (4..7) half of r1 and r2. */
    for (j = 0; j < 2; j++) {
      for (l = 0; l < 2; l++) {
        for (m = 0; m < 2; m++) {
          r2[j * 2 + l + 4 * m] =
              vzip_u32(vreinterpret_u32_u16(r1[j + 4 * m].val[l]),
                       vreinterpret_u32_u16(r1[j + 2 + 4 * m].val[l]));
        }
      }
    }
    /* Store the results in the destination vector: the sixteen 8-byte
       slots (4*j + m + 2*l) cover the 128-byte block at dest + i. */
    for (j = 0; j < 4; j++) {
      for (l = 0; l < 2; l++) {
        for (m = 0; m < 2; m++) {
          vst1_u8(dest + i + (4 * j + m + 2 * l) * 8,
                  vreinterpret_u8_u32(r2[j + 4 * m].val[l]));
        }
      }
    }
  }
}