// Lossless "add green to blue and red" transform, NEON fast path.
// Processes 4 ARGB pixels (16 bytes) per iteration; any remaining
// 0..3 pixels are handed off to the plain-C implementation.
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
  uint32_t* p = argb_data;
  uint32_t* const simd_end = p + (num_pixels & ~3);  // round down to multiple of 4
  // kGreenShuffle replicates each pixel's green byte into the blue/red lanes.
  const uint8x8_t green_pick = vld1_u8(kGreenShuffle);
  while (p < simd_end) {
    const uint8x16_t pixels = vld1q_u8((uint8_t*)p);
    const uint8x8_t lo_greens = vtbl1_u8(vget_low_u8(pixels), green_pick);
    const uint8x8_t hi_greens = vtbl1_u8(vget_high_u8(pixels), green_pick);
    const uint8x16_t greens = vcombine_u8(lo_greens, hi_greens);
    // Byte-wise add (wraps mod 256, which is the transform's definition).
    vst1q_u8((uint8_t*)p, vaddq_u8(pixels, greens));
    p += 4;
  }
  // Finish the leftover pixels with the scalar reference code.
  VP8LAddGreenToBlueAndRed_C(p, num_pixels & 3);
}
/* Compile-only intrinsics test for vtbl1_u8 (GCC testsuite style).
   The arguments are deliberately left uninitialized: the test only
   checks that the intrinsic accepts/produces the right types and that
   the expected instruction is emitted — the function is never run. */
void test_vtbl1u8 (void)
{
  uint8x8_t out_uint8x8_t;
  uint8x8_t arg0_uint8x8_t;
  uint8x8_t arg1_uint8x8_t;

  out_uint8x8_t = vtbl1_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
}
// D45 (diagonal down-right at 45 degrees) 8x8 intra predictor, NEON.
// Each output row is the 3-tap filter round((A0 + 2*A1 + A2) / 4) of the
// `above` row, shifted left by one position per row (last sample repeated).
void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left) {
  // Shuffle tables: shift lanes left by 1 (resp. 2), clamping at lane 7.
  static const uint8_t kShiftBy1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 };
  static const uint8_t kShiftBy2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 };
  const uint8x8_t shift1 = vld1_u8(kShiftBy1);
  const uint8x8_t shift2 = vld1_u8(kShiftBy2);
  const uint8x8_t top = vld1_u8(above);              // A0: the top row
  const uint8x8_t top_p1 = vtbl1_u8(top, shift1);    // A1: shifted by one
  const uint8x8_t top_p2 = vtbl1_u8(top, shift2);    // A2: shifted by two
  // (A0 + 2*A1 + A2 + 2) >> 2 == vrhadd(vhadd(A0, A2), A1), no overflow.
  uint8x8_t pred = vrhadd_u8(vhadd_u8(top, top_p2), top_p1);
  int row;
  (void)left;  // d45 never uses the left column

  for (row = 0; row < 8; ++row) {
    vst1_u8(dst + row * stride, pred);
    if (row < 7) {
      pred = vtbl1_u8(pred, shift1);  // slide the diagonal one step down
    }
  }
}
// BGRA -> RGBA channel reorder, NEON fast path.
// Handles 2 pixels (8 bytes) per iteration via a byte shuffle; an odd
// trailing pixel is converted by the plain-C fallback.
static void ConvertBGRAToRGBA(const uint32_t* src, int num_pixels,
                              uint8_t* dst) {
  const uint32_t* const simd_end = src + (num_pixels & ~1);  // even count
  // kRGBAShuffle maps the B/G/R/A byte positions of two pixels at once.
  const uint8x8_t swizzle = vld1_u8(kRGBAShuffle);
  while (src < simd_end) {
    const uint8x8_t bgra_pair = vld1_u8((const uint8_t*)src);
    vst1_u8(dst, vtbl1_u8(bgra_pair, swizzle));
    src += 2;
    dst += 8;
  }
  // At most one pixel remains.
  VP8LConvertBGRAToRGBA_C(src, num_pixels & 1, dst);
}
/// Copy 8 bytes from `match` to `op` where the regions may overlap with a
/// period of `offset` (1..7), using a single vtbl1 byte shuffle instead of
/// a scalar byte loop. Row `offset` of `masks` selects source bytes so the
/// stored 8 bytes repeat the `offset`-byte pattern.
/// NOTE(review): also advances `match` by masks[offset] — row 0 of the table
/// doubles as the per-offset advance amount, not a shuffle mask.
inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{
    static constexpr UInt8 __attribute__((__aligned__(8))) masks[] =
    {
        0, 1, 2, 2, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */
        0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */
        0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 2, 0, 1, 2, 0, 1,
        0, 1, 2, 3, 0, 1, 2, 3,
        0, 1, 2, 3, 4, 0, 1, 2,
        0, 1, 2, 3, 4, 5, 0, 1,
        0, 1, 2, 3, 4, 5, 6, 0,
    };

    // Load 8 source bytes, permute them by the offset's mask row, store.
    unalignedStore(op,
        vtbl1_u8(unalignedLoad<uint8x8_t>(match),
                 unalignedLoad<uint8x8_t>(masks + 8 * offset)));

    match += masks[offset];
}
// Broadcast each pixel's green byte across its other lanes (per `shuffle`),
// applied to both 8-byte halves of a 16-byte ARGB vector.
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
                                             const uint8x8_t shuffle) {
  const uint8x8_t lo = vtbl1_u8(vget_low_u8(argb), shuffle);
  const uint8x8_t hi = vtbl1_u8(vget_high_u8(argb), shuffle);
  return vcombine_u8(lo, hi);
}
// Thin wrapper around vtbl1_u8 (table lookup: result[i] = tab[idx[i]],
// or 0 when idx[i] >= 8). Kept as a standalone function so the emitted
// instruction can be inspected.
uint8x8_t tbl_testu8_ (uint8x8_t tab, uint8x8_t idx)
{
  const uint8x8_t looked_up = vtbl1_u8 (tab, idx);
  return looked_up;
}
// Clang codegen test for vtbl1_u8. The "// CHECK:" comments below are
// FileCheck directives matched against the generated assembly — they are
// part of the test and must not be edited or reflowed.
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { // CHECK: test_vtbl1_u8
  return vtbl1_u8(a, b); // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
}