/*
 * Reduce two 16-byte vectors to one 32-bit value.
 *
 * Each input is ANDed with compaction_mask, then three rounds of pairwise
 * byte addition fold every group of 8 consecutive masked bytes into a single
 * byte (sums wrap modulo 256). The four resulting bytes are returned as the
 * 32-bit lane 0 of the final vector, in this byte-lane order:
 *   lane 0: high half of b
 *   lane 1: low  half of b
 *   lane 2: high half of a
 *   lane 3: low  half of a
 *
 * NOTE(review): presumably compaction_mask carries one distinct bit per byte
 * within each 8-byte group, so each sum is a bitmask of the set lanes;
 * compaction_mask is defined outside this block -- confirm.
 * NOTE(review): if this file is compiled as C (not C++), a plain `inline`
 * definition provides no external definition -- confirm one exists elsewhere.
 */
inline unsigned int GetByteMask2(uint8x16_t a, uint8x16_t b)
{
    /* Keep only the bits selected by compaction_mask. */
    const uint8x16_t masked_a = vandq_u8(a, compaction_mask);
    const uint8x16_t masked_b = vandq_u8(b, compaction_mask);

    /* Round 1: sums of adjacent byte pairs; the high half's sums occupy
       lanes 0-3 and the low half's sums occupy lanes 4-7. */
    const uint8x8_t pairs_a = vpadd_u8(vget_high_u8(masked_a),
                                       vget_low_u8(masked_a));
    const uint8x8_t pairs_b = vpadd_u8(vget_high_u8(masked_b),
                                       vget_low_u8(masked_b));

    /* Round 2: sums of four bytes; b's results in lanes 0-3, a's in 4-7. */
    const uint8x8_t quads = vpadd_u8(pairs_b, pairs_a);

    /* Round 3: sums of eight bytes; lanes 0-3 hold the four final bytes
       (lanes 4-7 are a duplicate and are discarded). */
    const uint8x8_t octets = vpadd_u8(quads, quads);

    /* Reinterpret the 8 bytes as two 32-bit lanes and return the first. */
    return vget_lane_u32(vreinterpret_u32_u8(octets), 0);
}
/*
 * Thin wrapper around the vpadd_u8 intrinsic: returns the pairwise byte
 * addition of __a and __b exactly as vpadd_u8 produces it.
 */
uint8x8_t test_vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  uint8x8_t pairwise_sum = vpadd_u8(__a, __b);

  return pairwise_sum;
}