/*
 * Writes a patch two bytes wide and eight rows tall: eight 16-bit lane
 * stores, the first at dst and each following one `pitch` bytes further on.
 *
 * Lane layout (first digit = index into result.val[], second digit = row):
 *
 *   result.val[0]:  00 01 02 03 | 04 05 06 07
 *   result.val[1]:  10 11 12 13 | 14 15 16 17
 *
 * vtrn_u8 pairs up the bytes that belong to the same row:
 *
 *   .val[0]:        00 10 02 12 | 04 14 06 16   (rows 0, 2, 4, 6)
 *   .val[1]:        01 11 03 13 | 05 15 07 17   (rows 1, 3, 5, 7)
 *
 * Viewed as four u16 lanes, each lane then carries the two bytes of one row,
 * so the stores below alternate between the two vectors.
 */
static INLINE void write_2x4(unsigned char *dst, int pitch,
                             const uint8x8x2_t result) {
  const uint8x8x2_t paired = vtrn_u8(result.val[0], result.val[1]);
  const uint16x4_t even_rows = vreinterpret_u16_u8(paired.val[0]);
  const uint16x4_t odd_rows = vreinterpret_u16_u8(paired.val[1]);
  unsigned char *row = dst;

  /* The lane index has to be a compile-time constant, hence no loop. */
  vst1_lane_u16((uint16_t *)row, even_rows, 0); /* row 0 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, odd_rows, 0); /* row 1 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, even_rows, 1); /* row 2 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, odd_rows, 1); /* row 3 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, even_rows, 2); /* row 4 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, odd_rows, 2); /* row 5 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, even_rows, 3); /* row 6 */
  row += pitch;
  vst1_lane_u16((uint16_t *)row, odd_rows, 3); /* row 7 */
}
/*
 * Issues a single vst1_lane_u16 call that stores lane 1 of a uint16x4_t
 * through a uint16_t pointer.
 *
 * NOTE(review): neither local is initialized, so this looks like a
 * compile-only check of the intrinsic rather than code meant to be run;
 * calling it would store through an indeterminate pointer. Confirm against
 * the test harness before changing that.
 */
void test_vst1_laneu16 (void)
{
  uint16_t *store_target;
  uint16x4_t lane_source;

  vst1_lane_u16 (store_target, lane_source, 1);
}