/*
 * Issues a single vst1_lane_u8 call that stores lane 1 of a uint8x8_t
 * vector through a uint8_t pointer.
 *
 * Both operands are deliberately left uninitialized, so this function must
 * not be executed.
 * NOTE(review): presumably a compile-only check that the intrinsic is
 * accepted with a constant lane index -- confirm against the test harness.
 */
void test_vst1_laneu8 (void)
{
  uint8_t *dst;
  uint8x8_t vec;

  vst1_lane_u8 (dst, vec, 1);
}
#ifdef __ARM_NEON__
/*
 * Store lane `lane` of the de-interleaved vectors in `pairs` (val[0] to `cb`,
 * val[1] to `cr`), then advance both pointers by `stride` bytes.
 * This is a macro rather than a helper function so that the lane index
 * reaches vst1_lane_u8 as a literal constant.
 */
#define STORE_CBCR_LANE(cb, cr, pairs, lane, stride) \
	do { \
		vst1_lane_u8((cb), (pairs).val[0], (lane)); \
		vst1_lane_u8((cr), (pairs).val[1], (lane)); \
		(cb) += (stride); \
		(cr) += (stride); \
	} while (0)
#endif

/*
 * Split an interleaved chroma plane into two separate planes while rotating
 * it by 90 degrees, optionally decimating it by two in both directions.
 *
 * Each source row is written as one destination column:
 *  - clockWise:  source row 0 fills the right-most destination column from
 *                top to bottom; following rows fill columns to its left.
 *  - otherwise:  source row 0 fills the left-most destination column from
 *                bottom to top; following rows fill columns to its right.
 *
 * wDest, hDest  width and height of each destination plane, in samples;
 *               destination rows are wDest bytes apart.
 * full_width    source row pitch in sample pairs (rows are 2*full_width
 *               bytes apart).
 * cbcr_src      interleaved source; even bytes go to cb_dst, odd bytes to
 *               cr_dst.
 * cr_dst,cb_dst destination planes, each at least wDest*hDest bytes.
 * clockWise     rotation direction, see above.
 * down_scale    when set, only every second source row and every second
 *               sample pair of a row is kept (plain decimation, no
 *               filtering), so the source region is 2*hDest pairs wide and
 *               2*wDest rows high instead of hDest by wDest.
 *
 * Without __ARM_NEON__ the function does no work and only reports the
 * problem through ms_error().
 */
void rotate_down_scale_cbcr_to_cr_cb(int wDest, int hDest, int full_width, uint8_t* cbcr_src, uint8_t* cr_dst, uint8_t* cb_dst,bool_t clockWise,bool_t down_scale) {
#ifdef __ARM_NEON__
	int hSrc = down_scale ? wDest * 2 : wDest;
	int wSrc = down_scale ? hDest * 2 : hDest;
	int src_stride = 2 * full_width;
	int step = down_scale ? 2 : 1; /* source rows / sample pairs consumed per output sample */
	int signed_dst_stride;
	int incr;
	int x, y;

	if (clockWise) {
		/* Start writing the destination from its top-right sample. */
		cb_dst += wDest - 1;
		cr_dst += wDest - 1;
		incr = 1;
		signed_dst_stride = wDest;
	} else {
		/* Start writing the destination from its bottom-left sample. */
		cb_dst += wDest * (hDest - 1);
		cr_dst += wDest * (hDest - 1);
		incr = -1;
		signed_dst_stride = -wDest;
	}

	for (y = 0; y < hSrc; y += step) {
		uint8_t *cb_col = cb_dst;
		uint8_t *cr_col = cr_dst;

		/*
		 * NEON path: only whole 16-byte chunks (8 sample pairs). Taking a
		 * partial chunk here would read past the source row and write past
		 * the hDest rows of the destination column.
		 */
		for (x = 0; x + 16 <= 2 * wSrc; x += 16) {
			uint8x8x2_t pairs = vld2_u8(cbcr_src + x);

			STORE_CBCR_LANE(cb_col, cr_col, pairs, 0, signed_dst_stride);
			if (!down_scale) {
				STORE_CBCR_LANE(cb_col, cr_col, pairs, 1, signed_dst_stride);
			}
			STORE_CBCR_LANE(cb_col, cr_col, pairs, 2, signed_dst_stride);
			if (!down_scale) {
				STORE_CBCR_LANE(cb_col, cr_col, pairs, 3, signed_dst_stride);
			}
			STORE_CBCR_LANE(cb_col, cr_col, pairs, 4, signed_dst_stride);
			if (!down_scale) {
				STORE_CBCR_LANE(cb_col, cr_col, pairs, 5, signed_dst_stride);
			}
			STORE_CBCR_LANE(cb_col, cr_col, pairs, 6, signed_dst_stride);
			if (!down_scale) {
				STORE_CBCR_LANE(cb_col, cr_col, pairs, 7, signed_dst_stride);
			}
		}

		/*
		 * Scalar tail for the remaining (fewer than 8) sample pairs. x is a
		 * multiple of 16 here, so stepping by `step` pairs selects the same
		 * pairs the NEON path would have taken (every pair, or lanes
		 * 0/2/4/6 when down-scaling).
		 */
		for (; x < 2 * wSrc; x += 2 * step) {
			*cb_col = cbcr_src[x];
			*cr_col = cbcr_src[x + 1];
			cb_col += signed_dst_stride;
			cr_col += signed_dst_stride;
		}

		/* Next source row goes to the neighbouring destination column. */
		cb_dst -= incr;
		cr_dst -= incr;
		cbcr_src += src_stride * step;
	}
#else
	ms_error("Neon function '%s' used without hw neon support", __FUNCTION__);
#endif
}

#ifdef __ARM_NEON__
#undef STORE_CBCR_LANE
#endif