/*
 * Issues a single vst2q_s16 call on an int16_t pointer and an int16x8x2_t
 * value. Neither operand is initialized before the call.
 * NOTE(review): this looks like a compile-only check of the intrinsic's
 * signature and is presumably never executed -- confirm.
 */
void test_vst2Qs16 (void)
{
  int16_t *store_addr;
  int16x8x2_t vector_pair;

  vst2q_s16 (store_addr, vector_pair);
}
/*
 * Dequantizes one block: multiplies the 16 values at d->qcoeff element-wise
 * by the 16 factors at DQC and writes the 16 products to d->dqcoeff.
 *
 * vld2q_s16 splits the input into even/odd-indexed halves and vst2q_s16
 * re-interleaves them on the way out, so element order is preserved.
 * Products are truncated to 16 bits per lane (vmulq_s16).
 */
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
  const int16x8x2_t coeffs = vld2q_s16(d->qcoeff);
  const int16x8x2_t factors = vld2q_s16(DQC);
  int16x8x2_t product;
  int half;

  /* val[0] holds the even-indexed elements, val[1] the odd-indexed ones. */
  for (half = 0; half < 2; ++half) {
    product.val[half] = vmulq_s16(coeffs.val[half], factors.val[half]);
  }

  vst2q_s16(d->dqcoeff, product);
}
void vp8_dequantize_b_loop_neon( int16_t *Q, int16_t *DQC, int16_t *DQ) { int16x8x2_t qQ, qDQC, qDQ; qQ = vld2q_s16(Q); qDQC = vld2q_s16(DQC); qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); vst2q_s16(DQ, qDQ); return; }
/*
 * Horizontal inverse-DWT pass over a square block, 8 lanes at a time (NEON).
 *
 * For each of the subband_width rows, with n running over the row:
 *   dst[2n]     = l[n] - ((h[n-1] + h[n] + 1) >> 1)
 *   dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1)
 * At the row edges the missing neighbour is replaced by the nearest sample:
 * h[-1] is taken as h[0], and the even value past the end of the row is
 * taken as the row's last even value. All arithmetic is done in 16-bit lanes.
 *
 * l              low band, subband_width rows of subband_width samples;
 *                overwritten in place with the even outputs
 * h              high band, same layout, only read
 * dst            output, 2 * subband_width samples per row, written as
 *                interleaved even/odd pairs
 * subband_width  row length and row count; the loops advance 8 samples per
 *                step and rewind by subband_width, so it must be a
 *                multiple of 8
 *
 * The edge vectors are assembled from data that is already loaded instead
 * of loading from h - 1 / l + 1, so no element before the start of a row's
 * h data or past the end of a row's l data is ever read.
 *
 * NOTE(review): no return type is visible on this definition; confirm it is
 * supplied by the surrounding source (nothing is returned here).
 */
rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband_width)
{
	int y, n;
	INT16 * l_ptr = l;
	INT16 * h_ptr = h;
	INT16 * dst_ptr = dst;
	const int16x8_t one = vdupq_n_s16(1);

	for (y = 0; y < subband_width; y++)
	{
		/* Even coefficients */
		for (n = 0; n < subband_width; n += 8)
		{
			/* dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1); */
			int16x8_t l_n = vld1q_s16(l_ptr);
			int16x8_t h_n = vld1q_s16(h_ptr);
			int16x8_t h_n_m;

			if (n == 0)
			{
				/* Row start: build { h0, h0, h1, ..., h6 } from h_n
				 * rather than loading from h_ptr - 1. */
				int16x8_t edge = vdupq_n_s16(vgetq_lane_s16(h_n, 0));
				h_n_m = vextq_s16(edge, h_n, 7);
			}
			else
			{
				h_n_m = vld1q_s16(h_ptr - 1);
			}

			int16x8_t tmp_n = vaddq_s16(h_n, h_n_m);
			tmp_n = vaddq_s16(tmp_n, one);
			tmp_n = vshrq_n_s16(tmp_n, 1);

			int16x8_t dst_n = vsubq_s16(l_n, tmp_n);

			/* Even results are parked in l and consumed by the odd pass. */
			vst1q_s16(l_ptr, dst_n);

			l_ptr += 8;
			h_ptr += 8;
		}

		/* Rewind to the start of the current row for the odd pass. */
		l_ptr -= subband_width;
		h_ptr -= subband_width;

		/* Odd coefficients */
		for (n = 0; n < subband_width; n += 8)
		{
			/* dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1); */
			int16x8_t h_n = vld1q_s16(h_ptr);
			h_n = vshlq_n_s16(h_n, 1);

			int16x8x2_t dst_n;
			dst_n.val[0] = vld1q_s16(l_ptr);

			int16x8_t dst_n_p;

			if (n == subband_width - 8)
			{
				/* Row end: build { d1, ..., d7, d7 } from the even
				 * vector rather than loading from l_ptr + 1. */
				int16x8_t edge = vdupq_n_s16(vgetq_lane_s16(dst_n.val[0], 7));
				dst_n_p = vextq_s16(dst_n.val[0], edge, 1);
			}
			else
			{
				dst_n_p = vld1q_s16(l_ptr + 1);
			}

			dst_n.val[1] = vaddq_s16(dst_n_p, dst_n.val[0]);
			dst_n.val[1] = vshrq_n_s16(dst_n.val[1], 1);
			dst_n.val[1] = vaddq_s16(dst_n.val[1], h_n);

			/* Interleave even (val[0]) and odd (val[1]) results. */
			vst2q_s16(dst_ptr, dst_n);

			l_ptr += 8;
			h_ptr += 8;
			dst_ptr += 16;
		}
	}
}
// Type-agnostic spelling of vst2q_s16: stores the two s16 vectors held in
// `v` to `ptr` using the 2-way interleaving store intrinsic.
inline void vst2q(s16 * ptr, const int16x8x2_t & v)
{
    vst2q_s16(ptr, v);
}