示例#1
0
/* Calls vld1q_lane_s32 with a literal 0 as the pointer argument, a local
   vector that is never initialised, and lane index 1.  The result is
   assigned to a local that is never read afterwards.

   NOTE(review): with a null pointer and an uninitialised operand this
   looks like a compile-only check of the intrinsic rather than code that
   is meant to be executed -- confirm against the test driver.  */
void test_vld1Q_lanes32 (void)
{
  /* Receives the intrinsic's result; not used after the assignment.  */
  int32x4_t out_int32x4_t;
  /* Vector operand passed by value; no value is ever stored in it here.  */
  int32x4_t arg1_int32x4_t;

  /* Arguments: pointer (0), vector operand, lane index (1).  */
  out_int32x4_t = vld1q_lane_s32 (0, arg1_int32x4_t, 1);
}
示例#2
0
// Exercises the lane-index argument of the vld1[q]_lane_* and
// vst1[q]_lane_* intrinsics for the s8, s16, s32 and s64 variants.
//
// For loads and for stores there are four groups of calls: the non-q and q
// forms with the largest lane index in the range quoted by the annotations,
// followed by the same forms with an index one greater than that. Only the
// latter calls carry a trailing annotation, which quotes the diagnostic text
// and the permitted range for that call.
//
// NOTE(review): the trailing annotations look like directives for a
// diagnostic-checking test harness, so they presumably must stay on the same
// line as the call they describe -- confirm before reformatting.
// NOTE(review): the same int8x8_t / int8x16_t values and the same void* are
// passed to every element-width variant; presumably the test is built with
// options that permit these implicit conversions -- confirm.
void test_ld1st1(int8x8_t small, int8x16_t big, void *addr) {
  // Loads, non-q form: lane index equal to the upper bound of each range.
  vld1_lane_s8(addr, small, 7);
  vld1_lane_s16(addr, small, 3);
  vld1_lane_s32(addr, small, 1);
  vld1_lane_s64(addr, small, 0);

  // Loads, q form: lane index equal to the upper bound of each range.
  vld1q_lane_s8(addr, big, 15);
  vld1q_lane_s16(addr, big, 7);
  vld1q_lane_s32(addr, big, 3);
  vld1q_lane_s64(addr, big, 1);

  // Loads, non-q form: lane index one past the upper bound.
  vld1_lane_s8(addr, small, 8); // expected-error {{argument should be a value from 0 to 7}}
  vld1_lane_s16(addr, small, 4); // expected-error {{argument should be a value from 0 to 3}}
  vld1_lane_s32(addr, small, 2); // expected-error {{argument should be a value from 0 to 1}}
  vld1_lane_s64(addr, small, 1); // expected-error {{argument should be a value from 0 to 0}}

  // Loads, q form: lane index one past the upper bound.
  vld1q_lane_s8(addr, big, 16); // expected-error {{argument should be a value from 0 to 15}}
  vld1q_lane_s16(addr, big, 8); // expected-error {{argument should be a value from 0 to 7}}
  vld1q_lane_s32(addr, big, 4); // expected-error {{argument should be a value from 0 to 3}}
  vld1q_lane_s64(addr, big, 2); // expected-error {{argument should be a value from 0 to 1}}

  // Stores, non-q form: lane index equal to the upper bound of each range.
  vst1_lane_s8(addr, small, 7);
  vst1_lane_s16(addr, small, 3);
  vst1_lane_s32(addr, small, 1);
  vst1_lane_s64(addr, small, 0);

  // Stores, q form: lane index equal to the upper bound of each range.
  vst1q_lane_s8(addr, big, 15);
  vst1q_lane_s16(addr, big, 7);
  vst1q_lane_s32(addr, big, 3);
  vst1q_lane_s64(addr, big, 1);

  // Stores, non-q form: lane index one past the upper bound.
  vst1_lane_s8(addr, small, 8); // expected-error {{argument should be a value from 0 to 7}}
  vst1_lane_s16(addr, small, 4); // expected-error {{argument should be a value from 0 to 3}}
  vst1_lane_s32(addr, small, 2); // expected-error {{argument should be a value from 0 to 1}}
  vst1_lane_s64(addr, small, 1); // expected-error {{argument should be a value from 0 to 0}}

  // Stores, q form: lane index one past the upper bound.
  vst1q_lane_s8(addr, big, 16); // expected-error {{argument should be a value from 0 to 15}}
  vst1q_lane_s16(addr, big, 8); // expected-error {{argument should be a value from 0 to 7}}
  vst1q_lane_s32(addr, big, 4); // expected-error {{argument should be a value from 0 to 3}}
  vst1q_lane_s64(addr, big, 2); // expected-error {{argument should be a value from 0 to 1}}
}
// Filters two channels of 16-bit data in place through two cascaded filter
// sections per channel, using NEON intrinsics.
//
// Per channel the routine reads two coefficients (factor_chX[0..1]) and two
// state words (filter_state_chX[0..1]). The state words are read on entry and
// the updated values are written back before returning.
//
// Vector lane layout used throughout:
//   lane 0: channel 1, section 0      lane 1: channel 1, section 1
//   lane 2: channel 2, section 0      lane 3: channel 2, section 1
// One step computes, for every lane,
//   out   = (state + 2 * in * factor) >> 16    (vqdmlal_s16, vshrn_n_s32)
//   state = (in << 16) - 2 * out * factor      (vshll_n_s16, vqdmlsl_s16)
// The sections are software-pipelined: in the same step section 0 handles
// sample k + 1 while section 1 handles sample k, whose input is the section-0
// output of the previous step. vrev32_s16 swaps the halves of each lane pair
// to route that output (and the freshly loaded sample) to the right lane.
//
// Preconditions: length is even (asserted). Calls with length < 2 return
// without touching any buffer.
void WebRtcIsacfix_AllpassFilter2FixDec16Neon(
    int16_t* data_ch1,  // Input and output in channel 1, in Q0
    int16_t* data_ch2,  // Input and output in channel 2, in Q0
    const int16_t* factor_ch1,  // Scaling factor for channel 1, in Q15
    const int16_t* factor_ch2,  // Scaling factor for channel 2, in Q15
    const int length,  // Length of the data buffers
    int32_t* filter_state_ch1,  // Filter state for channel 1, in Q16
    int32_t* filter_state_ch2) {  // Filter state for channel 2, in Q16
  assert(length % 2 == 0);
  // The code below unconditionally reads and writes elements 0 and 1 of both
  // data buffers, so anything shorter must not reach it.
  if (length < 2) {
    return;
  }
  int n = 0;
  int16x4_t factorv;
  int16x4_t datav;
  int32x4_t statev;

  // Load factor_ch1[0..1] and factor_ch2[0..1] into lanes 0..3.
  // The coefficients are loaded as 16-bit elements: reading them through an
  // int32_t* cast would drop const, access int16_t objects through an
  // incompatible type, and claim 32-bit alignment the arrays need not have.
  factorv = vld1_dup_s16(factor_ch1);
  factorv = vld1_lane_s16(factor_ch1 + 1, factorv, 1);
  factorv = vld1_lane_s16(factor_ch2, factorv, 2);
  factorv = vld1_lane_s16(factor_ch2 + 1, factorv, 3);
  // Load filter_state_ch1[0] and filter_state_ch2[0].
  // Lanes 1 and 3 hold placeholder values until the section-1 state is loaded.
  statev = vld1q_dup_s32(filter_state_ch1);
  statev = vld1q_lane_s32(filter_state_ch2, statev, 2);

  // Loop unrolling preprocessing.
  int32x4_t a;
  int16x4_t tmp1, tmp2;

  // Load data_ch1[0] and data_ch2[0].
  datav = vld1_dup_s16(data_ch1);
  datav = vld1_lane_s16(data_ch2, datav, 2);

  // Section 0 only: lanes 0 and 2 are meaningful in this first step.
  a = vqdmlal_s16(statev, datav, factorv);
  tmp1 = vshrn_n_s32(a, 16);

  // Update filter_state_ch1[0] and filter_state_ch2[0].
  statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp1, factorv);

  // Load filter_state_ch1[1] and filter_state_ch2[1].
  statev = vld1q_lane_s32(filter_state_ch1 + 1, statev, 1);
  statev = vld1q_lane_s32(filter_state_ch2 + 1, statev, 3);

  // Load data_ch1[1] and data_ch2[1].
  // After the swap, lanes 0/2 carry the new samples (section-0 input) and
  // lanes 1/3 carry the section-0 outputs just computed (section-1 input).
  tmp1 = vld1_lane_s16(data_ch1 + 1, tmp1, 1);
  tmp1 = vld1_lane_s16(data_ch2 + 1, tmp1, 3);
  datav = vrev32_s16(tmp1);

  // Loop unrolling processing.
  for (n = 0; n < length - 2; n += 2) {
    a = vqdmlal_s16(statev, datav, factorv);
    tmp1 = vshrn_n_s32(a, 16);
    // Store data_ch1[n] and data_ch2[n].
    vst1_lane_s16(data_ch1 + n, tmp1, 1);
    vst1_lane_s16(data_ch2 + n, tmp1, 3);

    // Update filter_state_ch1[0], filter_state_ch1[1]
    // and filter_state_ch2[0], filter_state_ch2[1].
    statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp1, factorv);

    // Load data_ch1[n + 2] and data_ch2[n + 2].
    tmp1 = vld1_lane_s16(data_ch1 + n + 2, tmp1, 1);
    tmp1 = vld1_lane_s16(data_ch2 + n + 2, tmp1, 3);
    datav = vrev32_s16(tmp1);

    a = vqdmlal_s16(statev, datav, factorv);
    tmp2 = vshrn_n_s32(a, 16);
    // Store data_ch1[n + 1] and data_ch2[n + 1].
    vst1_lane_s16(data_ch1 + n + 1, tmp2, 1);
    vst1_lane_s16(data_ch2 + n + 1, tmp2, 3);

    // Update filter_state_ch1[0], filter_state_ch1[1]
    // and filter_state_ch2[0], filter_state_ch2[1].
    statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp2, factorv);

    // Load data_ch1[n + 3] and data_ch2[n + 3].
    tmp2 = vld1_lane_s16(data_ch1 + n + 3, tmp2, 1);
    tmp2 = vld1_lane_s16(data_ch2 + n + 3, tmp2, 3);
    datav = vrev32_s16(tmp2);
  }

  // Loop unrolling post-processing.
  // Here n == length - 2; the last two samples drain the pipeline.
  a = vqdmlal_s16(statev, datav, factorv);
  tmp1 = vshrn_n_s32(a, 16);
  // Store data_ch1[n] and data_ch2[n].
  vst1_lane_s16(data_ch1 + n, tmp1, 1);
  vst1_lane_s16(data_ch2 + n, tmp1, 3);

  // Update filter_state_ch1[0], filter_state_ch1[1]
  // and filter_state_ch2[0], filter_state_ch2[1].
  statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp1, factorv);
  // Store filter_state_ch1[0] and filter_state_ch2[0].
  // Section 0 has now seen every sample, so its state is final.
  vst1q_lane_s32(filter_state_ch1, statev, 0);
  vst1q_lane_s32(filter_state_ch2, statev, 2);

  // Only section 1 (lanes 1 and 3) is meaningful in this last step.
  datav = vrev32_s16(tmp1);
  a = vqdmlal_s16(statev, datav, factorv);
  tmp2 = vshrn_n_s32(a, 16);
  // Store data_ch1[n + 1] and data_ch2[n + 1].
  vst1_lane_s16(data_ch1 + n + 1, tmp2, 1);
  vst1_lane_s16(data_ch2 + n + 1, tmp2, 3);

  // Update filter_state_ch1[1] and filter_state_ch2[1].
  statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp2, factorv);
  // Store filter_state_ch1[1] and filter_state_ch2[1].
  vst1q_lane_s32(filter_state_ch1 + 1, statev, 1);
  vst1q_lane_s32(filter_state_ch2 + 1, statev, 3);
}