Example #1
0
static inline void PreShiftW32toW16Neon(int32_t* inre,
                                        int32_t* inim,
                                        int16_t* outre,
                                        int16_t* outim,
                                        int32_t sh) {
  int k;
  int32x4_t sh32x4 = vdupq_n_s32(sh);
  for (k = 0; k < FRAMESAMPLES/2; k += 16) {
    int32x4x4_t inre32x4x4 = vld4q_s32(inre);
    int32x4x4_t inim32x4x4 = vld4q_s32(inim);
    inre += 16;
    inim += 16;
    inre32x4x4.val[0] = vrshlq_s32(inre32x4x4.val[0], sh32x4);
    inre32x4x4.val[1] = vrshlq_s32(inre32x4x4.val[1], sh32x4);
    inre32x4x4.val[2] = vrshlq_s32(inre32x4x4.val[2], sh32x4);
    inre32x4x4.val[3] = vrshlq_s32(inre32x4x4.val[3], sh32x4);
    inim32x4x4.val[0] = vrshlq_s32(inim32x4x4.val[0], sh32x4);
    inim32x4x4.val[1] = vrshlq_s32(inim32x4x4.val[1], sh32x4);
    inim32x4x4.val[2] = vrshlq_s32(inim32x4x4.val[2], sh32x4);
    inim32x4x4.val[3] = vrshlq_s32(inim32x4x4.val[3], sh32x4);
    int16x4x4_t outre16x4x4;
    int16x4x4_t outim16x4x4;
    outre16x4x4.val[0]  = vmovn_s32(inre32x4x4.val[0]);
    outre16x4x4.val[1]  = vmovn_s32(inre32x4x4.val[1]);
    outre16x4x4.val[2]  = vmovn_s32(inre32x4x4.val[2]);
    outre16x4x4.val[3]  = vmovn_s32(inre32x4x4.val[3]);
    outim16x4x4.val[0]  = vmovn_s32(inim32x4x4.val[0]);
    outim16x4x4.val[1]  = vmovn_s32(inim32x4x4.val[1]);
    outim16x4x4.val[2]  = vmovn_s32(inim32x4x4.val[2]);
    outim16x4x4.val[3]  = vmovn_s32(inim32x4x4.val[3]);
    vst4_s16(outre, outre16x4x4);
    vst4_s16(outim, outim16x4x4);
    outre += 16;
    outim += 16;
  }
}
Example #2
0
// Thin wrapper that forwards a 4-vector int16 store to the NEON intrinsic
// vst4_s16, writing the contents of `v` to memory starting at `ptr`.
inline void vst4(s16 * ptr, const int16x4x4_t   & v)
{
    vst4_s16(ptr, v);
}