int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   int in_vector_length,
                                   int times)
{
    int16_t nbits = WebRtcSpl_GetSizeInBits(times);
    int i;
    int16_t smax = -1;
    int16_t sabs;
    int16_t *sptr = in_vector;
    int16_t t;
    int looptimes = in_vector_length;

    for (i = looptimes; i > 0; i--)
    {
        sabs = (*sptr > 0 ? *sptr++ : -*sptr++);
        smax = (sabs > smax ? sabs : smax);
    }
    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));

    if (smax == 0)
    {
        return 0; // Since norm(0) returns 0
    } else
    {
        return (t > nbits) ? 0 : nbits - t;
    }
}
int WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
                              int in_vector_length,
                              int order,
                              int32_t* result,
                              int* scale) {
  int32_t sum = 0;
  int i = 0, j = 0;
  int16_t smax = 0;
  int scaling = 0;

  if (order > in_vector_length) {
    /* Undefined */
    return -1;
  } else if (order < 0) {
    order = in_vector_length;
  }

  // Find the maximum absolute value of the samples.
  smax = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);

  // In order to avoid overflow when computing the sum we should scale the
  // samples so that (in_vector_length * smax * smax) will not overflow.
  if (smax == 0) {
    scaling = 0;
  } else {
    // Number of bits in the sum loop.
    int nbits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length);
    // Number of bits to normalize smax.
    int t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));

    if (t > nbits) {
      scaling = 0;
    } else {
      scaling = nbits - t;
    }
  }

  // Perform the actual correlation calculation.
  for (i = 0; i < order + 1; i++) {
    sum = 0;
    /* Unroll the loop to improve performance. */
    for (j = 0; i + j + 3 < in_vector_length; j += 4) {
      sum += (in_vector[j + 0] * in_vector[i + j + 0]) >> scaling;
      sum += (in_vector[j + 1] * in_vector[i + j + 1]) >> scaling;
      sum += (in_vector[j + 2] * in_vector[i + j + 2]) >> scaling;
      sum += (in_vector[j + 3] * in_vector[i + j + 3]) >> scaling;
    }
    for (; j < in_vector_length - i; j++) {
      sum += (in_vector[j] * in_vector[i + j]) >> scaling;
    }
    *result++ = sum;
  }

  *scale = scaling;
  return order + 1;
}
Exemple #3
0
// Compute speech/noise probability
// speech/noise probability is returned in: probSpeechFinal
//snrLocPrior is the prior SNR for each frequency (in Q11)
//snrLocPost is the post SNR for each frequency (in Q11)
void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
                               uint16_t* nonSpeechProbFinal,
                               uint32_t* priorLocSnr,
                               uint32_t* postLocSnr) {

  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
  int32_t frac32, logTmp;
  int32_t logLrtTimeAvgKsumFX;
  int16_t indPriorFX16;
  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
  int i, normTmp, normTmp2, nShifts;

  // compute feature based on average LR factor
  // this is the average over all frequencies of the smooth log LRT
  logLrtTimeAvgKsumFX = 0;
  for (i = 0; i < inst->magnLen; i++) {
    besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
    num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
    if (normTmp > 10) {
      den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
    } else {
      den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
    }
    if (den > 0) {
      besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
    } else {
      besselTmpFX32 -= num; // Q11
    }

    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
    //                                       - inst->logLrtTimeAvg[i]);
    // Here, LRT_TAVG = 0.5
    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
    tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
    tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
    tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
    frac32 = tmp32 + 37;
    // tmp32 = log2(priorLocSnr[i])
    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
    logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8);
                                                  // log2(priorLocSnr[i])*log(2)
    tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1);
                                                  // Q12
    inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12

    logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
  }
  inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
                                              inst->stages + 10);
                                                  // 5 = BIN_SIZE_LRT / 2
  // done with computation of LR factor

  //
  //compute the indicator functions
  //

  // average LRT feature
  // FLOAT code
  // indicator0 = 0.5 * (tanh(widthPrior *
  //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
  tmpIndFX = 16384; // Q14(1.0)
  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
  //use larger width in tanh map for pause regions
  if (tmp32no1 < 0) {
    tmpIndFX = 0;
    tmp32no1 = -tmp32no1;
    //widthPrior = widthPrior * 2.0;
    nShifts++;
  }
  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
  // compute indicator function: sigmoid map
  tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
  if ((tableIndex < 16) && (tableIndex >= 0)) {
    tmp16no2 = kIndicatorTable[tableIndex];
    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
    tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
    if (tmpIndFX == 0) {
      tmpIndFX = 8192 - tmp16no2; // Q14
    } else {
      tmpIndFX = 8192 + tmp16no2; // Q14
    }
  }
  indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14

  //spectral flatness feature
  if (inst->weightSpecFlat) {
    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
    tmpIndFX = 16384; // Q14(1.0)
    //use larger width in tanh map for pause regions
    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
    nShifts = 4;
    if (inst->thresholdSpecFlat < tmpU32no1) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
      //widthPrior = widthPrior * 2.0;
      nShifts++;
    }
    tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
                                                                  nShifts), 25);
                                                     //Q14
    tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
                                    25); //Q14
    // compute indicator function: sigmoid map
    // FLOAT code
    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
    //                          (threshPrior1 - tmpFloat1)) + 1.0);
    tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
    if (tableIndex < 16) {
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2; // Q14
      } else {
        tmpIndFX = 8192 - tmp16no2; // Q14
      }
    }
    indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
  }

  //for template spectral-difference
  if (inst->weightSpecDiff) {
    tmpU32no1 = 0;
    if (inst->featureSpecDiff) {
      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
                               WebRtcSpl_NormU32(inst->featureSpecDiff));
      tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
                                                         // Q(normTmp-2*stages)
      tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
                                        20 - inst->stages - normTmp);
      if (tmpU32no2 > 0) {
        // Q(20 - inst->stages)
        tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
      } else {
        tmpU32no1 = (uint32_t)(0x7fffffff);
      }
    }
    tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
                                                      17),
                                25);
    tmpU32no2 = tmpU32no1 - tmpU32no3;
    nShifts = 1;
    tmpIndFX = 16384; // Q14(1.0)
    //use larger width in tanh map for pause regions
    if (tmpU32no2 & 0x80000000) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no3 - tmpU32no1;
      //widthPrior = widthPrior * 2.0;
      nShifts--;
    }
    tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
    // compute indicator function: sigmoid map
    /* FLOAT code
     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
     */
    tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
    if (tableIndex < 16) {
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
                    tmp16no1, frac, 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2;
      } else {
        tmpIndFX = 8192 - tmp16no2;
      }
    }
    indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
  }

  //combine the indicator function with the feature weights
  // FLOAT code
  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
  //                 indicator1 + weightIndPrior2 * indicator2);
  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
  // done with computing indicator function

  //compute the prior probability
  // FLOAT code
  // inst->priorNonSpeechProb += PRIOR_UPDATE *
  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
  inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
                                PRIOR_UPDATE_Q14, tmp16, 14); // Q14

  //final speech probability: combine prior model with LR factor:

  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);

  if (inst->priorNonSpeechProb > 0) {
    for (i = 0; i < inst->magnLen; i++) {
      // FLOAT code
      // invLrt = exp(inst->logLrtTimeAvg[i]);
      // invLrt = inst->priorSpeechProb * invLrt;
      // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
      //                         (1.0 - inst->priorSpeechProb + invLrt);
      // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
      // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
      //                         (inst->priorNonSpeechProb + invLrt);
      if (inst->logLrtTimeAvgW32[i] < 65300) {
        tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(
                                           inst->logLrtTimeAvgW32[i], 23637),
                                         14); // Q12
        intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
        if (intPart < -8) {
          intPart = -8;
        }
        frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12

        // Quadratic approximation of 2^frac
        tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
        tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
        invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
                   + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8

        normTmp = WebRtcSpl_NormW32(invLrtFX);
        normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
        if (normTmp + normTmp2 >= 7) {
          if (normTmp + normTmp2 < 15) {
            invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
            // Q(normTmp+normTmp2-7)
            tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
                                            (16384 - inst->priorNonSpeechProb));
            // Q(normTmp+normTmp2+7)
            invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
                                                                  // Q14
          } else {
            tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
                                            (16384 - inst->priorNonSpeechProb));
                                                                  // Q22
            invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
          }

          tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb,
                                           8); // Q22

          nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
              (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8
        }
      }
    }
  }
}
Exemple #4
0
// Contains a function for the core loop in the normalized lattice MA
// filter routine for iSAC codec, optimized for ARM Neon platform.
// It does:
//  for 0 <= n < HALF_SUBFRAMELEN - 1:
//    *ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
//    *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
// Output is not bit-exact with the reference C code, due to the replacement
// of WEBRTC_SPL_MUL_16_32_RSFT15 and LATTICE_MUL_32_32_RSFT16 with Neon
// instructions. The difference should not be bigger than 1.
void WebRtcIsacfix_FilterMaLoopNeon(int16_t input0,  // Filter coefficient
                                    int16_t input1,  // Filter coefficient
                                    int32_t input2,  // Inverse coefficient
                                    int32_t* ptr0,   // Sample buffer
                                    int32_t* ptr1,   // Sample buffer
                                    int32_t* ptr2)   // Sample buffer
{
  int n = 0;
  int loop = (HALF_SUBFRAMELEN - 1) >> 3;
  int loop_tail = (HALF_SUBFRAMELEN - 1) & 0x7;

  int32x4_t input0_v = vdupq_n_s32((int32_t)input0 << 16);
  int32x4_t input1_v = vdupq_n_s32((int32_t)input1 << 16);
  int32x4_t input2_v = vdupq_n_s32(input2);
  int32x4_t tmp0a, tmp1a, tmp2a, tmp3a;
  int32x4_t tmp0b, tmp1b, tmp2b, tmp3b;
  int32x4_t ptr0va, ptr1va, ptr2va;
  int32x4_t ptr0vb, ptr1vb, ptr2vb;

  // Unroll to process 8 samples at once.
  for (n = 0; n < loop; n++) {
    ptr0va = vld1q_s32(ptr0);
    ptr0vb = vld1q_s32(ptr0 + 4);
    ptr0 += 8;

    ptr2va = vld1q_s32(ptr2);
    ptr2vb = vld1q_s32(ptr2 + 4);

    // Calculate tmp0 = (*ptr0) * input0.
    tmp0a = vqrdmulhq_s32(ptr0va, input0_v);
    tmp0b = vqrdmulhq_s32(ptr0vb, input0_v);

    // Calculate tmp1 = (*ptr0) * input1.
    tmp1a = vqrdmulhq_s32(ptr0va, input1_v);
    tmp1b = vqrdmulhq_s32(ptr0vb, input1_v);

    // Calculate tmp2 = tmp0 + *(ptr2).
    tmp2a = vaddq_s32(tmp0a, ptr2va);
    tmp2b = vaddq_s32(tmp0b, ptr2vb);
    tmp2a = vshlq_n_s32(tmp2a, 15);
    tmp2b = vshlq_n_s32(tmp2b, 15);

    // Calculate *ptr2 = input2 * tmp2.
    ptr2va = vqrdmulhq_s32(tmp2a, input2_v);
    ptr2vb = vqrdmulhq_s32(tmp2b, input2_v);

    vst1q_s32(ptr2, ptr2va);
    vst1q_s32(ptr2 + 4, ptr2vb);
    ptr2 += 8;

    // Calculate tmp3 = ptr2v * input0.
    tmp3a = vqrdmulhq_s32(ptr2va, input0_v);
    tmp3b = vqrdmulhq_s32(ptr2vb, input0_v);

    // Calculate *ptr1 = tmp1 + tmp3.
    ptr1va = vaddq_s32(tmp1a, tmp3a);
    ptr1vb = vaddq_s32(tmp1b, tmp3b);

    vst1q_s32(ptr1, ptr1va);
    vst1q_s32(ptr1 + 4, ptr1vb);
    ptr1 += 8;
  }

  // Process four more samples.
  if (loop_tail & 0x4) {
    ptr0va = vld1q_s32(ptr0);
    ptr2va = vld1q_s32(ptr2);
    ptr0 += 4;

    // Calculate tmp0 = (*ptr0) * input0.
    tmp0a = vqrdmulhq_s32(ptr0va, input0_v);

    // Calculate tmp1 = (*ptr0) * input1.
    tmp1a = vqrdmulhq_s32(ptr0va, input1_v);

    // Calculate tmp2 = tmp0 + *(ptr2).
    tmp2a = vaddq_s32(tmp0a, ptr2va);
    tmp2a = vshlq_n_s32(tmp2a, 15);

    // Calculate *ptr2 = input2 * tmp2.
    ptr2va = vqrdmulhq_s32(tmp2a, input2_v);

    vst1q_s32(ptr2, ptr2va);
    ptr2 += 4;

    // Calculate tmp3 = *(ptr2) * input0.
    tmp3a = vqrdmulhq_s32(ptr2va, input0_v);

    // Calculate *ptr1 = tmp1 + tmp3.
    ptr1va = vaddq_s32(tmp1a, tmp3a);

    vst1q_s32(ptr1, ptr1va);
    ptr1 += 4;
  }

  // Process two more samples.
  if (loop_tail & 0x2) {
    int32x2_t ptr0v_tail, ptr2v_tail, ptr1v_tail;
    int32x2_t tmp0_tail, tmp1_tail, tmp2_tail, tmp3_tail;
    ptr0v_tail = vld1_s32(ptr0);
    ptr2v_tail = vld1_s32(ptr2);
    ptr0 += 2;

    // Calculate tmp0 = (*ptr0) * input0.
    tmp0_tail = vqrdmulh_s32(ptr0v_tail, vget_low_s32(input0_v));

    // Calculate tmp1 = (*ptr0) * input1.
    tmp1_tail = vqrdmulh_s32(ptr0v_tail, vget_low_s32(input1_v));

    // Calculate tmp2 = tmp0 + *(ptr2).
    tmp2_tail = vadd_s32(tmp0_tail, ptr2v_tail);
    tmp2_tail = vshl_n_s32(tmp2_tail, 15);

    // Calculate *ptr2 = input2 * tmp2.
    ptr2v_tail = vqrdmulh_s32(tmp2_tail, vget_low_s32(input2_v));

    vst1_s32(ptr2, ptr2v_tail);
    ptr2 += 2;

    // Calculate tmp3 = *(ptr2) * input0.
    tmp3_tail = vqrdmulh_s32(ptr2v_tail, vget_low_s32(input0_v));

    // Calculate *ptr1 = tmp1 + tmp3.
    ptr1v_tail = vadd_s32(tmp1_tail, tmp3_tail);

    vst1_s32(ptr1, ptr1v_tail);
    ptr1 += 2;
  }

  // Process one more sample.
  if (loop_tail & 0x1) {
    int16_t t16a = (int16_t)(input2 >> 16);
    int16_t t16b = (int16_t)input2;
    if (t16b < 0) t16a++;
    int32_t tmp32a;
    int32_t tmp32b;

    // Calculate *ptr2 = input2 * (*ptr2 + input0 * (*ptr0)).
    tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr0);
    tmp32b = *ptr2 + tmp32a;
    *ptr2 = (int32_t)(WEBRTC_SPL_MUL(t16a, tmp32b) +
                       (WEBRTC_SPL_MUL_16_32_RSFT16(t16b, tmp32b)));

    // Calculate *ptr1 = input1 * (*ptr0) + input0 * (*ptr2).
    tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input1, *ptr0);
    tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr2);
    *ptr1 = tmp32a + tmp32b;
  }