/*
 * Decodes the bitstream held in ISACdec_obj->bitstr_obj.
 *
 * The arithmetic-decoder state is reset, the frame length and send-bandwidth
 * index are decoded, and then one frame (or two, when frame_mode == 1) is
 * processed: pitch gains/lags, LPC parameters and the spectrum are decoded,
 * the spectrum is converted to the time domain, the inverse pitch filter and
 * the two lattice post-filters are applied, and the two bands are recombined
 * by WebRtcIsacfix_FilterAndCombine1() into
 * signal_out16 + frame_nb * processed_samples.
 *
 * When the previous call used packet-loss concealment
 * (plcstr_obj.used == PLC_WAS_USED) the start of the frame is cross-faded
 * with plcstr_obj.overlapLP. For the last frame of the packet, side
 * information is stored in plcstr_obj.
 *
 * signal_out16          - output sample buffer.
 * ISACdec_obj           - decoder instance; bitstream, PLC, pitch-filter,
 *                         mask-filter and post-filterbank state are updated.
 * current_framesamples  - written by WebRtcIsacfix_DecodeFrameLen().
 *
 * Returns the negative error code of the first decoding step that fails,
 * otherwise the value returned by WebRtcIsacfix_DecodeSpec() for the last
 * decoded frame.
 */
int16_t WebRtcIsacfix_DecodeImpl(int16_t *signal_out16,
                                 ISACFIX_DecInst_t *ISACdec_obj,
                                 int16_t *current_framesamples)
{
  int k;
  int err;
  int16_t BWno;
  int16_t len = 0;
  int16_t model;

  int16_t Vector_Word16_1[FRAMESAMPLES/2];
  int16_t Vector_Word16_2[FRAMESAMPLES/2];
  int32_t Vector_Word32_1[FRAMESAMPLES/2];
  int32_t Vector_Word32_2[FRAMESAMPLES/2];

  int16_t lofilt_coefQ15[ORDERLO*SUBFRAMES]; /* refl. coeffs */
  int16_t hifilt_coefQ15[ORDERHI*SUBFRAMES]; /* refl. coeffs */
  int32_t gain_lo_hiQ17[2*SUBFRAMES];

  int16_t PitchLags_Q7[PITCH_SUBFRAMES];
  int16_t PitchGains_Q12[PITCH_SUBFRAMES];
  int16_t AvgPitchGain_Q12;

  int16_t tmp_1, tmp_2;
  int32_t tmp32a, tmp32b;
  int16_t gainQ13;

  int16_t frame_nb;          /* counter */
  int16_t frame_mode;        /* 0 for 20ms and 30ms, 1 for 60ms */
  int16_t processed_samples;

  /* PLC */
  int16_t overlapWin[ 240 ];

  /* Reset the arithmetic-decoder state before reading the stream. */
  (ISACdec_obj->bitstr_obj).W_upper = 0xFFFFFFFF;
  (ISACdec_obj->bitstr_obj).streamval = 0;
  (ISACdec_obj->bitstr_obj).stream_index = 0;
  (ISACdec_obj->bitstr_obj).full = 1;

  /* decode framelength and BW estimation - not used, only for stream pointer */
  err = WebRtcIsacfix_DecodeFrameLen(&ISACdec_obj->bitstr_obj, current_framesamples);
  if (err < 0) { /* error check */
    return (int16_t)err;
  }

  frame_mode = (int16_t)WEBRTC_SPL_DIV(*current_framesamples, MAX_FRAMESAMPLES); /* 0, or 1 */
  processed_samples = (int16_t)WEBRTC_SPL_DIV(*current_framesamples, frame_mode+1); /* either 320 (20ms) or 480 (30, 60 ms) */

  err = WebRtcIsacfix_DecodeSendBandwidth(&ISACdec_obj->bitstr_obj, &BWno);
  if (err < 0) { /* error check */
    return (int16_t)err;
  }

  /* one loop if it's one frame (20 or 30ms), 2 loops if 2 frames bundled together (60ms) */
  for (frame_nb = 0; frame_nb <= frame_mode; frame_nb++) {

    /* decode & dequantize pitch parameters */
    err = WebRtcIsacfix_DecodePitchGain(&(ISACdec_obj->bitstr_obj), PitchGains_Q12);
    if (err < 0) { /* error check */
      return (int16_t)err;
    }

    err = WebRtcIsacfix_DecodePitchLag(&ISACdec_obj->bitstr_obj, PitchGains_Q12, PitchLags_Q7);
    if (err < 0) { /* error check */
      return (int16_t)err;
    }

    /* Mean of the four pitch gains, summed in 32 bits. */
    AvgPitchGain_Q12 = (int16_t)(((int32_t)PitchGains_Q12[0] + PitchGains_Q12[1] + PitchGains_Q12[2] + PitchGains_Q12[3])>>2);

    /* decode & dequantize FiltCoef */
    err = WebRtcIsacfix_DecodeLpc(gain_lo_hiQ17, lofilt_coefQ15, hifilt_coefQ15,
                                  &ISACdec_obj->bitstr_obj, &model);
    if (err < 0) { /* error check */
      return (int16_t)err;
    }

    /* decode & dequantize spectrum */
    len = WebRtcIsacfix_DecodeSpec(&ISACdec_obj->bitstr_obj, Vector_Word16_1, Vector_Word16_2, AvgPitchGain_Q12);
    if (len < 0) { /* error check */
      return len;
    }

    /* Why does this need Q16 in and out? /JS */
    WebRtcIsacfix_Spec2Time(Vector_Word16_1, Vector_Word16_2, Vector_Word32_1, Vector_Word32_2);

    for (k = 0; k < FRAMESAMPLES/2; k++) {
      Vector_Word16_1[k] = (int16_t)WEBRTC_SPL_RSHIFT_W32(Vector_Word32_1[k]+64, 7); /* Q16 -> Q9 */
    }

    /* ----  If this is recovery frame ---- */
    if ((ISACdec_obj->plcstr_obj).used == PLC_WAS_USED) {
      (ISACdec_obj->plcstr_obj).used = PLC_NOT_USED;

      /* NOTE(review): a conditional store of 4000 to decayCoeffPriodic when
       * plcstr_obj.B < 1000 used to precede this line, but it was always
       * overwritten by the assignment below and therefore had no effect.
       * It has been dropped; confirm whether an else-branch was intended. */
      ISACdec_obj->plcstr_obj.decayCoeffPriodic = WEBRTC_SPL_WORD16_MAX; /* DECAY_RATE is in Q15 */
      ISACdec_obj->plcstr_obj.decayCoeffNoise = WEBRTC_SPL_WORD16_MAX;   /* DECAY_RATE is in Q15 */
      ISACdec_obj->plcstr_obj.pitchCycles = 0;

      /* Scale the first pitch gain by 700/1024. */
      PitchGains_Q12[0] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(PitchGains_Q12[0], 700, 10);

      /* ---- Add-overlap ---- */
      /* Cross-fade: stored overlapLP is weighted by the reversed window,
       * the newly decoded samples by the forward window. */
      WebRtcSpl_GetHanningWindow(overlapWin, RECOVERY_OVERLAP);
      for (k = 0; k < RECOVERY_OVERLAP; k++) {
        Vector_Word16_1[k] = WebRtcSpl_AddSatW16(
            (int16_t)WEBRTC_SPL_MUL_16_16_RSFT((ISACdec_obj->plcstr_obj).overlapLP[k], overlapWin[RECOVERY_OVERLAP - k - 1], 14),
            (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(Vector_Word16_1[k], overlapWin[k], 14));
      }
    }

    /* --- Store side info --- */
    if (frame_nb == frame_mode) {
      /* --- LPC info */
      WEBRTC_SPL_MEMCPY_W16((ISACdec_obj->plcstr_obj).lofilt_coefQ15, &lofilt_coefQ15[(SUBFRAMES-1)*ORDERLO], ORDERLO);
      WEBRTC_SPL_MEMCPY_W16((ISACdec_obj->plcstr_obj).hifilt_coefQ15, &hifilt_coefQ15[(SUBFRAMES-1)*ORDERHI], ORDERHI);
      (ISACdec_obj->plcstr_obj).gain_lo_hiQ17[0] = gain_lo_hiQ17[(SUBFRAMES-1) * 2];
      (ISACdec_obj->plcstr_obj).gain_lo_hiQ17[1] = gain_lo_hiQ17[(SUBFRAMES-1) * 2 + 1];

      /* --- LTP info */
      (ISACdec_obj->plcstr_obj).AvgPitchGain_Q12 = PitchGains_Q12[3];
      (ISACdec_obj->plcstr_obj).lastPitchGain_Q12 = PitchGains_Q12[3];
      (ISACdec_obj->plcstr_obj).lastPitchLag_Q7 = PitchLags_Q7[3];

      /* Lags below 3000 (Q7) are stored doubled. */
      if (PitchLags_Q7[3] < 3000) {
        (ISACdec_obj->plcstr_obj).lastPitchLag_Q7 += PitchLags_Q7[3];
      }

      WEBRTC_SPL_MEMCPY_W16((ISACdec_obj->plcstr_obj).prevPitchInvIn, Vector_Word16_1, FRAMESAMPLES/2);
    }
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    /* inverse pitch filter */
    WebRtcIsacfix_PitchFilter(Vector_Word16_1, Vector_Word16_2, &ISACdec_obj->pitchfiltstr_obj, PitchLags_Q7, PitchGains_Q12, 4);

    if (frame_nb == frame_mode) {
      WEBRTC_SPL_MEMCPY_W16((ISACdec_obj->plcstr_obj).prevPitchInvOut, &(Vector_Word16_2[FRAMESAMPLES/2 - (PITCH_MAX_LAG + 10)]), PITCH_MAX_LAG);
    }

    /* reduce gain to compensate for pitch enhancer */
    /* gain = 1.0f - 0.45f * AvgPitchGain; */
    tmp32a = WEBRTC_SPL_MUL_16_16_RSFT(AvgPitchGain_Q12, 29, 0); /* Q18 */
    tmp32b = 262144 - tmp32a;                                    /* Q18 */
    gainQ13 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32b, 5);        /* Q13 */

    for (k = 0; k < FRAMESAMPLES/2; k++) {
      Vector_Word32_1[k] = (int32_t) WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_16(Vector_Word16_2[k], gainQ13), 3); /* Q25 */
    }

    /* perceptual post-filtering (using normalized lattice filter) */
    WebRtcIsacfix_NormLatticeFilterAr(ORDERLO, (ISACdec_obj->maskfiltstr_obj).PostStateLoGQ0,
                                      Vector_Word32_1, lofilt_coefQ15, gain_lo_hiQ17, 0, Vector_Word16_1);

    /* --- Store Highpass Residual --- */
    for (k = 0; k < FRAMESAMPLES/2; k++) {
      Vector_Word32_1[k] = WEBRTC_SPL_LSHIFT_W32(Vector_Word32_2[k], 9); /* Q16 -> Q25 */
    }

    for (k = 0; k < PITCH_MAX_LAG + 10; k++) {
      (ISACdec_obj->plcstr_obj).prevHP[k] = Vector_Word32_1[FRAMESAMPLES/2 - (PITCH_MAX_LAG + 10) + k];
    }

    WebRtcIsacfix_NormLatticeFilterAr(ORDERHI, (ISACdec_obj->maskfiltstr_obj).PostStateHiGQ0,
                                      Vector_Word32_1, hifilt_coefQ15, gain_lo_hiQ17, 1, Vector_Word16_2);

    /* recombine the 2 bands */

    /* Form the polyphase signals, and compensate for DC offset */
    for (k = 0; k < FRAMESAMPLES/2; k++) {
      tmp_1 = (int16_t)WebRtcSpl_SatW32ToW16(((int32_t)Vector_Word16_1[k]+Vector_Word16_2[k] + 1)); /* Construct a new upper channel signal */
      tmp_2 = (int16_t)WebRtcSpl_SatW32ToW16(((int32_t)Vector_Word16_1[k]-Vector_Word16_2[k]));     /* Construct a new lower channel signal */
      Vector_Word16_1[k] = tmp_1;
      Vector_Word16_2[k] = tmp_2;
    }

    WebRtcIsacfix_FilterAndCombine1(Vector_Word16_1, Vector_Word16_2, signal_out16 + frame_nb * processed_samples, &ISACdec_obj->postfiltbankstr_obj);
  }

  return len;
}
// Compute speech/noise probability // speech/noise probability is returned in: probSpeechFinal //snrLocPrior is the prior SNR for each frequency (in Q11) //snrLocPost is the post SNR for each frequency (in Q11) void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, uint16_t* nonSpeechProbFinal, uint32_t* priorLocSnr, uint32_t* postLocSnr) { uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; int32_t frac32, logTmp; int32_t logLrtTimeAvgKsumFX; int16_t indPriorFX16; int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; int i, normTmp, normTmp2, nShifts; // compute feature based on average LR factor // this is the average over all frequencies of the smooth log LRT logLrtTimeAvgKsumFX = 0; for (i = 0; i < inst->magnLen; i++) { besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11 normTmp = WebRtcSpl_NormU32(postLocSnr[i]); num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp) if (normTmp > 10) { den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp) } else { den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp) } if (den > 0) { besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11 } else { besselTmpFX32 -= num; // Q11 } // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) // - inst->logLrtTimeAvg[i]); // Here, LRT_TAVG = 0.5 zeros = WebRtcSpl_NormU32(priorLocSnr[i]); frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); tmp32 = WEBRTC_SPL_MUL(frac32, frac32); tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19); tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12); frac32 = tmp32 + 37; // tmp32 = log2(priorLocSnr[i]) tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2) tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12 inst->logLrtTimeAvgW32[i] 
+= (besselTmpFX32 - tmp32no1); // Q12 logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 } inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2 // done with computation of LR factor // //compute the indicator functions // // average LRT feature // FLOAT code // indicator0 = 0.5 * (tanh(widthPrior * // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); tmpIndFX = 16384; // Q14(1.0) tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; //use larger width in tanh map for pause regions if (tmp32no1 < 0) { tmpIndFX = 0; tmp32no1 = -tmp32no1; //widthPrior = widthPrior * 2.0; nShifts++; } tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 // compute indicator function: sigmoid map tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14); if ((tableIndex < 16) && (tableIndex >= 0)) { tmp16no2 = kIndicatorTable[tableIndex]; tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); if (tmpIndFX == 0) { tmpIndFX = 8192 - tmp16no2; // Q14 } else { tmpIndFX = 8192 + tmp16no2; // Q14 } } indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14 //spectral flatness feature if (inst->weightSpecFlat) { tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 tmpIndFX = 16384; // Q14(1.0) //use larger width in tanh map for pause regions tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 nShifts = 4; if (inst->thresholdSpecFlat < tmpU32no1) { tmpIndFX = 0; tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; //widthPrior = widthPrior * 2.0; nShifts++; } tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14 tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14 // compute indicator function: sigmoid map // FLOAT code 
// indicator1 = 0.5 * (tanh(sgnMap * widthPrior * // (threshPrior1 - tmpFloat1)) + 1.0); tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); if (tableIndex < 16) { tmp16no2 = kIndicatorTable[tableIndex]; tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); if (tmpIndFX) { tmpIndFX = 8192 + tmp16no2; // Q14 } else { tmpIndFX = 8192 - tmp16no2; // Q14 } } indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14 } //for template spectral-difference if (inst->weightSpecDiff) { tmpU32no1 = 0; if (inst->featureSpecDiff) { normTmp = WEBRTC_SPL_MIN(20 - inst->stages, WebRtcSpl_NormU32(inst->featureSpecDiff)); tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages) tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages - normTmp); if (tmpU32no2 > 0) { // Q(20 - inst->stages) tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); } else { tmpU32no1 = (uint32_t)(0x7fffffff); } } tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25); tmpU32no2 = tmpU32no1 - tmpU32no3; nShifts = 1; tmpIndFX = 16384; // Q14(1.0) //use larger width in tanh map for pause regions if (tmpU32no2 & 0x80000000) { tmpIndFX = 0; tmpU32no2 = tmpU32no3 - tmpU32no1; //widthPrior = widthPrior * 2.0; nShifts--; } tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts); // compute indicator function: sigmoid map /* FLOAT code indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); */ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); if (tableIndex < 16) { tmp16no2 = kIndicatorTable[tableIndex]; tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( tmp16no1, frac, 14); if (tmpIndFX) { tmpIndFX = 8192 + tmp16no2; } 
else { tmpIndFX = 8192 - tmp16no2; } } indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14 } //combine the indicator function with the feature weights // FLOAT code // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * // indicator1 + weightIndPrior2 * indicator2); indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 // done with computing indicator function //compute the prior probability // FLOAT code // inst->priorNonSpeechProb += PRIOR_UPDATE * // (indPriorNonSpeech - inst->priorNonSpeechProb); tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( PRIOR_UPDATE_Q14, tmp16, 14); // Q14 //final speech probability: combine prior model with LR factor: memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); if (inst->priorNonSpeechProb > 0) { for (i = 0; i < inst->magnLen; i++) { // FLOAT code // invLrt = exp(inst->logLrtTimeAvg[i]); // invLrt = inst->priorSpeechProb * invLrt; // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / // (1.0 - inst->priorSpeechProb + invLrt); // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / // (inst->priorNonSpeechProb + invLrt); if (inst->logLrtTimeAvgW32[i] < 65300) { tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL( inst->logLrtTimeAvgW32[i], 23637), 14); // Q12 intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12); if (intPart < -8) { intPart = -8; } frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12 // Quadratic approximation of 2^frac tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12 tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12 invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart) + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 normTmp = WebRtcSpl_NormW32(invLrtFX); normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); if (normTmp + normTmp2 >= 7) { if (normTmp + normTmp2 < 15) { invLrtFX = 
WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp); // Q(normTmp+normTmp2-7) tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q(normTmp+normTmp2+7) invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14 } else { tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22 invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14 } tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb, 8); // Q22 nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1, (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8 } } } } }