/*
 * Update_Err
 *
 * Recomputes the two newest entries of handle->CodStat.Err[] from the
 * adaptive-codebook choice of the current subframe and moves the three
 * previous entries two positions towards the end of the array.
 *
 *   Olp    : open-loop pitch lag
 *   AcLg   : adaptive-codebook lag index; combined with Olp and Pstep
 *            to form the lag that is examined
 *   AcGn   : adaptive-codebook gain index; selects beta from the table
 *   handle : encoder state. handle->mode picks the gain table and
 *            handle->CodStat.Err[0..4] is read and rewritten.
 */
void Update_Err(Word16 Olp, Word16 AcLg, Word16 AcGn, ENC_HANDLE *handle)
{
    Word16 *gain_tab;
    Word16 zone, zone_next, zone_end;
    Word16 Lag;
    Word32 Worst0, Worst1, mid;
    Word16 beta;

    Lag = Olp - (Word16)Pstep + AcLg;

    /*
     * Select the quantization table: the 85-entry table is used only in
     * G723_63 mode with a short open-loop lag, the 170-entry table in
     * every other case.
     */
    if ((handle->mode == G723_63) && (Olp < (Word16)(SubFrLen - 2))) {
        gain_tab = tabgain85;
    } else {
        gain_tab = tabgain170;
    }
    beta = gain_tab[(int)AcGn];     /* beta = gain * 8192 */

    if (Lag <= (SubFrLen / 2)) {
        /* Short lag: both new entries derive from Err[0]. */
        Worst0 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[0], beta), 2));
        Worst1 = Worst0;
    } else {
        zone = mult(Lag, 1092);                                /* Lag / 30 */
        zone_next = add(zone, 1);
        zone_end = sub(shl(zone_next, 5), shl(zone_next, 1));  /* 30 * (zone + 1) */

        if (zone_end == Lag) {
            /* Lag is exactly 30 * (zone + 1): two neighbouring entries. */
            Worst0 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[zone - 1], beta), 2));
            Worst1 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[zone], beta), 2));
        } else if (zone == 1) {
            /* Only Err[0] and Err[1] are involved; both outputs take the larger. */
            Worst0 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[0], beta), 2));
            Worst1 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[1], beta), 2));
            if (Worst0 > Worst1) {
                Worst1 = Worst0;
            } else {
                Worst0 = Worst1;
            }
        } else {
            /* Three entries are involved; the middle one can raise both outputs. */
            Worst0 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[zone - 2], beta), 2));
            mid = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[zone - 1], beta), 2));
            if (mid > Worst0) {
                Worst0 = mid;
            }
            Worst1 = L_add(Err0, L_shl(L_mls(handle->CodStat.Err[zone], beta), 2));
            if (mid > Worst1) {
                Worst1 = mid;
            }
        }
    }

    /* Age the history by two slots (highest index first), then store the new pair. */
    handle->CodStat.Err[4] = handle->CodStat.Err[2];
    handle->CodStat.Err[3] = handle->CodStat.Err[1];
    handle->CodStat.Err[2] = handle->CodStat.Err[0];
    handle->CodStat.Err[0] = Worst0;
    handle->CodStat.Err[1] = Worst1;

    return;
}
/*
 * Coder_ld8g - encode one speech frame and write its analysis parameters
 * to ana[].
 *
 * Visible processing order:
 *   1. forward LP analysis on p_window (autocorrelation, lag window,
 *      Levinson, A(z) -> LSP);
 *   2. when dtx_enable == 1: VAD (vadg), music detection (musdetect) and
 *      CNG statistics update; otherwise Vad is forced to 1;
 *   3. backward LP analysis on synth when (rate - (1 - Vad)) == G729E;
 *   4. interpolation / quantization of the LP filters and selection of
 *      the forward or backward mode (lp_mode);
 *   5. computation of the weighted speech wsp[];
 *   6. Vad == 0: comfort-noise coding through Cod_cngg, memory updates
 *      and early return;
 *   7. Vad != 0: open-loop pitch, then for each subframe the closed-loop
 *      pitch search, fixed-codebook search, gain quantization and
 *      filter-memory update.
 *
 * The function reads and updates a large amount of state that is not
 * declared here (p_window, speech, wsp, exc, synth, synth_ptr, error,
 * zero, ai_zero, lsp_old, lsp_old_q, freq_prev, mem_w, mem_w0, mem_err,
 * mem_syn, old_speech, old_wsp, old_exc, lag_buf, pgain_buf, sharp,
 * seed, pastVad, ppastVad, prev_lp_mode, prev_filter, ...).
 *
 * For an active frame the codebook search only handles rate == G729 and
 * rate == G729E; any other value prints "Unrecognized bit rate" and
 * calls exit(-1).
 */
void Coder_ld8g(
    Word16 ana[],       /* (o) : analysis parameters */
    Word16 frame,       /* input : frame counter */
    Word16 dtx_enable,  /* input : DTX enable flag */
    Word16 rate         /* input : rate selector/frame =0 6.4kbps , =1 8kbps,= 2 11.8 kbps */
)
{
    /* LPC analysis */
    Word16 r_l_fwd[NP+1], r_h_fwd[NP+1];    /* Autocorrelations low and hi (forward) */
    Word32 r_bwd[M_BWDP1];                  /* Autocorrelations (backward) */
    Word16 r_l_bwd[M_BWDP1];                /* Autocorrelations low (backward) */
    Word16 r_h_bwd[M_BWDP1];                /* Autocorrelations high (backward) */
    Word16 rc_fwd[M];                       /* Reflection coefficients : forward analysis */
    Word16 rc_bwd[M_BWD];                   /* Reflection coefficients : backward analysis */
    Word16 A_t_fwd[MP1*2];                  /* A(z) forward unquantized for the 2 subframes */
    Word16 A_t_fwd_q[MP1*2];                /* A(z) forward quantized for the 2 subframes */
    Word16 A_t_bwd[2*M_BWDP1];              /* A(z) backward for the 2 subframes */
    Word16 *Aq;                             /* A(z) "quantized" for the 2 subframes */
    Word16 *Ap;                             /* A(z) "unquantized" for the 2 subframes */
    Word16 *pAp, *pAq;                      /* Running pointers into Ap / Aq, advanced per subframe */
    Word16 Ap1[M_BWDP1];                    /* A(z) with spectral expansion */
    Word16 Ap2[M_BWDP1];                    /* A(z) with spectral expansion */
    Word16 lsp_new[M], lsp_new_q[M];        /* LSPs at 2nd subframe */
    Word16 lsf_int[M];                      /* Interpolated LSF 1st subframe. */
    Word16 lsf_new[M];
    Word16 lp_mode;                         /* Backward / Forward Indication mode */
    Word16 m_ap, m_aq, i_gamma;             /* Orders of the filters behind Ap / Aq, gamma index */
    Word16 code_lsp[2];

    /* Other vectors */
    Word16 h1[L_SUBFR];                     /* Impulse response h1[] */
    Word16 xn[L_SUBFR];                     /* Target vector for pitch search */
    Word16 xn2[L_SUBFR];                    /* Target vector for codebook search */
    Word16 code[L_SUBFR];                   /* Fixed codebook excitation */
    Word16 y1[L_SUBFR];                     /* Filtered adaptive excitation */
    Word16 y2[L_SUBFR];                     /* Filtered fixed codebook excitation */
    Word16 g_coeff[4];                      /* Correlations between xn & y1 */
    Word16 res2[L_SUBFR];                   /* residual after long term prediction */
    Word16 g_coeff_cs[5];
    Word16 exp_g_coeff_cs[5];               /* Correlations between xn, y1, & y2
                                               <y1,y1>, -2<xn,y1>,
                                               <y2,y2>, -2<xn,y2>, 2<y1,y2> */

    /* Scalars */
    Word16 i, j, k, i_subfr;
    Word16 T_op, T0, T0_min, T0_max, T0_frac;
    Word16 gain_pit, gain_code, index;
    Word16 taming, pit_sharp;
    Word16 sat_filter;
    Word32 L_temp;
    Word16 freq_cur[M];

    /* For G.729B */
    Word16 rh_nbe[MP1];
    Word16 lsfq_mem[MA_NP][M];
    Word16 exp_R0, Vad;
    Word16 tmp1, tmp2,avg_lag;
    Word16 temp, Energy_db;

    /*------------------------------------------------------------------------*
     * - Perform LPC analysis:                                                *
     *     * autocorrelation + lag windowing                                  *
     *     * Levinson-durbin algorithm to find a[]                            *
     *     * convert a[] to lsp[]                                             *
     *     * quantize and code the LSPs                                       *
     *     * find the interpolated LSPs and convert to a[] for the 2          *
     *       subframes (both quantized and unquantized)                       *
     *------------------------------------------------------------------------*/

    /* ------------------- */
    /* LP Forward analysis */
    /* ------------------- */
    Autocorrg(p_window, NP, r_h_fwd, r_l_fwd, &exp_R0);    /* Autocorrelations */
    Copy(r_h_fwd, rh_nbe, MP1);    /* saved before lag windowing; passed to Update_cng below */
    Lag_window(NP, r_h_fwd, r_l_fwd);                      /* Lag windowing */
    Levinsong(M, r_h_fwd, r_l_fwd, &A_t_fwd[MP1], rc_fwd, old_A_fwd, old_rc_fwd,&temp );  /* Levinson Durbin */
    Az_lsp(&A_t_fwd[MP1], lsp_new, lsp_old);               /* From A(z) to lsp */

    /* For G.729B */
    /* ------ VAD ------- */
    if (dtx_enable == 1) {
        Lsp_lsf(lsp_new, lsf_new, M);
        vadg(rc_fwd[1], lsf_new, r_h_fwd, r_l_fwd, exp_R0, p_window, frame,
            pastVad, ppastVad, &Vad, &Energy_db);

        /* musdetect receives &Vad and may therefore change the decision made by vadg */
        musdetect( rate, r_h_fwd[0], r_l_fwd[0], exp_R0,rc_fwd ,lag_buf , pgain_buf,
            prev_lp_mode, frame,pastVad, &Vad, Energy_db);

        Update_cng(rh_nbe, exp_R0, Vad);
    }
    else Vad = 1;   /* without DTX every frame is treated as active */

    /* -------------------- */
    /* LP Backward analysis */
    /* -------------------- */
    /* With Vad == 1 the test below reduces to rate == G729E. */
    if ( (rate-(1-Vad))== G729E) {

        /* LPC recursive Window as in G728 */
        autocorr_hyb_window(synth, r_bwd, rexp);    /* Autocorrelations */

        Lag_window_bwd(r_bwd, r_h_bwd, r_l_bwd);    /* Lag windowing */

        /* Fixed Point Levinson (as in G729) */
        Levinsong(M_BWD, r_h_bwd, r_l_bwd, &A_t_bwd[M_BWDP1], rc_bwd,
            old_A_bwd, old_rc_bwd, &temp );

        /* Tests saturation of A_t_bwd */
        sat_filter = 0;
        for (i=M_BWDP1; i<2*M_BWDP1; i++) if (A_t_bwd[i] >= 32767) sat_filter = 1;

        /* On saturation reuse the stored filter, otherwise store the new one */
        if (sat_filter == 1) Copy(A_t_bwd_mem, &A_t_bwd[M_BWDP1], M_BWDP1);
        else Copy(&A_t_bwd[M_BWDP1], A_t_bwd_mem, M_BWDP1);

        /* Additional bandwidth expansion on backward filter */
        Weight_Az(&A_t_bwd[M_BWDP1], GAMMA_BWD, M_BWD, &A_t_bwd[M_BWDP1]);
    }

    /*--------------------------------------------------*
     * Update synthesis signal for next frame.          *
     *--------------------------------------------------*/
    Copy(&synth[L_FRAME], &synth[0], MEM_SYN_BWD);

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes (unquantized).   *
     * The interpolated parameters are in array A_t[] of size (M+1)*4     *
     *--------------------------------------------------------------------*/
    if( prev_lp_mode == 0) {
        Int_lpc(lsp_old, lsp_new, lsf_int, lsf_new, A_t_fwd);
    }
    else {
        /* no interpolation */
        /* unquantized */
        Lsp_Az(lsp_new, A_t_fwd);           /* Subframe 1 */
        Lsp_lsf(lsp_new, lsf_new, M);       /* transformation from LSP to LSF (freq.domain) */
        Copy(lsf_new, lsf_int, M);          /* Subframe 1 */
    }

    if(Vad == 1) {
        /* ---------------- */
        /* LSP quantization */
        /* ---------------- */
        Qua_lspe(lsp_new, lsp_new_q, code_lsp, freq_prev, freq_cur);

        /*--------------------------------------------------------------------*
         * Find interpolated LPC parameters in all subframes (quantized)      *
         * the quantized interpolated parameters are in array Aq_t[]          *
         *--------------------------------------------------------------------*/
        if( prev_lp_mode == 0) {
            Int_qlpc(lsp_old_q, lsp_new_q, A_t_fwd_q);
        }
        else {
            /* no interpolation */
            Lsp_Az(lsp_new_q, &A_t_fwd_q[MP1]);         /* Subframe 2 */
            Copy(&A_t_fwd_q[MP1], A_t_fwd_q, MP1);      /* Subframe 1 */
        }

        /*---------------------------------------------------------------------*
         * - Decision for the switch Forward / Backward                        *
         *---------------------------------------------------------------------*/
        if(rate == G729E) {
            set_lpc_modeg(speech, A_t_fwd_q, A_t_bwd, &lp_mode,
                lsp_new, lsp_old, &bwd_dominant, prev_lp_mode, prev_filter,
                &C_int, &glob_stat, &stat_bwd, &val_stat_bwd);
        }
        else {
            update_bwd( &lp_mode, &bwd_dominant, &C_int, &glob_stat);
        }
    }
    else update_bwd( &lp_mode, &bwd_dominant, &C_int, &glob_stat);

    /* ---------------------------------- */
    /* update the LSPs for the next frame */
    /* ---------------------------------- */
    Copy(lsp_new, lsp_old, M);

    /*----------------------------------------------------------------------*
     * - Find the weighted input speech w_sp[] for the whole speech frame   *
     *----------------------------------------------------------------------*/
    /* Choose the filter (Ap) and order (m_ap) that drive the weighting filter */
    if(lp_mode == 0) {
        m_ap = M;
        if (bwd_dominant == 0) Ap = A_t_fwd;
        else Ap = A_t_fwd_q;
        perc_var(gamma1, gamma2, lsf_int, lsf_new, rc_fwd);
    }
    else {
        if (bwd_dominant == 0) {
            m_ap = M;
            Ap = A_t_fwd;
        }
        else {
            m_ap = M_BWD;
            Ap = A_t_bwd;
        }
        perc_vare(gamma1, gamma2, bwd_dominant);
    }

    pAp = Ap;
    for (i=0; i<2; i++) {
        Weight_Az(pAp, gamma1[i], m_ap, Ap1);
        Weight_Az(pAp, gamma2[i], m_ap, Ap2);
        Residue(m_ap, Ap1, &speech[i*L_SUBFR], &wsp[i*L_SUBFR], L_SUBFR);
        /* in-place synthesis filtering; memory pointer is offset by the filter order */
        Syn_filte(m_ap, Ap2, &wsp[i*L_SUBFR], &wsp[i*L_SUBFR], L_SUBFR,
            &mem_w[M_BWD-m_ap], 0);
        /* keep the last M_BWD weighted samples as memory for the next subframe */
        for(j=0; j<M_BWD; j++) mem_w[j] = wsp[i*L_SUBFR+L_SUBFR-M_BWD+j];
        pAp += m_ap+1;
    }

    /* ---------------------- */
    /* Case of Inactive frame */
    /* ---------------------- */
    if (Vad == 0){

        /* Cod_cngg works on a local copy of the LSF predictor memory, copied back afterwards */
        for (i=0; i<MA_NP; i++) Copy(&freq_prev[i][0], &lsfq_mem[i][0], M);
        Cod_cngg(exc, pastVad, lsp_old_q, old_A_fwd, old_rc_fwd, A_t_fwd_q, ana, lsfq_mem, &seed);
        for (i=0; i<MA_NP; i++) Copy(&lsfq_mem[i][0], &freq_prev[i][0], M);

        ppastVad = pastVad;
        pastVad = Vad;

        /* UPDATE wsp, mem_w, mem_syn, mem_err, and mem_w0 */
        pAp = A_t_fwd;      /* pointer to interpolated LPC parameters */
        pAq = A_t_fwd_q;    /* pointer to interpolated quantized LPC parameters */
        i_gamma = 0;
        for(i_subfr=0; i_subfr < L_FRAME; i_subfr += L_SUBFR) {
            Weight_Az(pAp, gamma1[i_gamma], M, Ap1);
            Weight_Az(pAp, gamma2[i_gamma], M, Ap2);
            i_gamma = add(i_gamma,1);

            /* update mem_syn */
            Syn_filte(M, pAq, &exc[i_subfr], &synth_ptr[i_subfr], L_SUBFR, &mem_syn[M_BWD-M], 0);
            for(j=0; j<M_BWD; j++) mem_syn[j] = synth_ptr[i_subfr+L_SUBFR-M_BWD+j];

            /* update mem_w0 */
            for (i=0; i<L_SUBFR; i++)
                error[i] = speech[i_subfr+i] - synth_ptr[i_subfr+i];
            Residue(M, Ap1, error, xn, L_SUBFR);
            Syn_filte(M, Ap2, xn, xn, L_SUBFR, &mem_w0[M_BWD-M], 0);
            for(j=0; j<M_BWD; j++) mem_w0[j] = xn[L_SUBFR-M_BWD+j];

            /* update mem_err */
            for (i = L_SUBFR-M_BWD, j = 0; i < L_SUBFR; i++, j++)
                mem_err[j] = error[i];

            /* shift the pitch-gain history and append a fixed value */
            for (i= 0; i< 4; i++)
                pgain_buf[i] = pgain_buf[i+1];
            pgain_buf[4] = 8192;

            pAp += MP1;
            pAq += MP1;
        }

        /* update previous filter for next frame */
        Copy(&A_t_fwd_q[MP1], prev_filter, MP1);
        for(i=MP1; i <M_BWDP1; i++) prev_filter[i] = 0;
        prev_lp_mode = lp_mode;

        sharp = SHARPMIN;

        /* Update memories for next frames */
        Copy(&old_speech[L_FRAME], &old_speech[0], L_TOTAL-L_FRAME);
        Copy(&old_wsp[L_FRAME], &old_wsp[0], PIT_MAX);
        Copy(&old_exc[L_FRAME], &old_exc[0], PIT_MAX+L_INTERPOL);

        return;
    } /* End of inactive frame case */

    /* -------------------- */
    /* Case of Active frame */
    /* -------------------- */
    *ana++ = rate+ (Word16)2;   /* bit rate mode */

    if(lp_mode == 0) {
        m_aq = M;
        Aq = A_t_fwd_q;
        /* update previous filter for next frame */
        Copy(&Aq[MP1], prev_filter, MP1);
        for(i=MP1; i <M_BWDP1; i++) prev_filter[i] = 0;
        /* clear the tail of ai_zero beyond the forward filter order */
        for(j=MP1; j<M_BWDP1; j++) ai_zero[j] = 0;
    }
    else {
        m_aq = M_BWD;
        Aq = A_t_bwd;
        if (bwd_dominant == 0) {
            for(j=MP1; j<M_BWDP1; j++) ai_zero[j] = 0;
        }
        /* update previous filter for next frame */
        Copy(&Aq[M_BWDP1], prev_filter, M_BWDP1);
    }

    if(dtx_enable == 1) {
        seed = INIT_SEED;
        ppastVad = pastVad;
        pastVad = Vad;
    }

    /* the LP mode is written to the parameter stream only at rate G729E */
    if (rate == G729E) *ana++ = lp_mode;

    /*----------------------------------------------------------------------*
     * - Find the weighted input speech w_sp[] for the whole speech frame   *
     * - Find the open-loop pitch delay                                     *
     *----------------------------------------------------------------------*/
    /* LSP indices are written only in forward mode */
    if( lp_mode == 0) {
        Copy(lsp_new_q, lsp_old_q, M);
        Lsp_prev_update(freq_cur, freq_prev);
        *ana++ = code_lsp[0];
        *ana++ = code_lsp[1];
    }

    /* Find open loop pitch lag */
    T_op = Pitch_ol(wsp, PIT_MIN, PIT_MAX, L_FRAME);

    /*
     * Update the lag history: shift lag_buf, combine the four remaining
     * lags into avg_lag, and store T_op/2, T_op*10923 (Q15, about 1/3) or
     * T_op itself depending on how close T_op is to 2*avg_lag or 3*avg_lag.
     */
    for (i= 0; i< 4; i++)
        lag_buf[i] = lag_buf[i+1];

    avg_lag = add(lag_buf[0],lag_buf[1]);
    avg_lag = add(avg_lag,lag_buf[2]);
    avg_lag = add(avg_lag,lag_buf[3]);
    avg_lag = mult_r(avg_lag,8192);

    tmp1 = sub(T_op,shl(avg_lag,1));
    tmp2 = sub(T_op,add(shl(avg_lag,1),avg_lag));
    if( sub(abs_s(tmp1), 4)<0){
        lag_buf[4] = shr(T_op,1);
    }
    else if( sub(abs_s(tmp2),6)<0){
        lag_buf[4] = mult(T_op,10923);
    }
    else{
        lag_buf[4] = T_op;
    }

    /* Range for closed loop pitch search in 1st subframe */
    T0_min = sub(T_op, 3);
    if (sub(T0_min,PIT_MIN)<0) {
        T0_min = PIT_MIN;
    }

    T0_max = add(T0_min, 6);
    if (sub(T0_max ,PIT_MAX)>0)
    {
        T0_max = PIT_MAX;
        T0_min = sub(T0_max, 6);
    }

    /*------------------------------------------------------------------------*
     *          Loop for every subframe in the analysis frame                 *
     *------------------------------------------------------------------------*
     *  To find the pitch and innovation parameters. The subframe size is     *
     *  L_SUBFR and the loop is repeated 2 times.                             *
     *     - find the weighted LPC coefficients                               *
     *     - find the LPC residual signal res[]                               *
     *     - compute the target signal for pitch search                       *
     *     - compute impulse response of weighted synthesis filter (h1[])     *
     *     - find the closed-loop pitch parameters                            *
     *     - encode the pitch delay                                           *
     *     - update the impulse response h1[] by including fixed-gain pitch   *
     *     - find target vector for codebook search                           *
     *     - codebook search                                                  *
     *     - encode codebook address                                          *
     *     - VQ of pitch and codebook gains                                   *
     *     - find synthesis speech                                            *
     *     - update states of weighting filter                                *
     *------------------------------------------------------------------------*/
    pAp = Ap;   /* pointer to interpolated "unquantized" LPC parameters */
    pAq = Aq;   /* pointer to interpolated "quantized" LPC parameters */

    i_gamma = 0;

    for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR) {

        /*---------------------------------------------------------------*
         * Find the weighted LPC coefficients for the weighting filter.  *
         *---------------------------------------------------------------*/
        Weight_Az(pAp, gamma1[i_gamma], m_ap, Ap1);
        Weight_Az(pAp, gamma2[i_gamma], m_ap, Ap2);

        /*---------------------------------------------------------------*
         * Compute impulse response, h1[], of weighted synthesis filter  *
         *---------------------------------------------------------------*/
        for (i = 0; i <=m_ap; i++) ai_zero[i] = Ap1[i];
        Syn_filte(m_aq, pAq, ai_zero, h1, L_SUBFR, zero, 0);
        Syn_filte(m_ap, Ap2, h1, h1, L_SUBFR, zero, 0);

        /*------------------------------------------------------------------------*
         *                                                                        *
         *          Find the target vector for pitch search:                      *
         *          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                       *
         *                                                                        *
         *              |------|  res[n]                                          *
         *  speech[n]---| A(z) |--------                                          *
         *              |------|       |   |--------| error[n]  |------|          *
         *                    zero -- (-)--| 1/A(z) |-----------| W(z) |-- target *
         *                    exc          |--------|           |------|          *
         *                                                                        *
         * Instead of subtracting the zero-input response of filters from         *
         * the weighted input speech, the above configuration is used to          *
         * compute the target vector. This configuration gives better performance *
         * with fixed-point implementation. The memory of 1/A(z) is updated by    *
         * filtering (res[n]-exc[n]) through 1/A(z), or simply by subtracting     *
         * the synthesis speech from the input speech:                            *
         *    error[n] = speech[n] - syn[n].                                      *
         * The memory of W(z) is updated by filtering error[n] through W(z),      *
         * or more simply by subtracting the filtered adaptive and fixed          *
         * codebook excitations from the target:                                  *
         *     target[n] - gain_pit*y1[n] - gain_code*y2[n]                       *
         * as these signals are already available.                                *
         *                                                                        *
         *------------------------------------------------------------------------*/
        Residue(m_aq, pAq, &speech[i_subfr], &exc[i_subfr], L_SUBFR);   /* LPC residual */
        for (i=0; i<L_SUBFR; i++) res2[i] = exc[i_subfr+i];
        Syn_filte(m_aq, pAq, &exc[i_subfr], error, L_SUBFR,
            &mem_err[M_BWD-m_aq], 0);
        Residue(m_ap, Ap1, error, xn, L_SUBFR);
        Syn_filte(m_ap, Ap2, xn, xn, L_SUBFR, &mem_w0[M_BWD-m_ap], 0);  /* target signal xn[] */

        /*----------------------------------------------------------------------*
         *                 Closed-loop fractional pitch search                  *
         *----------------------------------------------------------------------*/
        T0 = Pitch_fr3(&exc[i_subfr], xn, h1, L_SUBFR, T0_min, T0_max,
            i_subfr, &T0_frac);

        index = Enc_lag3(T0, T0_frac, &T0_min, &T0_max,PIT_MIN,PIT_MAX,
            i_subfr);

        *ana++ = index;

        /* first subframe only: append a parity word for the pitch index */
        if ( (i_subfr == 0) ) {
            *ana = Parity_Pitch(index);
            if( rate == G729E) {
                *ana ^= (shr(index, 1) & 0x0001);
            }
            ana++;
        }

        /*-----------------------------------------------------------------*
         *   - find unity gain pitch excitation (adaptive codebook entry)  *
         *     with fractional interpolation.                              *
         *   - find filtered pitch exc. y1[]=exc[] convolve with h1[])     *
         *   - compute pitch gain and limit between 0 and 1.2              *
         *   - update target vector for codebook search                    *
         *   - find LTP residual.                                          *
         *-----------------------------------------------------------------*/
        Pred_lt_3(&exc[i_subfr], T0, T0_frac, L_SUBFR);

        Convolve(&exc[i_subfr], h1, y1, L_SUBFR);

        gain_pit = G_pitch(xn, y1, g_coeff, L_SUBFR);

        /* clip pitch gain if taming is necessary */
        taming = test_err(T0, T0_frac);
        if( taming == 1){
            if (sub(gain_pit, GPCLIP) > 0) {
                gain_pit = GPCLIP;
            }
        }

        /* xn2[i] = xn[i] - y1[i] * gain_pit */
        for (i = 0; i < L_SUBFR; i++) {
            L_temp = L_mult(y1[i], gain_pit);
            L_temp = L_shl(L_temp, 1);      /* gain_pit in Q14 */
            xn2[i] = sub(xn[i], extract_h(L_temp));
        }

        /*-----------------------------------------------------*
         * - Innovative codebook search.                       *
         *-----------------------------------------------------*/
        switch (rate) {

            case G729:  /* 8 kbit/s */
            {
                /* ACELP_Codebook returns the signs index through its last argument (&i) */
                index = ACELP_Codebook(xn2, h1, T0, sharp, i_subfr, code, y2, &i);
                *ana++ = index;     /* Positions index */
                *ana++ = i;         /* Signs index */
                break;
            }

            case G729E: /* 11.8 kbit/s */
            {
                /*-----------------------------------------------------------------*
                 * Include fixed-gain pitch contribution into impulse resp. h[]    *
                 *-----------------------------------------------------------------*/
                pit_sharp = shl(sharp, 1);  /* From Q14 to Q15 */
                if(T0 < L_SUBFR) {
                    for (i = T0; i < L_SUBFR; i++){     /* h[i] += pitch_sharp*h[i-T0] */
                        h1[i] = add(h1[i], mult(h1[i-T0], pit_sharp));
                    }
                }

                /* calculate residual after long term prediction */
                /* res2[i] -= exc[i+i_subfr] * gain_pit */
                for (i = 0; i < L_SUBFR; i++) {
                    L_temp = L_mult(exc[i+i_subfr], gain_pit);
                    L_temp = L_shl(L_temp, 1);      /* gain_pit in Q14 */
                    res2[i] = sub(res2[i], extract_h(L_temp));
                }

                /* the selected search receives ana directly; ana is then advanced by 5 */
                if (lp_mode == 0) ACELP_10i40_35bits(xn2, res2, h1, code, y2, ana);   /* Forward */
                else ACELP_12i40_44bits(xn2, res2, h1, code, y2, ana);                /* Backward */
                ana += 5;

                /*-----------------------------------------------------------------*
                 * Include fixed-gain pitch contribution into code[].              *
                 *-----------------------------------------------------------------*/
                if(T0 < L_SUBFR) {
                    for (i = T0; i < L_SUBFR; i++) {    /* code[i] += pitch_sharp*code[i-T0] */
                        code[i] = add(code[i], mult(code[i-T0], pit_sharp));
                    }
                }
                break;
            }

            default : {
                /* any other rate terminates the process */
                printf("Unrecognized bit rate\n");
                exit(-1);
            }
        } /* end of switch */

        /*-----------------------------------------------------*
         * - Quantization of gains.                            *
         *-----------------------------------------------------*/
        g_coeff_cs[0] = g_coeff[0];                         /* <y1,y1> */
        exp_g_coeff_cs[0] = negate(g_coeff[1]);             /* Q-Format:XXX -> JPN */
        g_coeff_cs[1] = negate(g_coeff[2]);                 /* (xn,y1) -> -2<xn,y1> */
        exp_g_coeff_cs[1] = negate(add(g_coeff[3], 1));     /* Q-Format:XXX -> JPN */

        Corr_xy2( xn, y1, y2, g_coeff_cs, exp_g_coeff_cs ); /* Q0 Q0 Q12 ^Qx ^Q0 */
        /*
         * NOTE(review): the arrays hold 5 entries (indices 0..4) and entries
         * 0 and 1 are set above, so Corr_xy2 presumably fills
         *   [2] = <y2,y2>, [3] = -2<xn,y2>, [4] = 2<y1,y2>
         * (the earlier comment listed indices 3..5) - confirm against Corr_xy2.
         */

        index = Qua_gain(code, g_coeff_cs, exp_g_coeff_cs, L_SUBFR,
            &gain_pit, &gain_code, taming);
        *ana++ = index;

        /*------------------------------------------------------------*
         * - Update pitch sharpening "sharp" with quantized gain_pit  *
         *------------------------------------------------------------*/
        for (i= 0; i< 4; i++)
            pgain_buf[i] = pgain_buf[i+1];
        pgain_buf[4] = gain_pit;

        /* sharp is gain_pit limited to the range [SHARPMIN, SHARPMAX] */
        sharp = gain_pit;
        if (sub(sharp, SHARPMAX) > 0) sharp = SHARPMAX;
        else {
            if (sub(sharp, SHARPMIN) < 0) sharp = SHARPMIN;
        }

        /*------------------------------------------------------*
         * - Find the total excitation                          *
         * - find synthesis speech corresponding to exc[]       *
         * - update filters memories for finding the target     *
         *   vector in the next subframe                        *
         *   (update error[-m..-1] and mem_w_err[])             *
         *   update error function for taming process           *
         *------------------------------------------------------*/
        for (i = 0; i < L_SUBFR; i++) {
            /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */
            /* exc[i]  in Q0   gain_pit in Q14               */
            /* code[i] in Q13  gain_cod in Q1                */
            L_temp = L_mult(exc[i+i_subfr], gain_pit);
            L_temp = L_mac(L_temp, code[i], gain_code);
            L_temp = L_shl(L_temp, 1);
            exc[i+i_subfr] = round(L_temp);
        }

        update_exc_err(gain_pit, T0);

        Syn_filte(m_aq, pAq, &exc[i_subfr], &synth_ptr[i_subfr], L_SUBFR,
            &mem_syn[M_BWD-m_aq], 0);
        for(j=0; j<M_BWD; j++) mem_syn[j] = synth_ptr[i_subfr+L_SUBFR-M_BWD+j];

        /* refresh mem_err and mem_w0 from the last M_BWD samples of the subframe */
        for (i = L_SUBFR-M_BWD, j = 0; i < L_SUBFR; i++, j++) {
            mem_err[j] = sub(speech[i_subfr+i], synth_ptr[i_subfr+i]);
            temp = extract_h(L_shl( L_mult(y1[i], gain_pit), 1) );
            k = extract_h(L_shl( L_mult(y2[i], gain_code), 2) );
            mem_w0[j] = sub(xn[i], add(temp, k));
        }

        /* advance to the next subframe's filters */
        pAp += m_ap+1;
        pAq += m_aq+1;
        i_gamma = add(i_gamma,1);
    }

    /*--------------------------------------------------*
     * Update signal for next frame.                    *
     * -> shift to the left by L_FRAME:                 *
     *     speech[], wsp[] and exc[]                    *
     *--------------------------------------------------*/
    Copy(&old_speech[L_FRAME], &old_speech[0], L_TOTAL-L_FRAME);
    Copy(&old_wsp[L_FRAME], &old_wsp[0], PIT_MAX);
    Copy(&old_exc[L_FRAME], &old_exc[0], PIT_MAX+L_INTERPOL);

    prev_lp_mode = lp_mode;

    return;
}
/*---------------------------------------------------------------------------- ; FUNCTION CODE ----------------------------------------------------------------------------*/ Word16 hp_max( Word32 corr[], /* i : correlation vector. */ Word16 scal_sig[], /* i : scaled signal. */ Word16 L_frame, /* i : length of frame to compute pitch */ Word16 lag_max, /* i : maximum lag */ Word16 lag_min, /* i : minimum lag */ Word16 *cor_hp_max, /* o : max high-pass filtered norm. correlation */ Flag *pOverflow /* i/o : overflow Flag */ ) { Word16 i; Word16 *p, *p1; Word32 max, t0, t1; Word16 max16, t016, cor_max; Word16 shift, shift1, shift2; Word32 L_temp; max = MIN_32; t0 = 0L; for (i = lag_max - 1; i > lag_min; i--) { /* high-pass filtering */ t0 = L_shl(corr[-i], 1, pOverflow); L_temp = L_sub(t0, corr[-i-1], pOverflow); t0 = L_sub(L_temp, corr[-i+1], pOverflow); t0 = L_abs(t0); if (t0 >= max) { max = t0; } } /* compute energy */ p = scal_sig; p1 = &scal_sig[0]; t0 = 0L; for (i = 0; i < L_frame; i++, p++, p1++) { t0 = L_mac(t0, *p, *p1, pOverflow); } p = scal_sig; p1 = &scal_sig[-1]; t1 = 0L; for (i = 0; i < L_frame; i++, p++, p1++) { t1 = L_mac(t1, *p, *p1, pOverflow); } /* high-pass filtering */ L_temp = L_shl(t0, 1, pOverflow); t1 = L_shl(t1, 1, pOverflow); t0 = L_sub(L_temp, t1, pOverflow); t0 = L_abs(t0); /* max/t0 */ /* shift1 = sub(norm_l(max), 1); max16 = extract_h(L_shl(max, shift1)); shift2 = norm_l(t0); t016 = extract_h(L_shl(t0, shift2)); */ t016 = norm_l(max); shift1 = sub(t016, 1, pOverflow); L_temp = L_shl(max, shift1, pOverflow); max16 = (Word16)(L_temp >> 16); shift2 = norm_l(t0); L_temp = L_shl(t0, shift2, pOverflow); t016 = (Word16)(L_temp >> 16); if (t016 != 0) { cor_max = div_s(max16, t016); } else { cor_max = 0; } shift = sub(shift1, shift2, pOverflow); if (shift >= 0) { *cor_hp_max = shr(cor_max, shift, pOverflow); /* Q15 */ } else { *cor_hp_max = shl(cor_max, negate(shift), pOverflow); /* Q15 */ } return 0; }
/*
 * Pitch_ol - open-loop pitch lag estimation.
 *
 * The input is first rescaled into a local buffer according to its energy
 * (computed over signal[-pit_max .. L_frame-1]):
 *     L_mac overflowed  -> scal_sig[i] = signal[i] >> 3
 *     energy < 2^20     -> scal_sig[i] = signal[i] << 3
 *     otherwise         -> scal_sig[i] = signal[i]
 *
 * The lag range is then searched in three sections with Lag_max:
 *     section 1 : pit_max       down to 4*pit_min
 *     section 2 : 4*pit_min - 1 down to 2*pit_min
 *     section 3 : 2*pit_min - 1 down to pit_min
 * and the section maxima are compared with the THRESHPIT factor applied
 * to the longer-lag candidate, which favours the smaller lags.
 *
 * The local buffer is sized with the compile-time constants
 * L_FRAME + PIT_MAX, so pit_max and L_frame must not exceed them.
 */
Word16 Pitch_ol(       /* output: open loop pitch lag                        */
    Word16 signal[],   /* input : signal used to compute the open loop pitch */
                       /*     signal[-pit_max] to signal[-1] should be known */
    Word16 pit_min,    /* input : minimum pitch lag                          */
    Word16 pit_max,    /* input : maximum pitch lag                          */
    Word16 L_frame     /* input : length of frame to compute pitch           */
)
{
    Word16 n, lag_hi, lag_lo;
    Word16 cor1, cor2, cor3;
    Word16 lag1, lag2, lag3;
    Word32 energy;

    /* Scaled signal; scal_sig[0] corresponds to signal[0] */
    Word16 scaled_signal[L_FRAME+PIT_MAX];
    Word16 *scal_sig;

    scal_sig = &scaled_signal[pit_max];

    /* Energy of the analysed span; Overflow records saturation in L_mac. */
    Overflow = 0;
    energy = 0;
    for (n = -pit_max; n < L_frame; n++) {
        energy = L_mac(energy, signal[n], signal[n]);
    }

    /* Scale the input according to the energy measured above. */
    if (Overflow == 1) {
        for (n = -pit_max; n < L_frame; n++) {
            scal_sig[n] = shr(signal[n], 3);
        }
    }
    else if (L_sub(energy, (Word32)1048576L) < (Word32)0) {   /* energy < 2^20 */
        for (n = -pit_max; n < L_frame; n++) {
            scal_sig[n] = shl(signal[n], 3);
        }
    }
    else {
        for (n = -pit_max; n < L_frame; n++) {
            scal_sig[n] = signal[n];
        }
    }

    /* Section 1: pit_max down to 4*pit_min */
    lag_lo = shl(pit_min, 2);
    lag1 = Lag_max(scal_sig, L_frame, pit_max, lag_lo, &cor1);

    /* Section 2: 4*pit_min-1 down to 2*pit_min */
    lag_hi = sub(lag_lo, 1);
    lag_lo = shl(pit_min, 1);
    lag2 = Lag_max(scal_sig, L_frame, lag_hi, lag_lo, &cor2);

    /* Section 3: 2*pit_min-1 down to pit_min */
    lag_hi = sub(lag_lo, 1);
    lag3 = Lag_max(scal_sig, L_frame, lag_hi, pit_min, &cor3);

    /* Compare the three section maxima, favouring the smaller lags. */
    if (sub(mult(cor1, THRESHPIT), cor2) < 0) {
        cor1 = cor2;
        lag1 = lag2;
    }

    if (sub(mult(cor1, THRESHPIT), cor3) < 0) {
        lag1 = lag3;
    }

    return (lag1);
}
/*************************************************************************
 *
 *   FUNCTION:  Dec_gain()
 *
 *   PURPOSE: Decode the pitch and codebook gains
 *
 *   The quantization index selects an entry in a mode-dependent table.
 *   The entry supplies the pitch gain and a codebook gain factor; the
 *   factor is combined with the gain predicted by gc_pred() to give the
 *   codebook gain, and the predictor memory is refreshed through
 *   gc_pred_update().
 *
 ************************************************************************/
void Dec_gain(
    gc_predState *pred_state, /* i/o: MA predictor state           */
    enum Mode mode,           /* i  : AMR mode                     */
    Word16 index,             /* i  : index of quantization.       */
    Word16 code[],            /* i  : Innovative vector.           */
    Word16 evenSubfr,         /* i  : Flag for even subframes      */
    Word16 * gain_pit,        /* o  : Pitch gain.                  */
    Word16 * gain_cod         /* o  : Code gain.                   */
)
{
    const Word16 *entry;
    Word16 frac, gcode0, exp, qua_ener, qua_ener_MR122;
    Word16 g_code;
    Word32 L_acc;

    /* Every table entry is addressed in steps of four words. */
    index = shl (index, 2);

    test(); test(); test();
    if (    sub (mode, MR102) == 0
         || sub (mode, MR74) == 0
         || sub (mode, MR67) == 0)
    {
        /* High-rate table: all four values are stored. */
        entry = &table_gain_highrates[index];       move16 ();

        *gain_pit = entry[0];                       move16 ();
        g_code = entry[1];                          move16 ();
        qua_ener_MR122 = entry[2];                  move16 ();
        qua_ener = entry[3];                        move16 ();
    }
    else
    {
        test();
        if (sub (mode, MR475) == 0)
        {
            /* The MR475 table holds two gain pairs per index; pick by subframe parity. */
            index = add (index, shl(sub(1, evenSubfr), 1));
            entry = &table_gain_MR475[index];       move16 ();

            *gain_pit = entry[0];                   move16 ();
            g_code = entry[1];                      move16 ();

            /*---------------------------------------------------------*
             *  The predictor update values are not stored in the 4.75 *
             *  table (to save space) and are derived from g_code:     *
             *                                                         *
             *   qua_ener_MR122 : log2(g_code), assembled from the     *
             *                    exponent/fraction pair of Log2()     *
             *   qua_ener       : the same log2 value multiplied by    *
             *                    24660 (~ 20*log10(2) in Q12), i.e.   *
             *                    20*log10(g_code)                     *
             *---------------------------------------------------------*/

            /* Log2(x Q12) = log2(x) + 12 */
            Log2 (L_deposit_l (g_code), &exp, &frac);
            exp = sub(exp, 12);

            qua_ener_MR122 = add (shr_r (frac, 5), shl (exp, 10));

            /* 24660 Q12 ~= 6.0206 = 20*log10(2) */
            L_acc = Mpy_32_16(exp, frac, 24660);
            qua_ener = round (L_shl (L_acc, 13)); /* Q12 * Q0 = Q13 -> Q10 */
        }
        else
        {
            /* Low-rate table: all four values are stored. */
            entry = &table_gain_lowrates[index];    move16 ();

            *gain_pit = entry[0];                   move16 ();
            g_code = entry[1];                      move16 ();
            qua_ener_MR122 = entry[2];              move16 ();
            qua_ener = entry[3];                    move16 ();
        }
    }

    /*-------------------------------------------------------------------*
     *  predict codebook gain                                            *
     *  ~~~~~~~~~~~~~~~~~~~~~                                            *
     *  gc0     = Pow2(int(d)+frac(d))                                   *
     *          = 2^exp * 2^frac                                         *
     *                                                                   *
     *  gcode0 (Q14) = 2^14*2^frac = gc0 * 2^(14-exp)                    *
     *-------------------------------------------------------------------*/
    gc_pred(pred_state, mode, code, &exp, &frac, NULL, NULL);

    gcode0 = extract_l(Pow2(14, frac));

    /*------------------------------------------------------------------*
     *  Codebook gain = table factor * predicted gain; the shift by     *
     *  (10 - exp) applies the exponent returned by gc_pred().          *
     *------------------------------------------------------------------*/
    L_acc = L_mult(g_code, gcode0);
    L_acc = L_shr(L_acc, sub(10, exp));
    *gain_cod = extract_h(L_acc);

    /* update table of past quantized energies */
    gc_pred_update(pred_state, qua_ener_MR122, qua_ener);

    return;
}
/*
 * musdetect - music-detection stage that can override the VAD decision.
 *
 * Keeps running statistics in function-local statics across calls: mean
 * reflection coefficients (mrc), mean frame energy (MeanSE), mean pitch
 * gain (MeanPgain) and several frame counters. Because of those statics
 * the function is stateful and not reentrant.
 *
 * *Vad is only ever changed to VOICE, and only when rate == G729E (see
 * the final block). For other rates the function only updates its
 * statistics.
 *
 *   rate      : bit-rate selector; compared with G729D (threshold choice)
 *               and G729E (decision enabled)
 *   r_h, r_l  : high / low parts of a 32-bit value whose Log2 gives the
 *               frame energy (the caller visible in this file passes the
 *               zero-lag autocorrelation)
 *   exp_R0    : exponent that goes with r_h/r_l
 *   rc        : reflection coefficients; entries 0..9 are read
 *   lags      : lag history; entries 0..4 are read
 *   pgains    : pitch-gain history; entries 0..4 are read
 *   stat_flg  : count_music is increased when this is 1 and *Vad == VOICE
 *   frm_count : frame counter; the long-term means are refreshed whenever
 *               its low 6 bits are zero
 *   prev_vad  : previous VAD decision, used in the PFLAG combination
 *   Vad       : in/out VAD decision
 *   LLenergy  : energy value compared with fixed thresholds in the final
 *               decision
 */
void musdetect(
    Word16 rate,
    Word16 r_h,
    Word16 r_l,
    Word16 exp_R0,
    Word16 *rc,
    Word16 *lags,
    Word16 *pgains,
    Word16 stat_flg,
    Word16 frm_count,
    Word16 prev_vad,
    Word16 *Vad,
    Word16 LLenergy)
{
    Word16 i,j;
    static Word16 count_music=0;        /* per-window count (steps of 256) of frames with stat_flg == 1 and VOICE */
    static Word16 Mcount_music=0;       /* smoothed count_music */
    static Word16 count_consc=0;        /* consecutive calls with count_music == 0 */
    Word16 std;
    static Word16 MeanPgain =8192;      /* running mean of the pitch gain */
    Word16 PFLAG1, PFLAG2, PFLAG;
    static Word16 count_pflag=0;        /* per-window count (steps of 256) of frames with PFLAG == 1 */
    static Word16 Mcount_pflag=0;       /* smoothed count_pflag */
    static Word16 count_consc_pflag=0;  /* consecutive calls with count_pflag == 0 */
    static Word16 count_consc_rflag=0;  /* consecutive calls meeting the rc[1] / MeanPgain condition */
    static Word16 mrc[10]={0,0, 0, 0, 0, 0, 0, 0,0,0};  /* running mean of rc[], updated on NOISE frames */
    static Word16 MeanSE =0;            /* running mean of Lenergy, updated on NOISE frames */
    Word16 pderr, Lenergy , SD;
    Word16 Thres, Coeff, C_Coeff;
    Word32 acc0;
    Word16 exp, frac, lagsum;

    /* pderr = product over the first four coefficients of (32767 - rc[i]*rc[i]) */
    pderr =32767;
    for (i=0; i<4; i++){
        j = mult(rc[i], rc[i]);
        j = sub(32767, j);
        pderr = mult(pderr, j);
    }

    /* compute the frame energy */
    acc0 = Mpy_32_16(r_h, r_l, pderr);
    Log2(acc0, &exp, &frac);
    acc0 = Mpy_32_16(exp, frac, 9864);
    i = sub(exp_R0, 1);
    i = sub(i, 1);
    acc0 = L_mac(acc0, 9864, i);
    acc0 = L_shl(acc0, 11);
    Lenergy = extract_h(acc0);
    Lenergy = sub(Lenergy, 4875);

    /* SD: sum of squared differences between mrc[] and the current rc[] */
    acc0 = 0L;
    for (i=0; i<10; i++){
        j = sub(mrc[i], rc[i]);
        acc0 = L_mac(acc0, j, j);
    }
    SD = extract_h(acc0);

    /* On NOISE frames, blend the new values into the means with weights 29491 and 3277 */
    if( *Vad == NOISE ){
        for (i=0; i<10; i++){
            acc0 = L_mult(29491, mrc[i]);
            acc0 = L_mac(acc0, 3277, rc[i]);
            mrc[i] = extract_h(acc0);
        }
        acc0 = L_mult(29491, MeanSE);
        acc0 = L_mac(acc0, 3277, Lenergy);
        MeanSE = extract_h(acc0);
    }

    /* determine the PFLAG */
    acc0 = 0L;
    lagsum = 0;
    for (i=0; i<5; i++){
        acc0 = L_mac(acc0, pgains[i], 6554);    /* 1/5 in Q15 */
        lagsum = add(lagsum, lags[i]);
    }

    /* blend the 5-frame average into the running mean (weights 6554 and 26214) */
    acc0 = L_mult(extract_h(acc0), 6554);
    acc0 = L_mac(acc0, MeanPgain, 26214);
    MeanPgain = extract_h(acc0);    /* compute the mean pitch gain */

    /* spread of the lags: sum of (5*lags[i] - lagsum)^2 */
    acc0 = 0L;
    for (i=0; i<5; i++){
        /* NOTE: the variance of the lag is scaled up by 25 */
        j = shl(lags[i], 2);
        j = add(j,lags[i]);     /* j equals 5*lags[i] */
        j = sub(j, lagsum);
        acc0 = L_mac(acc0, j, j);
    }

    acc0 = L_shl(acc0, 22);     /* NOTE: the final variance of the lag is scaled up by 25x128 */
    std = extract_h(acc0);

    if ( rate == G729D)
        Thres = 11960;  /* 0.73 in Q14 */
    else
        Thres = 10322;  /* 0.63 in Q14 */

    /* PFLAG2: mean pitch gain above the rate-dependent threshold */
    if ( sub(MeanPgain,Thres) > 0)
        PFLAG2 =1;
    else
        PFLAG2 =0;

    /* PFLAG1: small lag spread together with a moderate mean pitch gain */
    /* 21632 = 1.3*1.3*25*4*128*/
    if ( (sub(std, 21632) < 0) && (sub(MeanPgain, 7373) > 0))
        PFLAG1 =1;
    else
        PFLAG1 =0;

    /* PFLAG = (prev_vad AND (PFLAG1 OR PFLAG2)) OR PFLAG2 */
    PFLAG= (Word16)( ((Word16)prev_vad & (Word16)(PFLAG1 | PFLAG2))| (Word16)(PFLAG2));

    /* count consecutive frames with 0 <= rc[1] <= 14746 and MeanPgain < 8192 */
    if( (sub(rc[1], 14746) <= 0) && (rc[1] >= 0) && (sub(MeanPgain,8192) < 0))
        count_consc_rflag = add(count_consc_rflag,1);
    else
        count_consc_rflag =0;

    if( (stat_flg == 1) && (*Vad == VOICE))
        count_music = add(count_music,256);     /* Q8 */

    /* every 64 frames: initialise (at frame 64) or smooth the long-term music count */
    if( (frm_count & 0x003f) == 0){
        if( frm_count == 64)
            Mcount_music = count_music;
        else{
            acc0 = L_mult(29491, Mcount_music);
            acc0 = L_mac(acc0, 3277, count_music);
            Mcount_music = extract_h(acc0);
        }
    }

    if( count_music == 0)
        count_consc = add(count_consc,1);
    else
        count_consc = 0;

    /* a long run without music evidence clears the long-term count */
    if( ((sub(count_consc, 500)>0) || (sub(count_consc_rflag , 150)>0))) Mcount_music = 0;

    /* the window counter restarts only after it has been used above */
    if( (frm_count & 0x003f) == 0){
        count_music = 0;
    }

    if( PFLAG== 1 )
        count_pflag = add(count_pflag,256);     /* Q8 */

    /* every 64 frames: initialise or smooth Mcount_pflag; the weights depend on count_pflag */
    if( (frm_count & 0x003f) == 0){
        if( frm_count == 64)
            Mcount_pflag = count_pflag;
        else{
            if( sub(count_pflag , 6400)> 0){
                Coeff = 32113;
                C_Coeff = 655;
            }
            else if( sub(count_pflag , 5120)> 0){
                Coeff = 31130;
                C_Coeff = 1638;
            }
            else {
                Coeff = 29491;
                C_Coeff = 3277;
            }
            acc0 = L_mult(Coeff, Mcount_pflag);
            acc0 = L_mac(acc0, C_Coeff, count_pflag);
            Mcount_pflag = extract_h(acc0);
        }
    }

    if( count_pflag == 0)
        count_consc_pflag = add(count_consc_pflag,1);
    else
        count_consc_pflag = 0;

    if( ((sub(count_consc_pflag, 100)>0) || (sub(count_consc_rflag , 150)>0))) Mcount_pflag = 0;

    if( (frm_count & 0x003f) == 0)
        count_pflag = 0;

    /* Final decision: only at rate G729E, and only ever towards VOICE */
    if (rate == G729E){
        if( (sub(SD,4915) > 0) && (sub(Lenergy ,MeanSE)> 819) && (sub(LLenergy,10240) >0 ) )
            *Vad =VOICE;
        else if( ((sub(SD,12452) > 0) || (sub(Lenergy ,MeanSE)> 819)) && (sub(LLenergy,10240) >0 ) )
            *Vad =VOICE;
        else if( ( (sub(Mcount_pflag ,2560) >0) || (sub(Mcount_music ,280)>0) || (sub(frm_count,64) < 0))
                 && (sub(LLenergy,1433) >0))
            *Vad =VOICE;
    }

    return;
}
/*
 * Build a colour value from `col` and `alpha`.
 *
 * The result is and(col, 0x00FFFFFF) + shl(alpha, 24); 0x00FFFFFF is the
 * same value as the decimal 16777215.  and() and shl() are helpers defined
 * elsewhere - presumably a bitwise AND and a left shift (confirm against
 * their definitions).
 */
int SetAlphaIntoColor(int col, int alpha)
{
    const int colour_part = and(col, 0x00FFFFFF);
    const int alpha_part = shl(alpha, 24);

    return colour_part + alpha_part;
}
void GSDrawScanlineCodeGenerator::Init(int params) { const int _top = params + 4; const int _v = params + 8; // int skip = left & 3; mov(ebx, edx); and(edx, 3); // left -= skip; sub(ebx, edx); // int steps = right - left - 4; sub(ecx, ebx); sub(ecx, 4); // GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))]; shl(edx, 4); movdqa(xmm7, xmmword[edx + (size_t)&m_test[0]]); mov(eax, ecx); sar(eax, 31); and(eax, ecx); shl(eax, 4); por(xmm7, xmmword[eax + (size_t)&m_test[7]]); // GSVector2i* fza_base = &m_env.fzbr[top]; mov(esi, dword[esp + _top]); lea(esi, ptr[esi * 8]); add(esi, dword[&m_env.fzbr]); // GSVector2i* fza_offset = &m_env.fzbc[left >> 2]; lea(edi, ptr[ebx * 2]); add(edi, dword[&m_env.fzbc]); if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_env.d[skip] shl(edx, 4); lea(edx, ptr[edx + (size_t)m_env.d]); // ebx = &v mov(ebx, dword[esp + _v]); } if(!m_sel.sprite) { if(m_sel.fwrite && m_sel.fge || m_sel.zb) { movaps(xmm0, xmmword[ebx + 16]); // v.p if(m_sel.fwrite && m_sel.fge) { // f = GSVector4i(vp).zzzzh().zzzz().add16(m_env.d[skip].f); cvttps2dq(xmm1, xmm0); pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); paddw(xmm1, xmmword[edx + 16 * 6]); movdqa(xmmword[&m_env.temp.f], xmm1); } if(m_sel.zb) { // z = vp.zzzz() + m_env.d[skip].z; shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); addps(xmm0, xmmword[edx]); movaps(xmmword[&m_env.temp.z], xmm0); } } } else { if(m_sel.ztest) { movdqa(xmm0, xmmword[&m_env.p.z]); } } if(m_sel.fb) { if(m_sel.edge || m_sel.tfx != TFX_NONE) { movaps(xmm4, xmmword[ebx + 32]); // v.t } if(m_sel.edge) { pshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); pshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3)); psrlw(xmm3, 9); movdqa(xmmword[&m_env.temp.cov], xmm3); } if(m_sel.tfx != TFX_NONE) { if(m_sel.fst) { // GSVector4i vti(vt); cvttps2dq(xmm4, xmm4); // si = vti.xxxx() + m_env.d[skip].si; // ti = 
vti.yyyy(); if(!sprite) ti += m_env.d[skip].ti; pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); paddd(xmm2, xmmword[edx + 16 * 7]); if(!m_sel.sprite) { paddd(xmm3, xmmword[edx + 16 * 8]); } else { if(m_sel.ltf) { movdqa(xmm4, xmm3); pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); psrlw(xmm4, 1); movdqa(xmmword[&m_env.temp.vf], xmm4); } } movdqa(xmmword[&m_env.temp.s], xmm2); movdqa(xmmword[&m_env.temp.t], xmm3); } else { // s = vt.xxxx() + m_env.d[skip].s; // t = vt.yyyy() + m_env.d[skip].t; // q = vt.zzzz() + m_env.d[skip].q; movaps(xmm2, xmm4); movaps(xmm3, xmm4); shufps(xmm2, xmm2, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm3, xmm3, _MM_SHUFFLE(1, 1, 1, 1)); shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); addps(xmm2, xmmword[edx + 16 * 1]); addps(xmm3, xmmword[edx + 16 * 2]); addps(xmm4, xmmword[edx + 16 * 3]); movaps(xmmword[&m_env.temp.s], xmm2); movaps(xmmword[&m_env.temp.t], xmm3); movaps(xmmword[&m_env.temp.q], xmm4); rcpps(xmm4, xmm4); mulps(xmm2, xmm4); mulps(xmm3, xmm4); } } if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { if(m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); cvttps2dq(xmm6, xmmword[ebx]); // v.c // vc = vc.upl16(vc.zwxy()); pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 0, 3, 2)); punpcklwd(xmm6, xmm5); // rb = vc.xxxx().add16(m_env.d[skip].rb); // ga = vc.zzzz().add16(m_env.d[skip].ga); pshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2)); paddw(xmm5, xmmword[edx + 16 * 4]); paddw(xmm6, xmmword[edx + 16 * 5]); movdqa(xmmword[&m_env.temp.rb], xmm5); movdqa(xmmword[&m_env.temp.ga], xmm6); } else { if(m_sel.tfx == TFX_NONE) { movdqa(xmm5, xmmword[&m_env.c.rb]); movdqa(xmm6, xmmword[&m_env.c.ga]); } } } } }
void GSDrawScanlineCodeGenerator::Step() { // steps -= 4; sub(ecx, 4); // fza_offset++; add(edi, 8); if(!m_sel.sprite) { // z += m_env.d4.z; if(m_sel.zb) { movaps(xmm0, xmmword[&m_env.temp.z]); addps(xmm0, xmmword[&m_env.d4.z]); movaps(xmmword[&m_env.temp.z], xmm0); } // f = f.add16(m_env.d4.f); if(m_sel.fwrite && m_sel.fge) { movdqa(xmm1, xmmword[&m_env.temp.f]); paddw(xmm1, xmmword[&m_env.d4.f]); movdqa(xmmword[&m_env.temp.f], xmm1); } } else { if(m_sel.ztest) { movdqa(xmm0, xmmword[&m_env.p.z]); } } if(m_sel.fb) { if(m_sel.tfx != TFX_NONE) { if(m_sel.fst) { // GSVector4i st = m_env.d4.st; // si += st.xxxx(); // if(!sprite) ti += st.yyyy(); movdqa(xmm4, xmmword[&m_env.d4.st]); pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); paddd(xmm2, xmmword[&m_env.temp.s]); movdqa(xmmword[&m_env.temp.s], xmm2); if(!m_sel.sprite) { pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); paddd(xmm3, xmmword[&m_env.temp.t]); movdqa(xmmword[&m_env.temp.t], xmm3); } else { movdqa(xmm3, xmmword[&m_env.temp.t]); } } else { // GSVector4 stq = m_env.d4.stq; // s += stq.xxxx(); // t += stq.yyyy(); // q += stq.zzzz(); movaps(xmm2, xmmword[&m_env.d4.stq]); movaps(xmm3, xmm2); movaps(xmm4, xmm2); shufps(xmm2, xmm2, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm3, xmm3, _MM_SHUFFLE(1, 1, 1, 1)); shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); addps(xmm2, xmmword[&m_env.temp.s]); addps(xmm3, xmmword[&m_env.temp.t]); addps(xmm4, xmmword[&m_env.temp.q]); movaps(xmmword[&m_env.temp.s], xmm2); movaps(xmmword[&m_env.temp.t], xmm3); movaps(xmmword[&m_env.temp.q], xmm4); rcpps(xmm4, xmm4); mulps(xmm2, xmm4); mulps(xmm3, xmm4); } } if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { if(m_sel.iip) { // GSVector4i c = m_env.d4.c; // rb = rb.add16(c.xxxx()); // ga = ga.add16(c.yyyy()); movdqa(xmm7, xmmword[&m_env.d4.c]); pshufd(xmm5, xmm7, _MM_SHUFFLE(0, 0, 0, 0)); pshufd(xmm6, xmm7, _MM_SHUFFLE(1, 1, 1, 1)); paddw(xmm5, xmmword[&m_env.temp.rb]); paddw(xmm6, xmmword[&m_env.temp.ga]); movdqa(xmmword[&m_env.temp.rb], xmm5); 
movdqa(xmmword[&m_env.temp.ga], xmm6); } else { if(m_sel.tfx == TFX_NONE) { movdqa(xmm5, xmmword[&m_env.c.rb]); movdqa(xmm6, xmmword[&m_env.c.ga]); } } } } // test = m_test[7 + (steps & (steps >> 31))]; mov(edx, ecx); sar(edx, 31); and(edx, ecx); shl(edx, 4); movdqa(xmm7, xmmword[edx + (size_t)&m_test[7]]); }
/* Standard Long-Term Postfilter */
/*
 * Steps performed on one frame of FRSZ samples:
 *   1. search the lag in [pp-DPPQNS, pp+DPPQNS] (clamped to MINPP..MAXPP)
 *      that maximises  xcorr^2 / energy(lagged signal);
 *   2. derive the filter coefficient from the normalised correlation at the
 *      best lag, smooth it through *ma_a (weights 8192 and 24576) and zero
 *      it when both *ma_a < ATHLD1 and the coefficient < ATHLD2;
 *   3. filter s[] into e[] and compute a gain from the energies of the
 *      input and of the filtered signal;
 *   4. over the first NINT samples, cross-fade from the previous frame's
 *      filter (b_prv[], *pp_prv) to the current one; the remaining samples
 *      are scaled by the gain;
 *   5. store the current lag and filter taps in *pp_prv and b_prv[].
 *
 * The current frame starts at s[XQOFF]; samples before it are read as
 * history (down to XQOFF - (largest searched lag)).
 */
void postfilter(
   Word16 *s,      /* input : quantized speech signal                      */
   Word16 pp,      /* input : pitch period                                 */
   Word16 *ma_a,   /* in/out: smoothed filter coefficient                  */
   Word16 *b_prv,  /* in/out: the two filter taps of the previous frame    */
   Word16 *pp_prv, /* in/out: lag used by the previous frame               */
   Word16 *e)      /* output: enhanced speech signal                       */
{
   Word16 scaled[MAXPP+FRSZ];   /* down-scaled copy of the search window      */
   Word16 *sig;                 /* frame start inside s[] or inside scaled[]  */
   Word16 *win;                 /* first sample of the search window in s[]   */
   int k;
   Word16 lag, lag_lo, lag_hi, lag_best;
   Word16 win_len, dnshift, tmp, tmp2, exp_a, exp_b, exp_diff;
   Word32 acc0, acc1;
   Word32 en0, en1, xc, xc_best, en_out;
   Word16 en0_mant, en0_exp;
   Word16 en1_best, en1_best_exp, en1_mant, en1_exp;
   Word16 xcsq_best, xcsq_best_exp, xcsq, xcsq_exp;
   Word16 coef, root, root_exp, xc_mant, rshift;
   Word16 ltp_tap, gain, en_out_exp;
   Word16 w_new, w_old, tap0, tap1_new, tap1_old;
   Word32 ramp, ramp_step;

   sig = s + XQOFF;

   /* ------------------------------------------------------------ */
   /* lag search range around the decoded pitch                     */
   /* ------------------------------------------------------------ */
   lag_lo = sub(pp, DPPQNS);
   lag_hi = add(pp, DPPQNS);
   if (lag_lo < MINPP) {
      lag_lo = MINPP;
      lag_hi = add(lag_lo, 2*DPPQNS);
   }
   else if (lag_hi > MAXPP) {
      lag_hi = MAXPP;
      lag_lo = sub(lag_hi, 2*DPPQNS);
   }

   /* ------------------------------------------------------------ */
   /* energy of the whole search window (samples pre-shifted by 3)  */
   /* decides whether the signal must be scaled down first          */
   /* ------------------------------------------------------------ */
   win = &s[XQOFF - lag_hi];
   win_len = add(FRSZ, lag_hi);
   acc0 = 0;
   for (k = 0; k < win_len; k++) {
      tmp = shr(win[k], 3);
      acc0 = L_mac0(acc0, tmp, tmp);
   }
   dnshift = norm_l(acc0);
   if (acc0 == 0) {
      dnshift = 31;
   }
   dnshift = sub(6, dnshift);
   if (dnshift > 0) {
      /* halve the shift (rounding up) and work on a scaled copy */
      dnshift = shr(add(dnshift, 1), 1);
      for (k = 0; k < win_len; k++) {
         scaled[k] = shr(win[k], dnshift);
      }
      sig = scaled + lag_hi;
   }
   else {
      dnshift = 0;
   }

   /* ------------------------------------------------------------ */
   /* correlations at the smallest lag initialise the search        */
   /* ------------------------------------------------------------ */
   en0 = 0;
   en1 = 0;
   xc = 0;
   for (k = 0; k < FRSZ; k++) {
      en0 = L_mac0(en0, sig[k], sig[k]);
      en1 = L_mac0(en1, sig[k-lag_lo], sig[k-lag_lo]);
      xc = L_mac0(xc, sig[k], sig[k-lag_lo]);
   }
   en0_exp = norm_l(en0);
   en0_mant = extract_h(L_shl(en0, en0_exp));
   en0_exp = en0_exp - 16;

   lag_best = lag_lo;
   en1_best_exp = norm_l(en1);
   en1_best = extract_h(L_shl(en1, en1_best_exp));
   xcsq_best_exp = norm_l(xc);
   tmp = extract_h(L_shl(xc, xcsq_best_exp));
   xcsq_best_exp = shl(xcsq_best_exp, 1);
   xcsq_best = extract_h(L_mult(tmp, tmp));
   xc_best = xc;

   for (lag = lag_lo + 1; lag <= lag_hi; lag++) {
      /* slide the lagged-energy window by one sample */
      en1 = L_msu0(en1, sig[FRSZ-lag], sig[FRSZ-lag]);
      en1 = L_mac0(en1, sig[-lag], sig[-lag]);

      xc = 0;
      for (k = 0; k < FRSZ; k++) {
         xc = L_mac0(xc, sig[k], sig[k-lag]);
      }

      xcsq_exp = norm_l(xc);
      tmp = extract_h(L_shl(xc, xcsq_exp));
      xcsq_exp = shl(xcsq_exp, 1);
      xcsq = extract_h(L_mult(tmp, tmp));
      en1_exp = norm_l(en1);
      en1_mant = extract_h(L_shl(en1, en1_exp));

      /* cross-multiplied comparison: xcsq * en1_best  vs  xcsq_best * en1 */
      acc0 = L_mult(xcsq, en1_best);
      acc1 = L_mult(xcsq_best, en1_mant);
      exp_a = add(xcsq_exp, en1_best_exp);
      exp_b = add(xcsq_best_exp, en1_exp);
      exp_diff = sub(exp_a, exp_b);
      if (exp_diff >= 0) {
         acc0 = L_shr(acc0, exp_diff);
      }
      else {
         /* L_shl() is called with a negative count here */
         acc1 = L_shl(acc1, exp_diff);
      }

      if (L_sub(acc0, acc1) > 0) {
         xcsq_best = xcsq;
         xcsq_best_exp = xcsq_exp;
         en1_best = en1_mant;
         en1_best_exp = en1_exp;
         lag_best = lag;
         xc_best = xc;
      }
   }

   /* ------------------------------------------------------------ */
   /* all-zero pitch postfilter coefficient                         */
   /* ------------------------------------------------------------ */
   if (en1_best == 0 || en0 == 0 || xc_best <= 0) {
      coef = 0;
   }
   else {
      acc0 = en1_best_exp - 16;
      root = mult(en1_best, en0_mant);
      acc0 = acc0 + en0_exp - 15;
      sqrt_i(root, (Word16)acc0, &root, &root_exp);

      rshift = norm_l(xc_best);
      xc_mant = extract_h(L_shl(xc_best, rshift));
      coef = mult(xc_mant, root);
      rshift = rshift - 16 + root_exp - 15 - 15;
      if (rshift < 0) {
         coef = shl(coef, sub(0, rshift));
      }
      else {
         coef = shr(coef, rshift);
      }
   }

   /* smooth the coefficient and apply the two thresholds */
   acc0 = L_mult(8192, coef);
   acc0 = L_mac(acc0, 24576, *ma_a);
   *ma_a = intround(acc0);
   if ((*ma_a < ATHLD1) && (coef < (ATHLD2))) {
      coef = 0;
   }
   ltp_tap = mult(ScLTPF, coef);

   /* ------------------------------------------------------------ */
   /* filter the frame and measure the normalisation energies       */
   /* ------------------------------------------------------------ */
   en_out = 0;
   en0 = 0;
   for (k = 0; k < FRSZ; k++) {
      acc0 = L_shl(s[XQOFF+k], 15);
      acc1 = L_mult0(ltp_tap, s[XQOFF+k-lag_best]);
      acc0 = L_add(acc0, acc1);
      e[k] = intround(acc0);
      tmp = shr(e[k], dnshift);
      tmp2 = shr(s[XQOFF+k], dnshift);
      en_out = L_mac0(en_out, tmp, tmp);
      en0 = L_mac0(en0, tmp2, tmp2);
   }
   en0 = L_shr(en0, 2);

   gain = 32767;
   if (en0 != 0 && en_out != 0) {
      en_out_exp = norm_l(en_out);
      tmp = extract_h(L_shl(en_out, en_out_exp));
      tmp2 = extract_h(L_shl(en0, en_out_exp));
      if (tmp2 < tmp) {
         tmp = div_s(tmp2, tmp);
         gain = sqrts(tmp);
      }
   }

   /* ------------------------------------------------------------ */
   /* interpolate from the previous postfilter to the current one   */
   /* ------------------------------------------------------------ */
   ltp_tap = mult(gain, ltp_tap);
   ramp_step = (Word32)((1.0/(NINT+1))*(2147483648.0));
   ramp = 0;
   for (k = 0; k < NINT; k++) {
      ramp = L_add(ramp, ramp_step);
      w_new = intround(ramp);
      w_old = add(sub(32767, w_new), 1);

      /* interpolate between two filters */
      acc0 = L_mult(w_new, gain);
      acc0 = L_mac(acc0, w_old, b_prv[0]);
      tap0 = intround(acc0);
      tap1_new = mult(w_new, ltp_tap);
      tap1_old = mult(w_old, b_prv[1]);

      acc0 = L_mult(tap1_new, s[XQOFF+k-lag_best]);
      acc0 = L_mac(acc0, tap1_old, s[XQOFF+k-(*pp_prv)]);
      acc0 = L_mac(acc0, tap0, s[XQOFF+k]);
      e[k] = intround(acc0);
   }
   for (k = NINT; k < FRSZ; k++) {
      acc0 = L_mult(gain, e[k]);
      e[k] = intround(L_shl(acc0, 1));
   }

   /* ------------------------------------------------------------ */
   /* save state memory                                             */
   /* ------------------------------------------------------------ */
   *pp_prv = lag_best;
   b_prv[0] = gain;
   b_prv[1] = ltp_tap;

   return;
}
void GSDrawScanlineCodeGenerator::WriteFrame(int params) { const int _top = params + 4; if(!m_sel.fwrite) { return; } if(m_sel.colclamp == 0) { // c[0] &= 0x000000ff; // c[1] &= 0x000000ff; pcmpeqd(xmm7, xmm7); psrlw(xmm7, 8); pand(xmm5, xmm7); pand(xmm6, xmm7); } if(m_sel.fpsm == 2 && m_sel.dthe) { mov(eax, dword[esp + _top]); and(eax, 3); shl(eax, 5); paddw(xmm5, xmmword[eax + (size_t)&m_env.dimx[0]]); paddw(xmm6, xmmword[eax + (size_t)&m_env.dimx[1]]); } // GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); movdqa(xmm7, xmm5); punpcklwd(xmm5, xmm6); punpckhwd(xmm7, xmm6); packuswb(xmm5, xmm7); if(m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; pcmpeqd(xmm7, xmm7); pslld(xmm7, 31); por(xmm5, xmm7); } if(m_sel.fpsm == 2) { // GSVector4i rb = fs & 0x00f800f8; // GSVector4i ga = fs & 0x8000f800; mov(eax, 0x00f800f8); movd(xmm6, eax); pshufd(xmm6, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); mov(eax, 0x8000f800); movd(xmm7, eax); pshufd(xmm7, xmm7, _MM_SHUFFLE(0, 0, 0, 0)); movdqa(xmm4, xmm5); pand(xmm4, xmm6); pand(xmm5, xmm7); // fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); movdqa(xmm6, xmm4); movdqa(xmm7, xmm5); psrld(xmm4, 3); psrld(xmm6, 9); psrld(xmm5, 6); psrld(xmm7, 16); por(xmm5, xmm4); por(xmm7, xmm6); por(xmm5, xmm7); } if(m_sel.rfb) { // fs = fs.blend(fd, fm); blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm } bool fast = m_sel.rfb && m_sel.fpsm < 2; WritePixel(xmm5, xmm0, ebx, dl, fast, m_sel.fpsm); }
/*
 * perc_var - choose the two bandwidth-expansion factors for each of the
 *            two subframes.
 *
 * Side effects visible in this function:
 *   - LsfInt[] and LsfNew[] are doubled in place (shl by 1) on entry;
 *   - LarOld[0..1] is replaced by the Lars computed for this frame;
 *   - smooth is updated by the hysteresis test and carried to the next call.
 */
void perc_var (
  Word16 *gamma1, /* Bandwidth expansion parameter */
  Word16 *gamma2, /* Bandwidth expansion parameter */
  Word16 *LsfInt, /* Interpolated LSP vector : 1st subframe */
  Word16 *LsfNew, /* New LSP vector : 2nd subframe */
  Word16 *r_c     /* Reflection coefficients */
)
{
    Word16 lar[4];              /* Q11 : [0],[1] 1st subframe, [2],[3] 2nd subframe */
    Word16 *lar_new = &lar[2];  /* Q11 */
    Word16 *lsf;                /* Q15 */
    Word16 rc_mag;              /* Q11 */
    Word16 lar_val;             /* Q11 */
    Word16 slope;
    Word32 offset;
    Word32 acc;
    Word16 crit0, crit1;        /* Q11 */
    Word16 gap, gap_min;        /* Q10 */
    Word16 g2;
    Word16 n, sf;

    for (n = 0; n < M; n++) {
        LsfInt[n] = shl(LsfInt[n], 1);
        LsfNew[n] = shl(LsfNew[n], 1);
    }

    /* ---------------------------------------- */
    /* Reflection coefficients ---> Lar         */
    /* Lar(i) = log10( (1+rc) / (1-rc) )        */
    /* Approximated by                          */
    /* x <= SEG1            y = x               */
    /* SEG1 < x <= SEG2     y = A1 x - B1_L     */
    /* SEG2 < x <= SEG3     y = A2 x - B2_L     */
    /* x > SEG3             y = A3 x - B3_L     */
    /* ---------------------------------------- */
    for (n = 0; n < 2; n++) {
        rc_mag = abs_s(r_c[n]);
        rc_mag = shr(rc_mag, 4);

        if (sub(rc_mag, SEG1) <= 0) {
            lar_val = rc_mag;
        }
        else {
            /* pick the linear segment, then evaluate it once */
            if (sub(rc_mag, SEG2) <= 0) {
                slope = A1;
                offset = L_B1;
            }
            else if (sub(rc_mag, SEG3) <= 0) {
                slope = A2;
                offset = L_B2;
            }
            else {
                slope = A3;
                offset = L_B3;
            }
            rc_mag = shr(rc_mag, 1);
            acc = L_mult(rc_mag, slope);
            acc = L_sub(acc, offset);
            acc = L_shr(acc, 11);
            lar_val = extract_l(acc);
        }

        /* the approximation is applied to |rc|; restore the sign */
        if (r_c[n] < 0) {
            lar_val = sub(0, lar_val);
        }
        lar_new[n] = lar_val;
    }

    /* Interpolation of Lar for the 1st subframe: mean of new and previous */
    for (n = 0; n < 2; n++) {
        lar[n] = shr(add(lar_new[n], LarOld[n]), 1);
        LarOld[n] = lar_new[n];
    }

    for (sf = 0; sf < 2; sf++) { /* LOOP : gamma2 for 1st to 2nd subframes */

        /* ---------------------------------------------------------- */
        /* First criterion based on the first two Lars                */
        /* smooth == 1 ==> gamma2 can vary from 0.4 to 0.7            */
        /* smooth == 0 ==> gamma2 is set to 0.6                       */
        /*                                                            */
        /* Double threshold + hysteresis :                            */
        /* if smooth = 1                                              */
        /*   if (CritLar0 < THRESH_L1) and (CritLar1 > THRESH_H1)     */
        /*     smooth = 0                                             */
        /* if smooth = 0                                              */
        /*   if (CritLar0 > THRESH_L2) or (CritLar1 < THRESH_H2)      */
        /*     smooth = 1                                             */
        /* ---------------------------------------------------------- */
        crit0 = lar[2*sf];
        crit1 = lar[2*sf+1];

        if (smooth != 0) {
            if ((sub(crit0, THRESH_L1) < 0) && (sub(crit1, THRESH_H1) > 0)) {
                smooth = 0;
            }
        }
        else if ((sub(crit0, THRESH_L2) > 0) || (sub(crit1, THRESH_H2) < 0)) {
            smooth = 1;
        }

        if (smooth != 0) {
            gamma1[sf] = GAMMA1_1;
            gamma2[sf] = GAMMA2_1;
            continue;
        }

        /* ------------------------------------------------------ */
        /* Second criterion based on the minimum distance between */
        /* two successive LSPs                                    */
        /*                                                        */
        /* gamma2[k] = -6.0 * pi * d_min + 1.0                    */
        /*                                                        */
        /* with Lsfs normalized range 0.0 <= val <= 1.0           */
        /* ------------------------------------------------------ */
        gamma1[sf] = GAMMA1_0;
        lsf = (sf == 0) ? LsfInt : LsfNew;

        gap_min = sub(lsf[1], lsf[0]);
        for (n = 1; n < M-1; n++) {
            gap = sub(lsf[n+1], lsf[n]);
            if (sub(gap, gap_min) < 0) {
                gap_min = gap;
            }
        }

        g2 = mult(ALPHA, gap_min);
        g2 = sub(BETA, g2);
        g2 = shl(g2, 5);

        /* clamp to [GAMMA2_0_L, GAMMA2_0_H], upper bound tested first */
        if (sub(g2, GAMMA2_0_H) > 0) {
            g2 = GAMMA2_0_H;
        }
        if (sub(g2, GAMMA2_0_L) < 0) {
            g2 = GAMMA2_0_L;
        }
        gamma2[sf] = g2;
    }

    return;
}
/*
 * vad2 - voice activity decision for one input frame.
 *
 *   farray_ptr : input frame; FRM_LEN is passed to block_norm() as the length
 *   st         : detector state, read and updated in place
 *
 *   Returns ivad: 1 when the voice-metric sum exceeds the SNR-dependent
 *   threshold or while the hangover counter is still positive, 0 otherwise.
 *
 * Processing order:
 *   block-normalise and pre-emphasise the input, FFT, per-channel energy
 *   smoothing, peak-to-average test, channel SNRs and voice-metric sum,
 *   instantaneous / long-term SNR tracking, VAD decision with burst and
 *   hangover counters, spectral deviation, and finally the conditional
 *   update of the channel noise estimates.
 *
 * The test(), move16(), move32() and logic16() calls interleaved with the
 * arithmetic are kept exactly where they are - presumably they are
 * complexity-counting hooks (confirm against their definitions).
 */
Word16 vad2 (Word16 * farray_ptr, vadState2 * st)
{
	/*
	 * The channel table is defined below. In this table, the
	 * lower and higher frequency coefficients for each of the 16
	 * channels are specified. The table excludes the coefficients
	 * with numbers 0 (DC), 1, and 64 (Foldover frequency).
	 */
	const static Word16 ch_tbl[NUM_CHAN][2] =
	{
		{2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}, {12, 13}, {14, 16}, {17, 19}, {20, 22}, {23, 26}, {27, 30}, {31, 35}, {36, 41}, {42, 48}, {49, 55}, {56, 63}
	};

	/* channel energy scaling table - allows efficient division by number
	 * of DFT bins in the channel: 1/2, 1/3, 1/4, etc.
	 */
	const static Word16 ch_tbl_sh[NUM_CHAN] =
	{
		16384, 16384, 16384, 16384, 16384, 16384, 10923, 10923, 10923, 8192, 8192, 6554, 5461, 4681, 4681, 4096
	};

	/*
	 * The voice metric table is defined below. It is a non-
	 * linear table with a deadband near zero. It maps the SNR
	 * index (quantized SNR value) to a number that is a measure
	 * of voice quality.
	 */
	const static Word16 vm_tbl[90] =
	{
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23, 24, 24, 25, 26, 27, 28, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50
	};

	/* hangover as a function of peak SNR (3 dB steps) */
	const static Word16 hangover_table[20] =
	{
		30, 30, 30, 30, 30, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 8, 8, 8
	};

	/* burst sensitivity as a function of peak SNR (3 dB steps) */
	const static Word16 burstcount_table[20] =
	{
		8, 8, 8, 8, 8, 8, 8, 8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4
	};

	/* voice metric sensitivity as a function of peak SNR (3 dB steps) */
	const static Word16 vm_threshold_table[20] =
	{
		34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 40, 51, 71, 100, 139, 191, 257, 337, 432
	};

	/* State tables that use 22,9 or 27,4 scaling for ch_enrg[] */
	/* (each is indexed by st->shift_state, 0 or 1, in the code below) */
	const static Word16 noise_floor_chan[2] = {NOISE_FLOOR_CHAN_0, NOISE_FLOOR_CHAN_1};
	const static Word16 min_chan_enrg[2] = {MIN_CHAN_ENRG_0, MIN_CHAN_ENRG_1};
	const static Word16 ine_noise[2] = {INE_NOISE_0, INE_NOISE_1};
	const static Word16 fbits[2] = {FRACTIONAL_BITS_0, FRACTIONAL_BITS_1};
	const static Word16 state_change_shift_r[2] = {STATE_1_TO_0_SHIFT_R, STATE_0_TO_1_SHIFT_R};

	/* Energy scale table given 30,1 input scaling (also account for -6 dB shift on input) */
	const static Word16 enrg_norm_shift[2] = {(FRACTIONAL_BITS_0-1+2), (FRACTIONAL_BITS_1-1+2)};

	/* Automatic variables */
	Word32 Lenrg; /* scaled as 30,1 */
	Word32 Ltne; /* scaled as 22,9 */
	Word32 Ltce; /* scaled as 22,9 or 27,4 */
	Word16 tne_db; /* scaled as 7,8 */
	Word16 tce_db; /* scaled as 7,8 */
	Word16 input_buffer[FRM_LEN]; /* used for block normalising input data */
	Word16 data_buffer[FFT_LEN]; /* used for in-place FFT */
	Word16 ch_snr[NUM_CHAN]; /* scaled as 7,8 */
	Word16 ch_snrq; /* scaled as 15,0 (in 0.375 dB steps) */
	Word16 vm_sum; /* scaled as 15,0 */
	Word16 ch_enrg_dev; /* scaled as 7,8 */
	Word32 Lpeak; /* maximum channel energy */
	Word16 p2a_flag; /* flag to indicate spectral peak-to-average ratio > 10 dB */
	Word16 ch_enrg_db[NUM_CHAN]; /* scaled as 7,8 */
	Word16 ch_noise_db; /* scaled as 7,8 */
	Word16 alpha; /* scaled as 0,15 */
	Word16 one_m_alpha; /* scaled as 0,15 */
	Word16 update_flag; /* set to indicate a background noise estimate update */
	Word16 i, j, j1, j2; /* Scratch variables */
	Word16 hi1, lo1;
	Word32 Ltmp, Ltmp1, Ltmp2;
	Word16 tmp;
	Word16 normb_shift; /* block norm shift count */
	Word16 ivad; /* intermediate VAD decision (return value) */
	Word16 tsnrq; /* total signal-to-noise ratio (quantized 3 dB steps) scaled as 15,0 */
	Word16 xt; /* instantaneous frame SNR in dB, scaled as 7,8 */
	Word16 state_change;

	/* Increment frame counter */
	st->Lframe_cnt = L_add(st->Lframe_cnt, 1);

	/* Block normalize the input */
	normb_shift = block_norm(farray_ptr, input_buffer, FRM_LEN, FFT_HEADROOM);

	/* Pre-emphasize the input data and store in the data buffer with the appropriate offset */
	/* (the first DELAY entries and everything from DELAY + FRM_LEN up to FFT_LEN are zeroed) */
	for (i = 0; i < DELAY; i++)
	{
		data_buffer[i] = 0; move16();
	}

	/* bring the stored pre-emphasis sample to the current frame's normalisation shift */
	st->pre_emp_mem = shr_r(st->pre_emp_mem, sub(st->last_normb_shift, normb_shift));
	st->last_normb_shift = normb_shift; move16();

	data_buffer[DELAY] = add(input_buffer[0], mult(PRE_EMP_FAC, st->pre_emp_mem)); move16();

	for (i = DELAY + 1, j = 1; i < DELAY + FRM_LEN; i++, j++)
	{
		data_buffer[i] = add(input_buffer[j], mult(PRE_EMP_FAC, input_buffer[j-1])); move16();
	}
	st->pre_emp_mem = input_buffer[FRM_LEN-1]; move16();

	for (i = DELAY + FRM_LEN; i < FFT_LEN; i++)
	{
		data_buffer[i] = 0; move16();
	}

	/* Perform FFT on the data buffer */
	r_fft(data_buffer);

	/* Use normb_shift factor to determine the scaling of the energy estimates */
	/* (two different thresholds, -FFT_HEADROOM+2 and -FFT_HEADROOM+5, give hysteresis) */
	state_change = 0; move16();
	test();
	if (st->shift_state == 0)
	{
		test();
		if (sub(normb_shift, -FFT_HEADROOM+2) <= 0)
		{
			state_change = 1; move16();
			st->shift_state = 1; move16();
		}
	}
	else
	{
		test();
		if (sub(normb_shift, -FFT_HEADROOM+5) >= 0)
		{
			state_change = 1; move16();
			st->shift_state = 0; move16();
		}
	}

	/* Scale channel energy estimate */
	test();
	if (state_change)
	{
		for (i = LO_CHAN; i <= HI_CHAN; i++)
		{
			st->Lch_enrg[i] = L_shr(st->Lch_enrg[i], state_change_shift_r[st->shift_state]); move32();
		}
	}

	/* Estimate the energy in each channel */
	/* (on the first frame the estimate is taken directly: alpha = 32767, one_m_alpha = 0) */
	test();
	if (L_sub(st->Lframe_cnt, 1) == 0)
	{
		alpha = 32767; move16();
		one_m_alpha = 0; move16();
	}
	else
	{
		alpha = CEE_SM_FAC; move16();
		one_m_alpha = ONE_MINUS_CEE_SM_FAC; move16();
	}

	for (i = LO_CHAN; i <= HI_CHAN; i++)
	{
		Lenrg = 0; move16();
		j1 = ch_tbl[i][0]; move16();
		j2 = ch_tbl[i][1]; move16();

		/* sum of squares of the two data_buffer entries (2*j and 2*j+1) of every bin in the channel */
		for (j = j1; j <= j2; j++)
		{
			Lenrg = L_mac(Lenrg, data_buffer[2 * j], data_buffer[2 * j]);
			Lenrg = L_mac(Lenrg, data_buffer[2 * j + 1], data_buffer[2 * j + 1]);
		}

		/* Denorm energy & scale 30,1 according to the state */
		Lenrg = L_shr_r(Lenrg, sub(shl(normb_shift, 1), enrg_norm_shift[st->shift_state]));

		/* integrate over time: e[i] = (1-alpha)*e[i] + alpha*enrg/num_bins_in_chan */
		tmp = mult(alpha, ch_tbl_sh[i]);
		L_Extract (Lenrg, &hi1, &lo1);
		Ltmp = Mpy_32_16(hi1, lo1, tmp);

		L_Extract (st->Lch_enrg[i], &hi1, &lo1);
		st->Lch_enrg[i] = L_add(Ltmp, Mpy_32_16(hi1, lo1, one_m_alpha)); move32();

		/* floor the channel energy */
		test();
		if (L_sub(st->Lch_enrg[i], min_chan_enrg[st->shift_state]) < 0)
		{
			st->Lch_enrg[i] = min_chan_enrg[st->shift_state]; move32();
		}
	}

	/* Compute the total channel energy estimate (Ltce) */
	Ltce = 0; move16();
	for (i = LO_CHAN; i <= HI_CHAN; i++)
	{
		Ltce = L_add(Ltce, st->Lch_enrg[i]);
	}

	/* Calculate spectral peak-to-average ratio, set flag if p2a > 10 dB */
	Lpeak = 0; move32();
	for (i = LO_CHAN+2; i <= HI_CHAN; i++) /* Sine waves not valid for low frequencies */
	{
		test();
		if (L_sub(st->Lch_enrg [i], Lpeak) > 0)
		{
			Lpeak = st->Lch_enrg [i]; move32();
		}
	}

	/* Set p2a_flag if peak (dB) > average channel energy (dB) + 10 dB */
	/* Lpeak > Ltce/num_channels * 10^(10/10) */
	/* Lpeak > (10/16)*Ltce */
	/* (20480 / 32768 = 0.625 = 10/16) */

	L_Extract (Ltce, &hi1, &lo1);
	Ltmp = Mpy_32_16(hi1, lo1, 20480);
	test();
	if (L_sub(Lpeak, Ltmp) > 0)
	{
		p2a_flag = TRUE; move16();
	}
	else
	{
		p2a_flag = FALSE; move16();
	}

	/* Initialize channel noise estimate to either the channel energy or fixed level */
	/* Scale the energy appropriately to yield state 0 (22,9) scaling for noise */
	/* (only done while the frame counter is <= 4) */
	test();
	if (L_sub(st->Lframe_cnt, 4) <= 0)
	{
		test();
		if (p2a_flag == TRUE)
		{
			for (i = LO_CHAN; i <= HI_CHAN; i++)
			{
				st->Lch_noise[i] = INE_NOISE_0; move32();
			}
		}
		else
		{
			for (i = LO_CHAN; i <= HI_CHAN; i++)
			{
				test();
				if (L_sub(st->Lch_enrg[i], ine_noise[st->shift_state]) < 0)
				{
					st->Lch_noise[i] = INE_NOISE_0; move32();
				}
				else
				{
					test();
					if (st->shift_state == 1)
					{
						st->Lch_noise[i] = L_shr(st->Lch_enrg[i], state_change_shift_r[0]); move32();
					}
					else
					{
						st->Lch_noise[i] = st->Lch_enrg[i]; move32();
					}
				}
			}
		}
	}

	/* Compute the channel energy (in dB), the channel SNRs, and the sum of voice metrics */
	vm_sum = 0; move16();
	for (i = LO_CHAN; i <= HI_CHAN; i++)
	{
		ch_enrg_db[i] = fn10Log10(st->Lch_enrg[i], fbits[st->shift_state]); move16();
		ch_noise_db = fn10Log10(st->Lch_noise[i], FRACTIONAL_BITS_0);

		ch_snr[i] = sub(ch_enrg_db[i], ch_noise_db); move16();

		/* quantize channel SNR in 3/8 dB steps (scaled 7,8 => 15,0) */
		/* ch_snr = round((snr/(3/8))>>8) */
		/* = round(((0.6667*snr)<<2)>>8) */
		/* = round((0.6667*snr)>>6) */

		ch_snrq = shr_r(mult(21845, ch_snr[i]), 6);

		/* Accumulate the sum of voice metrics */
		/* (j is ch_snrq clamped to 0..89, the index range of vm_tbl[90]) */
		test();
		if (sub(ch_snrq, 89) < 0)
		{
			test();
			if (ch_snrq > 0)
			{
				j = ch_snrq; move16();
			}
			else
			{
				j = 0; move16();
			}
		}
		else
		{
			j = 89; move16();
		}
		vm_sum = add(vm_sum, vm_tbl[j]);
	}

	/* Initialize NOMINAL peak voice energy and average noise energy, calculate instantaneous SNR */
	test(),test(),logic16();
	if (L_sub(st->Lframe_cnt, 4) <= 0 || st->fupdate_flag == TRUE)
	{
		/* tce_db = (96 - 22 - 10*log10(64) (due to FFT)) scaled as 7,8 */
		tce_db = 14320; move16();
		st->negSNRvar = 0; move16();
		st->negSNRbias = 0; move16();

		/* Compute the total noise estimate (Ltne) */
		Ltne = 0; move32();
		for (i = LO_CHAN; i <= HI_CHAN; i++)
		{
			Ltne = L_add(Ltne, st->Lch_noise[i]);
		}

		/* Get total noise in dB */
		tne_db = fn10Log10(Ltne, FRACTIONAL_BITS_0);

		/* Initialise instantaneous and long-term peak signal-to-noise ratios */
		xt = sub(tce_db, tne_db);
		st->tsnr = xt; move16();
	}
	else
	{
		/* Calculate instantaneous frame signal-to-noise ratio */
		/* xt = 10*log10( sum(2.^(ch_snr*0.1*log2(10)))/length(ch_snr) ) */
		Ltmp1 = 0; move32();
		for (i=LO_CHAN; i<=HI_CHAN; i++)
		{
			/* Ltmp2 = ch_snr[i] * 0.1 * log2(10); (ch_snr scaled as 7,8) */
			Ltmp2 = L_shr(L_mult(ch_snr[i], 10885), 8);
			L_Extract(Ltmp2, &hi1, &lo1);
			hi1 = add(hi1, 3); /* 2^3 to compensate for negative SNR */
			Ltmp1 = L_add(Ltmp1, Pow2(hi1, lo1));
		}
		xt = fn10Log10(Ltmp1, 4+3); /* average by 16, inverse compensation 2^3 */

		/* Estimate long-term "peak" SNR */
		test(),test();
		if (sub(xt, st->tsnr) > 0)
		{
			/* tsnr = 0.9*tsnr + 0.1*xt; */
			st->tsnr = round(L_add(L_mult(29491, st->tsnr), L_mult(3277, xt)));
		}
		/* else if (xt > 0.625*tsnr) */
		else if (sub(xt, mult(20480, st->tsnr)) > 0)
		{
			/* tsnr = 0.998*tsnr + 0.002*xt; */
			st->tsnr = round(L_add(L_mult(32702, st->tsnr), L_mult(66, xt)));
		}
	}

	/* Quantize the long-term SNR in 3 dB steps, limit to 0 <= tsnrq <= 19 */
	tsnrq = shr(mult(st->tsnr, 10923), 8);

	/* tsnrq = min(19, max(0, tsnrq)); */
	/* (0..19 is the index range of the three 20-entry tables used below) */
	test(),test();
	if (sub(tsnrq, 19) > 0)
	{
		tsnrq = 19; move16();
	}
	else if (tsnrq < 0)
	{
		tsnrq = 0; move16();
	}

	/* Calculate the negative SNR sensitivity bias */
	test();
	if (xt < 0)
	{
		/* negSNRvar = 0.99*negSNRvar + 0.01*xt*xt; */
		/* xt scaled as 7,8 => xt*xt scaled as 14,17, shift to 7,8 and round */
		tmp = round(L_shl(L_mult(xt, xt), 7));
		st->negSNRvar = round(L_add(L_mult(32440, st->negSNRvar), L_mult(328, tmp)));

		/* if (negSNRvar > 4.0) negSNRvar = 4.0; */
		test();
		if (sub(st->negSNRvar, 1024) > 0)
		{
			st->negSNRvar = 1024; move16();
		}

		/* negSNRbias = max(12.0*(negSNRvar - 0.65), 0.0); */
		tmp = mult_r(shl(sub(st->negSNRvar, 166), 4), 24576);

		test();
		if (tmp < 0)
		{
			st->negSNRbias = 0; move16();
		}
		else
		{
			st->negSNRbias = shr(tmp, 8);
		}
	}

	/* Determine VAD as a function of the voice metric sum and quantized SNR */
	tmp = add(vm_threshold_table[tsnrq], st->negSNRbias);
	test();
	if (sub(vm_sum, tmp) > 0)
	{
		ivad = 1; move16();
		st->burstcount = add(st->burstcount, 1);
		/* a long enough burst (re)loads the hangover counter */
		test();
		if (sub(st->burstcount, burstcount_table[tsnrq]) > 0)
		{
			st->hangover = hangover_table[tsnrq]; move16();
		}
	}
	else
	{
		st->burstcount = 0; move16();
		st->hangover = sub(st->hangover, 1);
		/* the decision stays 1 until the hangover counter has run down */
		test();
		if (st->hangover <= 0)
		{
			ivad = 0; move16();
			st->hangover = 0; move16();
		}
		else
		{
			ivad = 1; move16();
		}
	}

	/* Calculate log spectral deviation */
	/* (on the first frame the long-term spectrum is only initialised; the deviation stays 0) */
	ch_enrg_dev = 0; move16();
	test();
	if (L_sub(st->Lframe_cnt, 1) == 0)
	{
		for (i = LO_CHAN; i <= HI_CHAN; i++)
		{
			st->ch_enrg_long_db[i] = ch_enrg_db[i]; move16();
		}
	}
	else
	{
		for (i = LO_CHAN; i <= HI_CHAN; i++)
		{
			tmp = abs_s(sub(st->ch_enrg_long_db[i], ch_enrg_db[i]));
			ch_enrg_dev = add(ch_enrg_dev, tmp);
		}
	}

	/*
	 * Calculate long term integration constant as a function of instantaneous SNR
	 * (i.e., high SNR (tsnr dB) -> slower integration (alpha = HIGH_ALPHA),
	 * low SNR (0 dB) -> faster integration (alpha = LOW_ALPHA)
	 */

	/* alpha = HIGH_ALPHA - ALPHA_RANGE * (tsnr - xt) / tsnr, low <= alpha <= high */
	tmp = sub(st->tsnr, xt);
	test(),logic16(),test(),test();
	if (tmp <= 0 || st->tsnr <= 0)
	{
		alpha = HIGH_ALPHA; move16();
		one_m_alpha = 32768L-HIGH_ALPHA; move16();
	}
	else if (sub(tmp, st->tsnr) > 0)
	{
		alpha = LOW_ALPHA; move16();
		one_m_alpha = 32768L-LOW_ALPHA; move16();
	}
	else
	{
		/* here 0 < tmp <= st->tsnr, which is the operand order used for div_s() */
		tmp = div_s(tmp, st->tsnr);
		alpha = sub(HIGH_ALPHA, mult(ALPHA_RANGE, tmp));
		one_m_alpha = sub(32767, alpha);
	}

	/* Calc long term log spectral energy */
	for (i = LO_CHAN; i <= HI_CHAN; i++)
	{
		Ltmp1 = L_mult(one_m_alpha, ch_enrg_db[i]);
		Ltmp2 = L_mult(alpha, st->ch_enrg_long_db[i]);
		st->ch_enrg_long_db[i] = round(L_add(Ltmp1, Ltmp2));
	}

	/* Set or clear the noise update flags */
	update_flag = FALSE; move16();
	st->fupdate_flag = FALSE; move16();
	test(),test();
	if (sub(vm_sum, UPDATE_THLD) <= 0)
	{
		test();
		if (st->burstcount == 0)
		{
			update_flag = TRUE; move16();
			st->update_cnt = 0; move16();
		}
	}
	else if (L_sub(Ltce, noise_floor_chan[st->shift_state]) > 0)
	{
		/* forced update path: all nested conditions must hold for UPDATE_CNT_THLD counted frames */
		test();
		if (sub(ch_enrg_dev, DEV_THLD) < 0)
		{
			test();
			if (p2a_flag == FALSE)
			{
				test();
				if (st->LTP_flag == FALSE)
				{
					st->update_cnt = add(st->update_cnt, 1);
					test();
					if (sub(st->update_cnt, UPDATE_CNT_THLD) >= 0)
					{
						update_flag = TRUE; move16();
						st->fupdate_flag = TRUE; move16();
					}
				}
			}
		}
	}

	/* hysteresis: update_cnt is cleared once it has stayed unchanged for more than HYSTER_CNT_THLD frames */
	test();
	if (sub(st->update_cnt, st->last_update_cnt) == 0)
	{
		st->hyster_cnt = add(st->hyster_cnt, 1);
	}
	else
	{
		st->hyster_cnt = 0; move16();
	}

	st->last_update_cnt = st->update_cnt; move16();

	test();
	if (sub(st->hyster_cnt, HYSTER_CNT_THLD) > 0)
	{
		st->update_cnt = 0; move16();
	}

	/* Conditionally update the channel noise estimates */
	test();
	if (update_flag == TRUE)
	{
		/* Check shift state */
		test();
		if (st->shift_state == 1)
		{
			/* get factor to shift ch_enrg[] from state 1 to 0 (noise always state 0) */
			tmp = state_change_shift_r[0]; move16();
		}
		else
		{
			/* No shift if already state 0 */
			tmp = 0; move16();
		}

		/* Update noise energy estimate */
		for (i = LO_CHAN; i <= HI_CHAN; i++)
		{
			test();
			/* integrate over time: en[i] = (1-alpha)*en[i] + alpha*e[n] */
			/* (extract with shift compensation for state 1) */
			L_Extract (L_shr(st->Lch_enrg[i], tmp), &hi1, &lo1);
			Ltmp = Mpy_32_16(hi1, lo1, CNE_SM_FAC);

			L_Extract (st->Lch_noise[i], &hi1, &lo1);
			st->Lch_noise[i] = L_add(Ltmp, Mpy_32_16(hi1, lo1, ONE_MINUS_CNE_SM_FAC)); move32();

			/* Limit low level noise */
			test();
			if (L_sub(st->Lch_noise[i], MIN_NOISE_ENRG_0) < 0)
			{
				st->Lch_noise[i] = MIN_NOISE_ENRG_0; move32();
			}
		}
	}

	return(ivad);
} /* end of vad2 () */
/*
 * A_Refl - convert direct-form coefficients to reflection coefficients with
 *          a backward Levinson recursion.
 *
 *   a[]       (i) direct-form coefficients; entries 0 .. M-1 are read
 *   refl[]    (o) reflection coefficients; entries 0 .. M-1 are written
 *   pOverflow     overflow flag pointer passed on to the basic operators
 *
 * When the recursion cannot continue - a coefficient magnitude reaches 4096,
 * or an updated coefficient does not fit in 16 bits - all M reflection
 * coefficients are set to zero and the function returns immediately.
 *
 * Both bail-out paths used to zero refl[] with the outer loop index and then
 * `break`.  In the inner loop that `break` only left the j-loop, so the
 * recursion carried on with i == M: it copied uninitialised bState[] entries
 * into aState[] and overwrote the zeroed output.  The zeroing now uses its
 * own index and returns directly.
 */
void A_Refl(
    Word16 a[],        /* i   : Directform coefficients */
    Word16 refl[],     /* o   : Reflection coefficients */
    Flag   *pOverflow
)
{
    /* local variables */
    Word16 i;
    Word16 j;
    Word16 k;
    Word16 aState[M];
    Word16 bState[M];
    Word16 normShift;
    Word16 normProd;
    Word32 L_acc;
    Word16 scale;
    Word32 L_temp;
    Word16 temp;
    Word16 invNorm;

    /* initialize states */
    for (i = 0; i < M; i++)
    {
        aState[i] = a[i];
    }

    /* backward Levinson recursion */
    for (i = M - 1; i >= 0; i--)
    {
        /* shl(aState[i], 3) below needs |aState[i]| < 4096 to stay in 16 bits */
        if (abs_s(aState[i]) >= 4096)
        {
            for (k = 0; k < M; k++)
            {
                refl[k] = 0;
            }
            return;
        }

        refl[i] = shl(aState[i], 3, pOverflow);

        /* MAX_32 - refl[i]^2, normalised; invNorm = 16384 / normalised value */
        L_temp = L_mult(refl[i], refl[i], pOverflow);
        L_acc = L_sub(MAX_32, L_temp, pOverflow);

        normShift = norm_l(L_acc);
        scale = sub(15, normShift, pOverflow);

        L_acc = L_shl(L_acc, normShift, pOverflow);
        normProd = pv_round(L_acc, pOverflow);

        invNorm = div_s(16384, normProd);

        /* step the predictor down from order i+1 to order i */
        for (j = 0; j < i; j++)
        {
            L_acc = L_deposit_h(aState[j]);
            L_acc = L_msu(L_acc, refl[i], aState[i-j-1], pOverflow);

            temp = pv_round(L_acc, pOverflow);
            L_temp = L_mult(invNorm, temp, pOverflow);
            L_temp = L_shr_r(L_temp, scale, pOverflow);

            /* updated coefficient does not fit in 16 bits: give up */
            if (L_abs(L_temp) > 32767)
            {
                for (k = 0; k < M; k++)
                {
                    refl[k] = 0;
                }
                return;
            }

            bState[j] = extract_l(L_temp);
        }

        for (j = 0; j < i; j++)
        {
            aState[j] = bState[j];
        }
    }

    return;
}
/*
 * D4i40_17_fast - reduced-complexity search for four signed pulses in a
 * subframe of L_SUBFR samples.
 *
 * Side effects visible to the caller:
 *   - dn[] is replaced by its absolute value (negative entries are negated).
 *   - the cross-correlation tables inside rr[] are multiplied in place by the
 *     pulse signs chosen from dn[].
 *   - cod[] receives the four-pulse codeword (sign value shifted right by 2).
 *   - y[] receives the sum of the four signed, shifted copies of h[].
 *   - *sign receives a 4-bit mask, bit k set when pulse k is positive.
 *
 * Returns the packed position index:
 *   ip0/5 + ((ip1/5) << 3) + ((ip2/5) << 6) + (code of ip3 << 9),
 * where the code of ip3 is 2*(ip3/5) + (ip3 % 5 - 3).
 *
 * rr[] layout, as set up below: five diagonal tables of NB_POS entries
 * followed by nine cross tables of MSIZE entries each.
 */
static Word16 D4i40_17_fast(/*(o) : Index of pulses positions.               */
  Word16 dn[],          /* (i/o)  : Correlations between h[] and Xn[];
                                    overwritten with absolute values.        */
  Word16 rr[],          /* (i/o)  : Correlations of impulse response h[];
                                    cross terms get the pulse signs applied. */
  Word16 h[],           /* (i) Q12: Impulse response of filters.             */
  Word16 cod[],         /* (o) Q13: Selected algebraic codeword.             */
  Word16 y[],           /* (o) Q12: Filtered algebraic codeword.             */
  Word16 *sign          /* (o)    : Signs of 4 pulses.                       */
)
{
  Word16 i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  Word16 i, j, ix, iy, track, max;
  Word16 prev_i0, i1_offset;
  Word16 psk, ps, ps0, ps1, ps2, sq, sq2;
  Word16 alpk, alp, alp_16;
  Word32 s, alp0, alp1, alp2;
  Word16 *p0, *p1, *p2, *p3, *p4;
  Word16 sign_dn[L_SUBFR], sign_dn_inv[L_SUBFR], *psign;
  Word16 tmp_vect[NB_POS];
  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  Word16 *ptr_rri0i3_i4;
  Word16 *ptr_rri1i3_i4;
  Word16 *ptr_rri2i3_i4;
  Word16 *ptr_rri3i3_i4;

  /* Init pointers */
  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;
  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;

  /*-----------------------------------------------------------------------*
   * Choose the sign of the impulse.                                        *
   * sign_dn[] holds MAX_16 / MIN_16 for non-negative / negative dn[];      *
   * sign_dn_inv[] holds the opposite.  dn[] becomes non-negative.          *
   *-----------------------------------------------------------------------*/
  for (i = 0; i < L_SUBFR; i++)
  {
    if (dn[i] >= 0)
    {
      sign_dn[i] = MAX_16;
      sign_dn_inv[i] = MIN_16;
    }
    else
    {
      sign_dn[i] = MIN_16;
      sign_dn_inv[i] = MAX_16;
      dn[i] = negate(dn[i]);
    }
  }

  /*-------------------------------------------------------------------*
   * Modification of rrixiy[] to take signs into account.              *
   *                                                                   *
   * Each element is read, scaled and written back BEFORE the pointer  *
   * is advanced.  Writing "*p++ = mult(*p, ...)" would modify and     *
   * read p without sequencing, which is undefined behaviour in C.     *
   *-------------------------------------------------------------------*/
  p0 = rri0i1;
  p1 = rri0i2;
  p2 = rri0i3;
  p3 = rri0i4;

  for (i0 = 0; i0 < L_SUBFR; i0 += STEP)
  {
    psign = sign_dn;
    if (psign[i0] < 0)
    {
      psign = sign_dn_inv;
    }

    for (i1 = 1; i1 < L_SUBFR; i1 += STEP)
    {
      *p0 = mult(*p0, psign[i1]);      p0++;
      *p1 = mult(*p1, psign[i1+1]);    p1++;
      *p2 = mult(*p2, psign[i1+2]);    p2++;
      *p3 = mult(*p3, psign[i1+3]);    p3++;
    }
  }

  p0 = rri1i2;
  p1 = rri1i3;
  p2 = rri1i4;

  for (i1 = 1; i1 < L_SUBFR; i1 += STEP)
  {
    psign = sign_dn;
    if (psign[i1] < 0)
    {
      psign = sign_dn_inv;
    }

    for (i2 = 2; i2 < L_SUBFR; i2 += STEP)
    {
      *p0 = mult(*p0, psign[i2]);      p0++;
      *p1 = mult(*p1, psign[i2+1]);    p1++;
      *p2 = mult(*p2, psign[i2+2]);    p2++;
    }
  }

  p0 = rri2i3;
  p1 = rri2i4;

  for (i2 = 2; i2 < L_SUBFR; i2 += STEP)
  {
    psign = sign_dn;
    if (psign[i2] < 0)
    {
      psign = sign_dn_inv;
    }

    for (i3 = 3; i3 < L_SUBFR; i3 += STEP)
    {
      *p0 = mult(*p0, psign[i3]);      p0++;
      *p1 = mult(*p1, psign[i3+1]);    p1++;
    }
  }

  /*-------------------------------------------------------------------*
   * Search the optimum positions of the four pulses which maximize    *
   *     square(correlation) / energy                                  *
   *-------------------------------------------------------------------*/
  psk = -1;
  alpk = 1;

  /* First pass uses the "i3" tables; they are switched to the "i4"
     tables at the bottom of the track loop. */
  ptr_rri0i3_i4 = rri0i3;
  ptr_rri1i3_i4 = rri1i3;
  ptr_rri2i3_i4 = rri2i3;
  ptr_rri3i3_i4 = rri3i3;

  /* Initializations only to remove warning from some compilers */
  ip0 = 0; ip1 = 1; ip2 = 2; ip3 = 3; ix = 0; iy = 0; ps = 0;

  /* search 2 times: track 3 and 4 */
  for (track = 3; track < 5; track++)
  {
    /*------------------------------------------------------------------*
     * depth first search 3, phase A: track 2 and 3/4.                  *
     *------------------------------------------------------------------*/
    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 2 */
    prev_i0 = -1;

    for (i = 0; i < 2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 2, skipping the one
         already tried in the previous iteration */
      for (j = 2; j < L_SUBFR; j += STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0, j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = rri2i2 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 3/4 (starting at "track") */
      p0 = ptr_rri2i3_i4 + shl(j, 3);
      p1 = ptr_rri3i3_i4;

      for (i1 = track; i1 < L_SUBFR; i1 += STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0++, _1_2);
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp, sq2), sq, alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* i1_offset = 8*(i1/5) */

    /*------------------------------------------------------------------*
     * depth first search 3, phase B: track 0 and 1.                    *
     *------------------------------------------------------------------*/
    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */
    p0 = rri1i2 + mult(i0, 6554);
    p1 = ptr_rri1i3_i4 + mult(i1, 6554);
    p2 = rri1i1;
    p3 = tmp_vect;

    for (i3 = 1; i3 < L_SUBFR; i3 += STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);        p0 += NB_POS;
      s = L_mac(s, *p1, _1_4);      p1 += NB_POS;
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 0 */
    p0 = rri0i2 + mult(i0, 6554);
    p1 = ptr_rri0i3_i4 + mult(i1, 6554);
    p2 = rri0i0;
    p3 = rri0i1;

    for (i2 = 0; i2 < L_SUBFR; i2 += STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1, _1_8);       p1 += NB_POS;
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 1 */
      p4 = tmp_vect;

      for (i3 = 1; i3 < L_SUBFR; i3 += STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp, sq2), sq, alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

    /*----------------------------------------------------------------*
     * depth first search 3: compare codevector with the best case.   *
     *----------------------------------------------------------------*/
    s = L_msu(L_mult(alpk, sq), psk, alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip2 = i0;
      ip3 = i1;
      ip0 = ix;
      ip1 = iy;
    }

    /*------------------------------------------------------------------*
     * depth first search 4, phase A: track 3 and 0.                    *
     *------------------------------------------------------------------*/
    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 3/4 */
    prev_i0 = -1;

    for (i = 0; i < 2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 3/4 */
      for (j = track; j < L_SUBFR; j += STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0, j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = ptr_rri3i3_i4 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 0 */
      p0 = ptr_rri0i3_i4 + j;
      p1 = rri0i0;

      for (i1 = 0; i1 < L_SUBFR; i1 += STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0, _1_2);       p0 += NB_POS;
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp, sq2), sq, alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* i1_offset = 8*(i1/5) */

    /*------------------------------------------------------------------*
     * depth first search 4, phase B: track 1 and 2.                    *
     *------------------------------------------------------------------*/
    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */
    p0 = ptr_rri2i3_i4 + mult(i0, 6554);
    p1 = rri0i2 + i1_offset;
    p2 = rri2i2;
    p3 = tmp_vect;

    for (i3 = 2; i3 < L_SUBFR; i3 += STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);         p0 += NB_POS;
      s = L_mac(s, *p1++, _1_4);
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 1 */
    p0 = ptr_rri1i3_i4 + mult(i0, 6554);
    p1 = rri0i1 + i1_offset;
    p2 = rri1i1;
    p3 = rri1i2;

    for (i2 = 1; i2 < L_SUBFR; i2 += STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1++, _1_8);
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 2 */
      p4 = tmp_vect;

      for (i3 = 2; i3 < L_SUBFR; i3 += STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp, sq2), sq, alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

    /*----------------------------------------------------------------*
     * depth first search 4: compare codevector with the best case.   *
     *----------------------------------------------------------------*/
    s = L_msu(L_mult(alpk, sq), psk, alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip3 = i0;
      ip0 = i1;
      ip1 = ix;
      ip2 = iy;
    }

    /* Second pass (track 4) works on the "i4" tables. */
    ptr_rri0i3_i4 = rri0i4;
    ptr_rri1i3_i4 = rri1i4;
    ptr_rri2i3_i4 = rri2i4;
    ptr_rri3i3_i4 = rri4i4;
  }

  /* Set the sign of impulses (i0..i3 are reused to hold the sign values) */
  i0 = sign_dn[ip0];
  i1 = sign_dn[ip1];
  i2 = sign_dn[ip2];
  i3 = sign_dn[ip3];

  /* Find the codeword corresponding to the selected positions */
  for (i = 0; i < L_SUBFR; i++)
  {
    cod[i] = 0;
  }

  cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */
  cod[ip1] = shr(i1, 2);
  cod[ip2] = shr(i2, 2);
  cod[ip3] = shr(i3, 2);

  /* find the filtered codeword: y[] is cleared below ip0 and then fully
     written from ip0 onwards by the first pulse */
  for (i = 0; i < ip0; i++)
  {
    y[i] = 0;
  }

  if (i0 > 0)
  {
    for (i = ip0, j = 0; i < L_SUBFR; i++, j++) y[i] = h[j];
  }
  else
  {
    for (i = ip0, j = 0; i < L_SUBFR; i++, j++) y[i] = negate(h[j]);
  }

  if (i1 > 0)
  {
    for (i = ip1, j = 0; i < L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  }
  else
  {
    for (i = ip1, j = 0; i < L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);
  }

  if (i2 > 0)
  {
    for (i = ip2, j = 0; i < L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  }
  else
  {
    for (i = ip2, j = 0; i < L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);
  }

  if (i3 > 0)
  {
    for (i = ip3, j = 0; i < L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  }
  else
  {
    for (i = ip3, j = 0; i < L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);
  }

  /* find codebook index;  17-bit address (4 sign bits + 13 position bits) */
  i = 0;
  if (i0 > 0) i = add(i, 1);
  if (i1 > 0) i = add(i, 2);
  if (i2 > 0) i = add(i, 4);
  if (i3 > 0) i = add(i, 8);
  *sign = i;

  ip0 = mult(ip0, 6554);         /* ip0/5 */
  ip1 = mult(ip1, 6554);         /* ip1/5 */
  ip2 = mult(ip2, 6554);         /* ip2/5 */
  i   = mult(ip3, 6554);         /* ip3/5 */
  j   = add(i, shl(i, 2));       /* j = i*5 */
  j   = sub(ip3, add(j, 3));     /* j = ip3%5 - 3 */
  ip3 = add(shl(i, 1), j);

  i = add(ip0, shl(ip1, 3));
  i = add(i  , shl(ip2, 6));
  i = add(i  , shl(ip3, 9));

  return i;
}
/*
 * D2i40_11 - search two signed pulses in a subframe of L_SUBFR samples.
 *
 * Side effects visible to the caller:
 *   - Dn[] is replaced by its absolute value.
 *   - eight cross-correlation tables inside rr[] are multiplied in place by
 *     the product of the two pulse signs.
 *   - code[] receives the two-pulse codeword, y[] the sum of the two signed,
 *     shifted copies of h[].
 *   - *sign receives 2*signIndex[1] + signIndex[0] (1 = positive pulse).
 *
 * Returns grayEncode[] of the first pulse's table index plus
 * grayEncode[] of the second pulse's table index shifted left by 4.
 *
 * i_subfr is accepted for interface compatibility and is not read here.
 */
static Word16 D2i40_11(  /* (o)    : Index of pulses positions.             */
  Word16 Dn[],           /* (i/o)  : Correlations between h[] and Xn[].     */
  Word16 rr[],           /* (i/o)  : Correlations of impulse response h[].  */
  Word16 h[],            /* (i)    : Impulse response of filters.           */
  Word16 code[],         /* (o)    : Selected algebraic codeword.           */
  Word16 y[],            /* (o)    : Filtered algebraic codeword.           */
  Word16 *sign,          /* (o)    : Signs of the two pulses.               */
  Word16 i_subfr         /* (i)    : subframe flag (unused)                 */
)
{
  Word16 i0, i1, ip0, ip1;
  Word16 start0, start1;             /* first position of each pulse loop   */
  Word16 i, j, k, index, tmp, swap;
  Word16 ps0, ps1, alp, alp0;
  Word32 alp1;
  Word16 cand_sq, best_sq, best_alp; /* candidate / best criterion terms    */
  Word32 L_temp;
  Word16 posIndex[2], signIndex[2];
  Word16 m0_bestPos, m1_bestPos;
  Word16 p_sign[L_SUBFR];

  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *RRi1i1, *rri0i3, *RRi3i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3;

  Word16 *diag0;        /* walks the diagonal table of the outer pulse      */
  Word16 *diag1;        /* walks the diagonal table of the inner pulse      */
  Word16 *diag1_base;   /* start of the inner diagonal table                */
  Word16 *cross;        /* walks the cross tables, never rewound            */

  /* Carve rr[] into 5 diagonal tables and 8 cross tables. */
  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;

  rri0i1 = rri4i4 + NB_POS;
  RRi1i1 = rri0i1 + MSIZE;   /* Special for 6.4 kbps */
  rri0i3 = RRi1i1 + MSIZE;
  RRi3i4 = rri0i3 + MSIZE;   /* Special for 6.4 kbps */
  rri1i2 = RRi3i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;

  /* Record the sign of every correlation and make Dn[] non-negative. */
  for (i = 0; i < L_SUBFR; i++)
  {
    if (Dn[i] < 0)
    {
      p_sign[i] = (Word16)0x8000;
      Dn[i] = negate(Dn[i]);
    }
    else
    {
      p_sign[i] = 0x7fff;
    }
  }

  /* Fold the pulse signs into the cross tables; k is the running element
     index inside each MSIZE-sized table. */
  k = 0;
  for (i0 = 0; i0 < L_SUBFR; i0 += STEP)
  {
    for (i1 = 1; i1 < L_SUBFR; i1 += STEP)
    {
      rri0i1[k] = mult(rri0i1[k], mult(p_sign[i0], p_sign[i1]));
      rri0i3[k] = mult(rri0i3[k], mult(p_sign[i0], p_sign[i1+2]));
      k++;
    }
  }

  k = 0;
  for (i0 = 1; i0 < L_SUBFR; i0 += STEP)
  {
    for (i1 = 2; i1 < L_SUBFR; i1 += STEP)
    {
      rri1i2[k] = mult(rri1i2[k], mult(p_sign[i0], p_sign[i1]));
      rri1i3[k] = mult(rri1i3[k], mult(p_sign[i0], p_sign[i1+1]));
      rri1i4[k] = mult(rri1i4[k], mult(p_sign[i0], p_sign[i1+2]));
      k++;
    }
  }

  k = 0;
  for (i0 = 2; i0 < L_SUBFR; i0 += STEP)
  {
    for (i1 = 3; i1 < L_SUBFR; i1 += STEP)
    {
      rri2i3[k] = mult(rri2i3[k], mult(p_sign[i0], p_sign[i1]));
      RRi3i4[k] = mult(RRi3i4[k], mult(p_sign[i0+1], p_sign[i1+1]));
      k++;
    }
  }

  k = 0;
  for (i0 = 1; i0 < L_SUBFR; i0 += STEP)
  {
    for (i1 = 1; i1 < L_SUBFR; i1 += STEP)
    {
      RRi1i1[k] = mult(RRi1i1[k], mult(p_sign[i0], p_sign[i1]));
      k++;
    }
  }

  /*-------------------------------------------------------------------*
   * The actual search.                                                *
   *-------------------------------------------------------------------*/
  ip0 = 1;                     /* any valid pulse position */
  ip1 = 0;                     /* any valid pulse position */
  best_sq = 0;
  best_alp = MAX_16;

  cross = rri0i1;              /* consumed sequentially across all passes */
  diag1_base = rri1i1;

  /* Pass 0 set-up: outer pulse from 0, inner pulse from 1. */
  start0 = 0;
  start1 = 1;
  diag0 = rri0i0;

  for (i = 0; i < 9; i++)
  {
    if (i == 4)
    {
      i++;                     /* pass 4 is skipped to keep the swap sequence */
    }
    swap = i & 1;

    /* Per-pass set-up.  Anything not assigned in a case keeps the value
       left by the previous pass (diag0 in particular keeps advancing). */
    switch (i)
    {
      case 1:                  /* outer from 1, inner from 1 */
        start0 = 1;
        break;

      case 2:                  /* outer from 0, inner from 3 */
        diag1_base = rri3i3;
        start0 = 0;
        start1 = 3;
        diag0 = rri0i0;
        break;

      case 3:                  /* outer from 3, inner from 4 */
        diag1_base = rri4i4;
        start0 = 3;
        start1 = 4;
        diag0 = rri3i3;
        break;

      case 5:                  /* outer from 1, inner from 2 */
        diag1_base = rri2i2;
        start0 = 1;
        start1 = 2;
        diag0 = rri1i1;
        break;

      case 6:                  /* outer from 1, inner from 3 */
        diag1_base = rri3i3;
        start1 = 3;
        diag0 = rri1i1;
        break;

      case 7:                  /* outer from 1, inner from 4 */
        diag1_base = rri4i4;
        start1 = 4;
        diag0 = rri1i1;
        break;

      case 8:                  /* outer from 2, inner from 3 */
        diag1_base = rri3i3;
        start0 = 2;
        start1 = 3;
        break;

      default:                 /* pass 0: initial set-up already in place */
        break;
    }

    for (i0 = start0; i0 < 40; i0 += STEP)
    {
      diag1 = diag1_base;
      ps0 = Dn[i0];
      alp0 = *diag0++;

      for (i1 = start1; i1 < 40; i1 += STEP)
      {
        ps1 = add(ps0, Dn[i1]);

        alp1 = L_mult(alp0, 1);
        alp1 = L_mac(alp1, *diag1++, 1);
        alp1 = L_mac(alp1, *cross++, 2);
        alp = extract_l(L_shr(alp1, 5));

        cand_sq = mult(ps1, ps1);

        /* keep the candidate when cand_sq*best_alp - best_sq*alp > 0 */
        L_temp = L_mult(cand_sq, best_alp);
        L_temp = L_msu(L_temp, best_sq, alp);

        if (L_temp > 0L)
        {
          best_sq = cand_sq;
          best_alp = alp;

          if (swap)
          {
            ip0 = i0;
            ip1 = i1;
          }
          else
          {
            ip0 = i1;
            ip1 = i0;
          }
        }
      }
    }
  }

  /* convert from position to table entry index */
  i0 = 0;
  while ((i0 < 16) && (ip0 != trackTable0[i0]))
  {
    i0++;
  }
  ip0 = i0;

  i1 = 0;
  while ((i1 < 32) && (ip1 != trackTable1[i1]))
  {
    i1++;
  }
  ip1 = i1;

  m0_bestPos = trackTable0[ip0];
  m1_bestPos = trackTable1[ip1];

  posIndex[0] = grayEncode[ip0];
  posIndex[1] = grayEncode[ip1];

  signIndex[0] = (p_sign[m0_bestPos] > 0) ? 1 : 0;
  signIndex[1] = (p_sign[m1_bestPos] > 0) ? 1 : 0;

  /* build innovation vector */
  for (i = 0; i < L_SUBFR; i++)
  {
    code[i] = 0;
  }
  code[m0_bestPos] = shr(p_sign[m0_bestPos], 2);
  code[m1_bestPos] = add(code[m1_bestPos], shr(p_sign[m1_bestPos], 2));

  *sign = add(signIndex[1], signIndex[1]);
  *sign = add(*sign, signIndex[0]);

  tmp = shl(posIndex[1], 4);
  index = add(posIndex[0], tmp);

  /* compute filtered cbInnovation */
  for (i = 0; i < L_SUBFR; i++)
  {
    y[i] = 0;
  }

  if (signIndex[0] != 0)
  {
    for (i = m0_bestPos, j = 0; i < L_SUBFR; i++, j++) y[i] = h[j];
  }
  else
  {
    for (i = m0_bestPos, j = 0; i < L_SUBFR; i++, j++) y[i] = negate(h[j]);
  }

  if (signIndex[1] != 0)
  {
    for (i = m1_bestPos, j = 0; i < L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  }
  else
  {
    for (i = m1_bestPos, j = 0; i < L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);
  }

  return index;
}
static void Cor_h( Word16 *H, /* (i) Q12 :Impulse response of filters */ Word16 *rr /* (o) :Correlations of H[] */ ) { Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4; Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4; Word16 *rri1i2, *rri1i3, *rri1i4; Word16 *rri2i3, *rri2i4; Word16 *p0, *p1, *p2, *p3, *p4; Word16 *ptr_hd, *ptr_hf, *ptr_h1, *ptr_h2; Word32 cor; Word16 i, k, ldec, l_fin_sup, l_fin_inf; Word16 h[L_SUBFR]; /* Scaling h[] for maximum precision */ cor = 0; for(i=0; i<L_SUBFR; i++) cor = L_mac(cor, H[i], H[i]); if(sub(extract_h(cor),32000) > 0) { for(i=0; i<L_SUBFR; i++) { h[i] = shr(H[i], 1); } } else { k = norm_l(cor); k = shr(k, 1); for(i=0; i<L_SUBFR; i++) { h[i] = shl(H[i], k); } } /*------------------------------------------------------------* * Compute rri0i0[], rri1i1[], rri2i2[], rri3i3 and rri4i4[] * *------------------------------------------------------------*/ /* Init pointers */ rri0i0 = rr; rri1i1 = rri0i0 + NB_POS; rri2i2 = rri1i1 + NB_POS; rri3i3 = rri2i2 + NB_POS; rri4i4 = rri3i3 + NB_POS; rri0i1 = rri4i4 + NB_POS; rri0i2 = rri0i1 + MSIZE; rri0i3 = rri0i2 + MSIZE; rri0i4 = rri0i3 + MSIZE; rri1i2 = rri0i4 + MSIZE; rri1i3 = rri1i2 + MSIZE; rri1i4 = rri1i3 + MSIZE; rri2i3 = rri1i4 + MSIZE; rri2i4 = rri2i3 + MSIZE; p0 = rri0i0 + NB_POS-1; /* Init pointers to last position of rrixix[] */ p1 = rri1i1 + NB_POS-1; p2 = rri2i2 + NB_POS-1; p3 = rri3i3 + NB_POS-1; p4 = rri4i4 + NB_POS-1; ptr_h1 = h; cor = 0; for(i=0; i<NB_POS; i++) { cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++; *p4-- = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++; *p3-- = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++; *p2-- = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++; *p1-- = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++; *p0-- = extract_h(cor); } /*-----------------------------------------------------------------* * Compute elements of: rri2i3[], rri1i2[], rri0i1[] and rri0i4[] * 
*-----------------------------------------------------------------*/ l_fin_sup = MSIZE-1; l_fin_inf = l_fin_sup-(Word16)1; ldec = NB_POS+1; ptr_hd = h; ptr_hf = ptr_hd + 1; for(k=0; k<NB_POS; k++) { p3 = rri2i3 + l_fin_sup; p2 = rri1i2 + l_fin_sup; p1 = rri0i1 + l_fin_sup; p0 = rri0i4 + l_fin_inf; cor = 0; ptr_h1 = ptr_hd; ptr_h2 = ptr_hf; for(i=k+(Word16)1; i<NB_POS; i++ ) { cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p2 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p1 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p0 = extract_h(cor); p3 -= ldec; p2 -= ldec; p1 -= ldec; p0 -= ldec; } cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p2 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p1 = extract_h(cor); l_fin_sup -= NB_POS; l_fin_inf--; ptr_hf += STEP; } /*---------------------------------------------------------------------* * Compute elements of: rri2i4[], rri1i3[], rri0i2[], rri1i4[], rri0i3 * *---------------------------------------------------------------------*/ ptr_hd = h; ptr_hf = ptr_hd + 2; l_fin_sup = MSIZE-1; l_fin_inf = l_fin_sup-(Word16)1; for(k=0; k<NB_POS; k++) { p4 = rri2i4 + l_fin_sup; p3 = rri1i3 + l_fin_sup; p2 = rri0i2 + l_fin_sup; p1 = rri1i4 + l_fin_inf; p0 = rri0i3 + l_fin_inf; cor = 0; ptr_h1 = ptr_hd; ptr_h2 = ptr_hf; for(i=k+(Word16)1; i<NB_POS; i++ ) { cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p4 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p2 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p1 = extract_h(cor); cor = 
L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p0 = extract_h(cor); p4 -= ldec; p3 -= ldec; p2 -= ldec; p1 -= ldec; p0 -= ldec; } cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p4 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p2 = extract_h(cor); l_fin_sup -= NB_POS; l_fin_inf--; ptr_hf += STEP; } /*----------------------------------------------------------------------* * Compute elements of: rri1i4[], rri0i3[], rri2i4[], rri1i3[], rri0i2 * *----------------------------------------------------------------------*/ ptr_hd = h; ptr_hf = ptr_hd + 3; l_fin_sup = MSIZE-1; l_fin_inf = l_fin_sup-(Word16)1; for(k=0; k<NB_POS; k++) { p4 = rri1i4 + l_fin_sup; p3 = rri0i3 + l_fin_sup; p2 = rri2i4 + l_fin_inf; p1 = rri1i3 + l_fin_inf; p0 = rri0i2 + l_fin_inf; ptr_h1 = ptr_hd; ptr_h2 = ptr_hf; cor = 0; for(i=k+(Word16)1; i<NB_POS; i++ ) { cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p4 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p2 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p1 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p0 = extract_h(cor); p4 -= ldec; p3 -= ldec; p2 -= ldec; p1 -= ldec; p0 -= ldec; } cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p4 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); l_fin_sup -= NB_POS; l_fin_inf--; ptr_hf += STEP; } /*----------------------------------------------------------------------* * Compute elements of: rri0i4[], rri2i3[], rri1i2[], rri0i1[] * *----------------------------------------------------------------------*/ ptr_hd = h; ptr_hf = ptr_hd + 4; l_fin_sup = MSIZE-1; l_fin_inf = l_fin_sup-(Word16)1; for(k=0; k<NB_POS; k++) { p3 = rri0i4 + l_fin_sup; p2 = rri2i3 + l_fin_inf; p1 = 
rri1i2 + l_fin_inf; p0 = rri0i1 + l_fin_inf; ptr_h1 = ptr_hd; ptr_h2 = ptr_hf; cor = 0; for(i=k+(Word16)1; i<NB_POS; i++ ) { cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p2 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p1 = extract_h(cor); cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p0 = extract_h(cor); p3 -= ldec; p2 -= ldec; p1 -= ldec; p0 -= ldec; } cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++; *p3 = extract_h(cor); l_fin_sup -= NB_POS; l_fin_inf--; ptr_hf += STEP; } return; }
/*
 * Cor_h_D - fill rr[] with the correlations of the impulse response H[],
 * with table contents depending on the external CODEC_MODE value.
 *
 * H[] is first copied into a scaled local h[]: halved when the high word of
 * its energy exceeds 32000, otherwise shifted left by norm_l(energy)/2.
 *
 * rr[] is split into five diagonal tables of NB_POS entries followed by nine
 * cross tables of MSIZE entries.  Stores guarded by "CODEC_MODE == 1" and
 * "CODEC_MODE == 2" select what ends up in the rri0i4[] slot, and an extra
 * pass for CODEC_MODE == 1 fills the rri0i2[] slot with RRi1i1[].
 * NOTE(review): the inline comments imply CODEC_MODE == 1 is the 6.4 kbps
 * mode and CODEC_MODE == 2 the regular mode - confirm against the
 * definition of CODEC_MODE.
 */
static void Cor_h_D(
  Word16 *H,     /* (i) Q12 :Impulse response of filters */
  Word16 *rr     /* (o)     :Correlations of H[]         */
)
{
  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  Word16 *p0, *p1, *p2, *p3, *p4;                /* write cursors          */
  Word16 *ptr_hd, *ptr_hf, *ptr_h1, *ptr_h2;     /* read cursors into h[]  */
  Word32 cor;
  Word16 i, k, ldec, l_fin_sup, l_fin_inf;
  Word16 h[L_SUBFR];
  Word32 L_tmp;
  Word16 lsym;   /* offset from an element to its mirror in RRi1i1[] */

  /* Scaling h[] for maximum precision */

  cor = 0;
  for(i=0; i<L_SUBFR; i++)
    cor = L_mac(cor, H[i], H[i]);

  L_tmp = L_sub(extract_h(cor),32000);
  if(L_tmp>0L )
  {
    for(i=0; i<L_SUBFR; i++) {
      h[i] = shr(H[i], 1);
    }
  }
  else
  {
    k = norm_l(cor);
    k = shr(k, 1);

    for(i=0; i<L_SUBFR; i++) {
      h[i] = shl(H[i], k);
    }
  }

  /*-----------------------------------------------------------------*
   * In case of G729 mode, nine cross correlations has to be         *
   * calculated, namely the following:                               *
   *                                                                 *
   * rri0i1[],                                                       *
   * rri0i2[],   rri1i2[],                                           *
   * rri0i3[],   rri1i3[],  rri2i3[],                                *
   * rri0i4[],   rri1i4[],  rri2i4[],                                *
   *                                                                 *
   * In case of G729 on 6.4 kbps mode, three of the above nine cross *
   * correlations are not needed for the codebook search, namely     *
   * rri0i2[], rri0i4[] and rri2i4[]. Two of these three 64-element  *
   * positions are instead used by two cross correlations needed     *
   * only by the 6.4 kbps mode (see D2i40_11() for details).         *
   *-----------------------------------------------------------------*/

  /* Init pointers */

  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;

  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;   /* Holds RRi1i1[] in 6.4 kbps mode */
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;   /* Holds RRi3i4[] in 6.4 kbps mode */
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;   /* Not used in 6.4 kbps mode */

  /*------------------------------------------------------------*
   * Compute rri0i0[], rri1i1[], rri2i2[], rri3i3 and rri4i4[]  *
   * The running energy of h[] is stored from the last slot of  *
   * each table down to slot 0.                                 *
   *------------------------------------------------------------*/

  p0 = rri0i0 + NB_POS-1;   /* Init pointers to last position of rrixix[] */
  p1 = rri1i1 + NB_POS-1;
  p2 = rri2i2 + NB_POS-1;
  p3 = rri3i3 + NB_POS-1;
  p4 = rri4i4 + NB_POS-1;

  ptr_h1 = h;
  cor    = 0;
  for(i=0;  i<NB_POS; i++)
  {
    cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++;
    *p4-- = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++;
    *p3-- = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++;
    *p2-- = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++;
    *p1-- = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h1); ptr_h1++;
    *p0-- = extract_h(cor);
  }

  /*-----------------------------------------------------------------*
   * Compute elements of: rri2i3[], rri1i2[], rri0i1[] and rri0i4[]  *
   * (lag 1).  The rri0i4[] slot is written through p4 only when     *
   * CODEC_MODE == 1 and through p0 only when CODEC_MODE == 2.       *
   *-----------------------------------------------------------------*/

  l_fin_sup = MSIZE-1;
  l_fin_inf = l_fin_sup-(Word16)1;
  ldec = NB_POS+1;          /* one step back along a table diagonal */

  ptr_hd = h;
  ptr_hf = ptr_hd + 1;

  for(k=0; k<NB_POS; k++) {

    p4 = rri0i4 + l_fin_sup;
    p3 = rri2i3 + l_fin_sup;
    p2 = rri1i2 + l_fin_sup;
    p1 = rri0i1 + l_fin_sup;
    p0 = rri0i4 + l_fin_inf;

    cor = 0;
    ptr_h1 = ptr_hd;
    ptr_h2 =  ptr_hf;

    for(i=k+(Word16)1; i<NB_POS; i++ ) {

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      if (sub(CODEC_MODE, 1) == 0) *p4 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p3 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p2 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p1 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      if (sub(CODEC_MODE, 2) == 0) *p0 = extract_h(cor);

      p4 -= ldec;
      p3 -= ldec;
      p2 -= ldec;
      p1 -= ldec;
      p0 -= ldec;
    }

    /* final, shorter group of this diagonal */
    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    if (sub(CODEC_MODE, 1) == 0) *p4 = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p3 = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p2 = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p1 = extract_h(cor);

    l_fin_sup -= NB_POS;
    l_fin_inf--;
    ptr_hf += STEP;
  }

  /*---------------------------------------------------------------------*
   * Compute elements of: rri2i4[], rri1i3[], rri0i2[], rri1i4[], rri0i3 *
   * (lag 2)                                                             *
   *---------------------------------------------------------------------*/

  ptr_hd = h;
  ptr_hf = ptr_hd + 2;
  l_fin_sup = MSIZE-1;
  l_fin_inf = l_fin_sup-(Word16)1;
  for(k=0; k<NB_POS; k++) {

    p4 = rri2i4 + l_fin_sup;
    p3 = rri1i3 + l_fin_sup;
    p2 = rri0i2 + l_fin_sup;
    p1 = rri1i4 + l_fin_inf;
    p0 = rri0i3 + l_fin_inf;

    cor = 0;
    ptr_h1 = ptr_hd;
    ptr_h2 =  ptr_hf;
    for(i=k+(Word16)1; i<NB_POS; i++ ) {

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p4 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p3 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p2 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p1 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p0 = extract_h(cor);

      p4 -= ldec;
      p3 -= ldec;
      p2 -= ldec;
      p1 -= ldec;
      p0 -= ldec;
    }

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p4 = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p3 = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p2 = extract_h(cor);

    l_fin_sup -= NB_POS;
    l_fin_inf--;
    ptr_hf += STEP;
  }

  /*----------------------------------------------------------------------*
   * Compute elements of: rri1i4[], rri0i3[], rri2i4[], rri1i3[], rri0i2  *
   * (lag 3)                                                              *
   *----------------------------------------------------------------------*/

  ptr_hd = h;
  ptr_hf = ptr_hd + 3;
  l_fin_sup = MSIZE-1;
  l_fin_inf = l_fin_sup-(Word16)1;
  for(k=0; k<NB_POS; k++) {

    p4 = rri1i4 + l_fin_sup;
    p3 = rri0i3 + l_fin_sup;
    p2 = rri2i4 + l_fin_inf;
    p1 = rri1i3 + l_fin_inf;
    p0 = rri0i2 + l_fin_inf;

    ptr_h1 = ptr_hd;
    ptr_h2 =  ptr_hf;
    cor = 0;
    for(i=k+(Word16)1; i<NB_POS; i++ ) {

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p4 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p3 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p2 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p1 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p0 = extract_h(cor);

      p4 -= ldec;
      p3 -= ldec;
      p2 -= ldec;
      p1 -= ldec;
      p0 -= ldec;
    }

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p4 = extract_h(cor);

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    *p3 = extract_h(cor);

    l_fin_sup -= NB_POS;
    l_fin_inf--;
    ptr_hf += STEP;
  }

  /*----------------------------------------------------------------------*
   * Compute elements of: rri0i4[], rri2i3[], rri1i2[], rri0i1[]          *
   * (lag 4).  p3 is assigned only when CODEC_MODE is 1 or 2; for any     *
   * other value it keeps the value left by the previous loop.            *
   * NOTE(review): presumably CODEC_MODE is always 1 or 2 here - confirm. *
   *----------------------------------------------------------------------*/

  ptr_hd = h;
  ptr_hf = ptr_hd + 4;
  l_fin_sup = MSIZE-1;
  l_fin_inf = l_fin_sup-(Word16)1;
  for(k=0; k<NB_POS; k++) {

    if (sub(CODEC_MODE, 2) == 0) p3 = rri0i4 + l_fin_sup;
    if (sub(CODEC_MODE, 1) == 0) p3 = rri0i4 + l_fin_inf;
    p2 = rri2i3 + l_fin_inf;
    p1 = rri1i2 + l_fin_inf;
    p0 = rri0i1 + l_fin_inf;

    ptr_h1 = ptr_hd;
    ptr_h2 =  ptr_hf;
    cor = 0;
    for(i=k+(Word16)1; i<NB_POS; i++ ) {

      /* the rri0i4[] slot takes the 1st product sum in mode 2 ... */
      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      if (sub(CODEC_MODE, 2) == 0) *p3 = extract_h(cor);

      /* ... and the 2nd product sum in mode 1 */
      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      if (sub(CODEC_MODE, 1) == 0) *p3 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p2 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p1 = extract_h(cor);

      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p0 = extract_h(cor);

      p3 -= ldec;
      p2 -= ldec;
      p1 -= ldec;
      p0 -= ldec;
    }

    cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
    if (sub(CODEC_MODE, 2) == 0) *p3 = extract_h(cor);

    l_fin_sup -= NB_POS;
    l_fin_inf--;
    ptr_hf += STEP;
  }

  if (sub(CODEC_MODE, 1) == 0) {

    /*-----------------------------------------------------------------*
     * Compute elements of RRi1i1[]                                    *
     * (stored in the rri0i2[] slot).  The main diagonal is copied     *
     * from rri1i1[]; the lag-5 pass below writes each value twice,    *
     * at p0 and at p0+lsym.                                           *
     *-----------------------------------------------------------------*/

    p0 = rri0i2;
    for (k=0; k<NB_POS; k++) {
      *p0 = *rri1i1;
      rri1i1++;        /* advances the local pointer only */
      p0 += ldec;
    }

    ptr_hd = h;
    ptr_hf = ptr_hd + 5;
    l_fin_sup = MSIZE-1;
    l_fin_inf = l_fin_sup-NB_POS;
    lsym = NB_POS - (Word16)1;
    for(k=(Word16)1; k<NB_POS; k++) {

      p0 = rri0i2 + l_fin_inf;
      ptr_h1 = ptr_hd;
      ptr_h2 =  ptr_hf;
      cor = 0;

      /* first group has only four products */
      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
      *p0 = extract_h(cor);
      *(p0+lsym) = extract_h(cor);
      p0 -= ldec;

      for(i=k+(Word16)1; i<NB_POS; i++ ) {
        cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
        cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
        cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
        cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
        cor = L_mac(cor, *ptr_h1, *ptr_h2); ptr_h1++; ptr_h2++;
        *p0 = extract_h(cor);
        *(p0+lsym) = extract_h(cor);

        p0 -= ldec;
      }

      l_fin_inf -= NB_POS;
      ptr_hf += STEP;
      lsym += NB_POS - (Word16)1;
    }
  }

  return;
}
/*
********************************************************************************
*                         PUBLIC PROGRAM CODE
********************************************************************************
*/

/*
 * hp_max - maximum high-pass filtered correlation, normalised by the
 * high-pass filtered signal energy.
 *
 * Reads corr[-lag_max] .. corr[-lag_min] and scal_sig[-1] .. scal_sig[L_frame-1].
 * The result is written to *cor_hp_max; the function itself always returns 0.
 */
Word16 hp_max (
    Word32 corr[],      /* i   : correlation vector.                      */
    Word16 scal_sig[],  /* i   : scaled signal.                           */
    Word16 L_frame,     /* i   : length of frame to compute pitch         */
    Word16 lag_max,     /* i   : maximum lag                              */
    Word16 lag_min,     /* i   : minimum lag                              */
    Word16 *cor_hp_max) /* o   : max high-pass filtered norm. correlation */
{
    Word16 n;
    Word16 *sig, *sig_prev;
    Word32 peak, acc, acc_lag1;
    Word16 peak16, acc16, ratio;
    Word16 shift_diff, peak_shift, acc_shift;

    peak = MIN_32;                                      move32 ();
    acc = 0L;                                           move32 ();

    /* Largest |2*corr[-n] - corr[-n-1] - corr[-n+1]| for lag_min < n < lag_max */
    n = lag_max - 1;
    while (n > lag_min)
    {
        acc = L_shl (corr[-n], 1);
        acc = L_sub (acc, corr[-n-1]);
        acc = L_sub (acc, corr[-n+1]);
        acc = L_abs (acc);

        test ();
        if (L_sub (acc, peak) >= 0)
        {
            peak = acc;                                 move32 ();
        }
        n--;
    }

    /* Energy of the frame: sum of scal_sig[n]^2 */
    sig = scal_sig;                                     move16 ();
    sig_prev = &scal_sig[0];                            move16 ();
    acc = 0L;                                           move32 ();
    for (n = 0; n < L_frame; n++)
    {
        acc = L_mac (acc, sig[n], sig_prev[n]);
    }

    /* Lag-1 correlation: sum of scal_sig[n] * scal_sig[n-1] */
    sig = scal_sig;                                     move16 ();
    sig_prev = &scal_sig[-1];                           move16 ();
    acc_lag1 = 0L;                                      move32 ();
    for (n = 0; n < L_frame; n++)
    {
        acc_lag1 = L_mac (acc_lag1, sig[n], sig_prev[n]);
    }

    /* High-pass filtered energy: |2*energy - 2*lag-1 correlation| */
    acc = L_sub (L_shl (acc, 1), L_shl (acc_lag1, 1));
    acc = L_abs (acc);

    /* peak / acc, both normalised to 16 bits first */
    peak_shift = sub (norm_l (peak), 1);
    peak16 = extract_h (L_shl (peak, peak_shift));
    acc_shift = norm_l (acc);
    acc16 = extract_h (L_shl (acc, acc_shift));

    test ();
    if (acc16 == 0)
    {
        ratio = 0;                                      move16 ();
    }
    else
    {
        ratio = div_s (peak16, acc16);
    }

    /* Undo the difference between the two normalisation shifts. */
    shift_diff = sub (peak_shift, acc_shift);

    test ();
    if (shift_diff < 0)
    {
        *cor_hp_max = shl (ratio, negate (shift_diff)); move16 (); /* Q15 */
    }
    else
    {
        *cor_hp_max = shr (ratio, shift_diff);          move16 (); /* Q15 */
    }

    return 0;
}
/*
 * D4i40_17
 *
 * Four-pulse algebraic codebook search.  One pulse is placed on each of the
 * position tracks starting at 0, 1 and 2 (step STEP); the fourth pulse is
 * tried on both the track starting at 3 and the track starting at 4.  The
 * combination maximising square(correlation)/energy is kept.
 *
 * Side effects visible in this block:
 *   - Dn[] is overwritten with its absolute values.
 *   - The cross-correlation tables inside rr[] are multiplied in place by the
 *     pulse-sign products.
 *   - `extra` (not declared in this block; presumably file-scope state) is
 *     reset to 30 when i_subfr == 0 and is set to the unused search budget on
 *     exit, so the leftover budget carries over to the next call.
 *
 * Returns the packed position index; *sign receives one bit per pulse
 * (bit k set when pulse k is positive).
 */
static Word16 D4i40_17(        /* (o)    : Index of pulses positions.               */
  Word16 Dn[],          /* (i)    : Correlations between h[] and Xn[].       */
  Word16 rr[],          /* (i)    : Correlations of impulse response h[].    */
  Word16 h[],           /* (i) Q12: Impulse response of filters.             */
  Word16 cod[],         /* (o) Q13: Selected algebraic codeword.             */
  Word16 y[],           /* (o) Q12: Filtered algebraic codeword.             */
  Word16 *sign,         /* (o)    : Signs of 4 pulses.                       */
  Word16 i_subfr        /* (i)    : subframe flag                            */
)
{
  Word16 i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  Word16 i, j, time;
  Word16 ps0, ps1, ps2, ps3, alp, alp0;
  Word32 alp1, alp2, alp3, L32;
  Word16 ps3c, psc, alpha;
  Word16 average, max0, max1, max2, thres;
  Word32 L_temp;

  /* Base addresses of the sub-tables packed one after another in rr[] */
  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  /* Running cursors into the same sub-tables, advanced by the search loops */
  Word16 *ptr_ri0i0, *ptr_ri1i1, *ptr_ri2i2, *ptr_ri3i3, *ptr_ri4i4;
  Word16 *ptr_ri0i1, *ptr_ri0i2, *ptr_ri0i3, *ptr_ri0i4;
  Word16 *ptr_ri1i2, *ptr_ri1i3, *ptr_ri1i4;
  Word16 *ptr_ri2i3, *ptr_ri2i4;

  Word16 p_sign[L_SUBFR];

  /* Init pointers: five diagonal tables of NB_POS entries, then nine
     cross tables of MSIZE entries each */
  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;
  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;

 /*-----------------------------------------------------------------------*
  * Reset the carried-over search budget for the 1st subframe.            *
  *-----------------------------------------------------------------------*/

  if (i_subfr == 0) { extra = 30; }

 /*-----------------------------------------------------------------------*
  * Choose the sign of the impulse: remember the sign of each Dn[i] as     *
  * +/- 1 in Q15 and make Dn[i] non-negative.                              *
  *-----------------------------------------------------------------------*/

  for (i=0; i<L_SUBFR; i++)
  {
    if( Dn[i] >= 0)
    {
      p_sign[i] = 0x7fff;
    }
    else
    {
      p_sign[i] = (Word16)0x8000;
      Dn[i] = negate(Dn[i]);
    }
  }

 /*-------------------------------------------------------------------*
  * - Compute the search threshold after three pulses                 *
  *-------------------------------------------------------------------*/

  /* Find maximum of Dn[i0]+Dn[i1]+Dn[i2] (per-track maxima, then summed) */

  max0 = Dn[0];
  max1 = Dn[1];
  max2 = Dn[2];

  for (i = 5; i < L_SUBFR; i+=STEP)
  {
    if (sub(Dn[i]  , max0) > 0) { max0 = Dn[i];   }
    if (sub(Dn[i+1], max1) > 0) { max1 = Dn[i+1]; }
    if (sub(Dn[i+2], max2) > 0) { max2 = Dn[i+2]; }
  }
  max0 = add(max0, max1);
  max0 = add(max0, max2);

  /* Find average of Dn[i0]+Dn[i1]+Dn[i2] */

  L32 = 0;
  for (i = 0; i < L_SUBFR; i+=STEP)
  {
    L32 = L_mac(L32, Dn[i], 1);
    L32 = L_mac(L32, Dn[i+1], 1);
    L32 = L_mac(L32, Dn[i+2], 1);
  }
  average =extract_l( L_shr(L32, 4));   /* 1/8 of sum */

  /* thres = average + (max0-average)*THRESHFCB; */

  thres = sub(max0, average);
  thres = mult(thres, THRESHFCB);
  thres = add(thres, average);

 /*-------------------------------------------------------------------*
  * Modification of rrixiy[] to take signs into account.              *
  * Each cross-correlation entry is multiplied (in place) by the      *
  * product of the signs of the two positions it relates.             *
  *-------------------------------------------------------------------*/

  ptr_ri0i1 = rri0i1;
  ptr_ri0i2 = rri0i2;
  ptr_ri0i3 = rri0i3;
  ptr_ri0i4 = rri0i4;

  for(i0=0; i0<L_SUBFR; i0+=STEP)
  {
    for(i1=1; i1<L_SUBFR; i1+=STEP)
    {
      *ptr_ri0i1 = mult(*ptr_ri0i1, mult(p_sign[i0], p_sign[i1]));
      ptr_ri0i1++;
      *ptr_ri0i2 = mult(*ptr_ri0i2, mult(p_sign[i0], p_sign[i1+1]));
      ptr_ri0i2++;
      *ptr_ri0i3 = mult(*ptr_ri0i3, mult(p_sign[i0], p_sign[i1+2]));
      ptr_ri0i3++;
      *ptr_ri0i4 = mult(*ptr_ri0i4, mult(p_sign[i0], p_sign[i1+3]));
      ptr_ri0i4++;
    }
  }

  ptr_ri1i2 = rri1i2;
  ptr_ri1i3 = rri1i3;
  ptr_ri1i4 = rri1i4;

  for(i1=1; i1<L_SUBFR; i1+=STEP)
  {
    for(i2=2; i2<L_SUBFR; i2+=STEP)
    {
      *ptr_ri1i2 = mult(*ptr_ri1i2, mult(p_sign[i1], p_sign[i2]));
      ptr_ri1i2++;
      *ptr_ri1i3 = mult(*ptr_ri1i3, mult(p_sign[i1], p_sign[i2+1]));
      ptr_ri1i3++;
      *ptr_ri1i4 = mult(*ptr_ri1i4, mult(p_sign[i1], p_sign[i2+2]));
      ptr_ri1i4++;
    }
  }

  ptr_ri2i3 = rri2i3;
  ptr_ri2i4 = rri2i4;

  for(i2=2; i2<L_SUBFR; i2+=STEP)
  {
    for(i3=3; i3<L_SUBFR; i3+=STEP)
    {
      *ptr_ri2i3 = mult(*ptr_ri2i3, mult(p_sign[i2], p_sign[i3]));
      ptr_ri2i3++;
      *ptr_ri2i4 = mult(*ptr_ri2i4, mult(p_sign[i2], p_sign[i3+1]));
      ptr_ri2i4++;
    }
  }

 /*-------------------------------------------------------------------*
  * Search the optimum positions of the four pulses which maximize    *
  *     square(correlation) / energy                                  *
  * The search is performed in four nested loops. At each loop, one   *
  * pulse contribution is added to the correlation and energy.        *
  *                                                                   *
  * The fourth loop is entered only if the correlation due to the     *
  * contribution of the first three pulses exceeds the preset         *
  * threshold.                                                        *
  *-------------------------------------------------------------------*/

  /* Default values (used if the fourth loop is never entered) */
  ip0    = 0;
  ip1    = 1;
  ip2    = 2;
  ip3    = 3;
  psc    = 0;
  alpha  = MAX_16;
  time   = add(MAX_TIME, extra);   /* budget = fixed part + carry-over */

  /* Four loops to search innovation code. */
  ptr_ri0i0 = rri0i0;    /* Init. pointers that depend on first loop */
  ptr_ri0i1 = rri0i1;
  ptr_ri0i2 = rri0i2;
  ptr_ri0i3 = rri0i3;
  ptr_ri0i4 = rri0i4;

  for (i0 = 0; i0 < L_SUBFR; i0 += STEP)        /* first pulse loop  */
  {
    ps0  = Dn[i0];
    alp0 = *ptr_ri0i0++;

    ptr_ri1i1 = rri1i1;    /* Init. pointers that depend on second loop */
    ptr_ri1i2 = rri1i2;
    ptr_ri1i3 = rri1i3;
    ptr_ri1i4 = rri1i4;

    for (i1 = 1; i1 < L_SUBFR; i1 += STEP)      /* second pulse loop */
    {
      ps1  = add(ps0, Dn[i1]);

      /* alp1 = alp0 + *ptr_ri1i1++ + 2.0 * ( *ptr_ri0i1++); */

      alp1 = L_mult(alp0, 1);
      alp1 = L_mac(alp1, *ptr_ri1i1++, 1);
      alp1 = L_mac(alp1, *ptr_ri0i1++, 2);

      ptr_ri2i2 = rri2i2;     /* Init. pointers that depend on third loop */
      ptr_ri2i3 = rri2i3;
      ptr_ri2i4 = rri2i4;

      for (i2 = 2; i2 < L_SUBFR; i2 += STEP)    /* third pulse loop */
      {
        ps2  = add(ps1, Dn[i2]);

        /* alp2 = alp1 + *ptr_ri2i2++ + 2.0 * (*ptr_ri0i2++ + *ptr_ri1i2++); */

        alp2 = L_mac(alp1, *ptr_ri2i2++, 1);
        alp2 = L_mac(alp2, *ptr_ri0i2++, 2);
        alp2 = L_mac(alp2, *ptr_ri1i2++, 2);

        /* Test threshold */

        if ( sub(ps2, thres) > 0)
        {

          ptr_ri3i3 = rri3i3;    /* Init. pointers that depend on 4th loop */

          for (i3 = 3; i3 < L_SUBFR; i3 += STEP)      /* 4th pulse loop, track starting at 3 */
          {
            ps3 = add(ps2, Dn[i3]);

            /* alp3 = alp2 + *ptr_ri3i3++                                */
            /*       + 2.0*( *ptr_ri0i3++ + *ptr_ri1i3++ + *ptr_ri2i3++); */

            alp3 = L_mac(alp2, *ptr_ri3i3++, 1);
            alp3 = L_mac(alp3, *ptr_ri0i3++, 2);
            alp3 = L_mac(alp3, *ptr_ri1i3++, 2);
            alp3 = L_mac(alp3, *ptr_ri2i3++, 2);
            alp  = extract_l(L_shr(alp3, 5));

            ps3c = mult(ps3, ps3);
            /* keep the candidate when ps3c/alp > psc/alpha, compared
               by cross-multiplication: ps3c*alpha - psc*alp > 0 */
            L_temp = L_mult(ps3c, alpha);
            L_temp = L_msu(L_temp, psc, alp);
            if( L_temp > 0L )
            {
              psc = ps3c;
              alpha = alp;
              ip0 = i0;
              ip1 = i1;
              ip2 = i2;
              ip3 = i3;
            }
          }  /*  end of for i3 = */
          /* rewind the cursors the loop above advanced by NB_POS */
          ptr_ri0i3 -= NB_POS;
          ptr_ri1i3 -= NB_POS;

          ptr_ri4i4 = rri4i4;    /* Init. pointers that depend on 4th loop */

          for (i3 = 4; i3 < L_SUBFR; i3 += STEP)      /* 4th pulse loop, track starting at 4 */
          {
            ps3 = add(ps2, Dn[i3]);

            /* alp3 = alp2 + *ptr_ri4i4++                                */
            /*       + 2.0*( *ptr_ri0i4++ + *ptr_ri1i4++ + *ptr_ri2i4++); */

            alp3 = L_mac(alp2, *ptr_ri4i4++, 1);
            alp3 = L_mac(alp3, *ptr_ri0i4++, 2);
            alp3 = L_mac(alp3, *ptr_ri1i4++, 2);
            alp3 = L_mac(alp3, *ptr_ri2i4++, 2);
            alp  = extract_l(L_shr(alp3, 5));

            ps3c = mult(ps3, ps3);
            L_temp = L_mult(ps3c, alpha);
            L_temp = L_msu(L_temp, psc, alp);
            if( L_temp > 0L )
            {
              psc = ps3c;
              alpha = alp;
              ip0 = i0;
              ip1 = i1;
              ip2 = i2;
              ip3 = i3;
            }
          }       /*  end of for i3 = */
          ptr_ri0i4 -= NB_POS;
          ptr_ri1i4 -= NB_POS;

          /* one unit of budget is spent each time the 4th-pulse loops run */
          time = sub(time, 1);
          if(time <= 0 ) goto end_search;     /* Maximum time finish */

        }  /* end of if >thres */
        else
        {
          /* 4th loops skipped: advance the cursors they would have advanced */
          ptr_ri2i3 += NB_POS;
          ptr_ri2i4 += NB_POS;
        }

      } /* end of for i2 = */

      ptr_ri0i2 -= NB_POS;
      ptr_ri1i3 += NB_POS;
      ptr_ri1i4 += NB_POS;

    } /* end of for i1 = */

    ptr_ri0i2 += NB_POS;
    ptr_ri0i3 += NB_POS;
    ptr_ri0i4 += NB_POS;

  } /* end of for i0 = */

end_search:

  extra = time;   /* leftover budget is carried to the next call */

  /* Set the sign of impulses (i0..i3 are reused to hold the signs) */

  i0 = p_sign[ip0];
  i1 = p_sign[ip1];
  i2 = p_sign[ip2];
  i3 = p_sign[ip3];

  /* Find the codeword corresponding to the selected positions */

  for(i=0; i<L_SUBFR; i++) { cod[i] = 0; }

  cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */
  cod[ip1] = shr(i1, 2);
  cod[ip2] = shr(i2, 2);
  cod[ip3] = shr(i3, 2);

  /* find the filtered codeword: add or subtract h[] shifted to each pulse position */

  for (i = 0; i < L_SUBFR; i++) { y[i] = 0; }

  if(i0 > 0)
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  if(i1 > 0)
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  if(i2 > 0)
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  if(i3 > 0)
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  /* find codebook index;  17-bit address (4 sign bits returned via *sign) */

  i = 0;
  if(i0 > 0) i = add(i, 1);
  if(i1 > 0) i = add(i, 2);
  if(i2 > 0) i = add(i, 4);
  if(i3 > 0) i = add(i, 8);
  *sign = i;

  ip0 = mult(ip0, 6554);         /* ip0/5 */
  ip1 = mult(ip1, 6554);         /* ip1/5 */
  ip2 = mult(ip2, 6554);         /* ip2/5 */
  i   = mult(ip3, 6554);         /* ip3/5 */
  j   = add(i, shl(i, 2));       /* j = i*5 */
  j   = sub(ip3, add(j, 3));     /* j= ip3%5 -3 : 0 for the track at 3, 1 for the track at 4 */
  ip3 = add(shl(i, 1), j);

  /* pack: ip0 in bits 0-2, ip1 from bit 3, ip2 from bit 6, ip3 from bit 9 */
  i = add(ip0, shl(ip1, 3));
  i = add(i  , shl(ip2, 6));
  i = add(i  , shl(ip3, 9));

  return i;
}
/***************************************************************************
 Function:    comp_powercat_and_catbalance

 Syntax:      void comp_powercat_and_catbalance(Int16 *power_categories,
                                                Int16 *category_balances,
                                                Int16 *rms_index,
                                                Int16 number_of_available_bits,
                                                Int16 offset)

              inputs:   *rms_index
                        number_of_available_bits
                        offset
                        gNumber_of_regions and
                        gNum_categorization_control_possibilities are read
                        as well; they are not parameters of this function
                        (not declared in this block).

              outputs:  *power_categories
                        *category_balances

 Description: Derives an initial category per region from
              (offset - rms_index[region]) / 2, clamped to
              0 .. NUM_CATEGORIES-1, then builds the list of category
              balances: at every step either one region is moved to a
              higher-rate (lower) category or one region is moved to a
              lower-rate (higher) category, depending on how the two
              running bit totals compare with twice the available bits.
              On return power_categories[] holds the highest-rate
              categorization that was reached and category_balances[]
              the ordered list of regions that were changed.

 Note:        best_low_region / best_high_region are only assigned when at
              least one region is eligible in the corresponding search.

 WMOPS:     7kHz |    24kbit    |     32kbit
          -------|--------------|----------------
            AVG  |    0.10      |     0.10
          -------|--------------|----------------
            MAX  |    0.11      |     0.11
          -------|--------------|----------------

           14kHz |    24kbit    |     32kbit     |     48kbit
          -------|--------------|----------------|----------------
            AVG  |    0.32      |     0.35       |     0.38
          -------|--------------|----------------|----------------
            MAX  |    0.38      |     0.42       |     0.43
          -------|--------------|----------------|----------------

***************************************************************************/
void comp_powercat_and_catbalance(Int16 *power_categories,
                                  Int16 *category_balances,
                                  Int16 *rms_index,
                                  Int16 number_of_available_bits,
                                  Int16 offset)
{
    Int16 r;
    Int16 step;
    Int16 cat;
    Int16 hi_rate_cat[MAX_NUMBER_OF_REGIONS];   /* categorization using the most bits  */
    Int16 lo_rate_cat[MAX_NUMBER_OF_REGIONS];   /* categorization using the fewest bits */
    Int16 balance_work[2*MAX_NUM_CATEGORIZATION_CONTROL_POSSIBILITIES];
    Int16 best_high, best_low;
    Int16 best_high_region, best_low_region;
    Int16 head, tail;                           /* write cursors into balance_work[] */
    Int16 bits_hi, bits_lo;                     /* expected bits of the two categorizations */
    Int16 doubled;
    Int16 score;
    Int16 bits_sum;
    Int16 twice_budget;

    /* Initial categories from the uniform offset */
    bits_hi = 0;
    for (r = 0; r < gNumber_of_regions; r++)
    {
        cat = sub(offset, rms_index[r]);

        /* keep cat inside 0 .. NUM_CATEGORIES-1 */
        cat = (cat < 0) ? 0 : (cat >> 1);
        if (cat >= NUM_CATEGORIES)
        {
            cat = NUM_CATEGORIES - 1;
        }

        power_categories[r] = cat;

        /* can't overflow */
        bits_hi += expected_bits_table[cat];

        hi_rate_cat[r] = cat;
        lo_rate_cat[r] = cat;
    }
    bits_lo = bits_hi;

    /* Both cursors start in the middle of the work buffer: `head` grows
       downwards, `tail` grows upwards. */
    head = gNum_categorization_control_possibilities;
    tail = gNum_categorization_control_possibilities;

    for (step = 0; step < gNum_categorization_control_possibilities - 1; step++)
    {
        bits_sum = add(bits_hi, bits_lo);
        twice_budget = shl(number_of_available_bits, 1);

        if (bits_sum > twice_budget)
        {
            /* Too many bits on average: scan from the highest region down
               to the lowest for the best region to move to a lower
               bit rate category. */
            best_high = -99;
            r = gNumber_of_regions;
            while (r-- != 0)
            {
                if (lo_rate_cat[r] >= (NUM_CATEGORIES-1))
                {
                    continue;
                }
                doubled = lo_rate_cat[r] << 1;
                score = sub(offset, rms_index[r]);
                score = sub(score, doubled);
                if (score > best_high)
                {
                    best_high = score;
                    best_high_region = r;
                }
            }

            balance_work[tail++] = best_high_region;

            bits_lo = sub(bits_lo, expected_bits_table[lo_rate_cat[best_high_region]]);
            lo_rate_cat[best_high_region]++;
            bits_lo = add(bits_lo, expected_bits_table[lo_rate_cat[best_high_region]]);
        }
        else
        {
            /* Room left on average: scan from the lowest region up to the
               highest for the best region to move to a higher bit rate
               category. */
            best_low = 99;
            for (r = 0; r < gNumber_of_regions; r++)
            {
                if (hi_rate_cat[r] <= 0)
                {
                    continue;
                }
                doubled = hi_rate_cat[r] << 1;
                score = sub(offset, rms_index[r]);
                score = sub(score, doubled);
                if (score < best_low)
                {
                    best_low = score;
                    best_low_region = r;
                }
            }

            balance_work[--head] = best_low_region;

            bits_hi = sub(bits_hi, expected_bits_table[hi_rate_cat[best_low_region]]);
            hi_rate_cat[best_low_region]--;
            bits_hi = add(bits_hi, expected_bits_table[hi_rate_cat[best_low_region]]);
        }
    }

    /* Report the highest-rate categorization ... */
    r = gNumber_of_regions;
    while (r-- != 0)
    {
        power_categories[r] = hi_rate_cat[r];
    }

    /* ... and the balance list, read upwards from `head`. */
    for (step = 0; step < gNum_categorization_control_possibilities - 1; step++)
    {
        category_balances[step] = balance_work[head++];
    }
}
Word16 Pitch_ol_fast( /* output: open loop pitch lag */ Word16 signal[], /* input : signal used to compute the open loop pitch */ /* signal[-pit_max] to signal[-1] should be known */ Word16 pit_max, /* input : maximum pitch lag */ Word16 L_frame /* input : length of frame to compute pitch */ ) { Word16 i, j; Word16 max1, max2, max3; Word16 max_h, max_l, ener_h, ener_l; Word16 T1, T2, T3; Word16 *p, *p1; Word32 max, sum, L_temp; /* Scaled signal */ Word16 scaled_signal[L_FRAME+PIT_MAX]; Word16 *scal_sig; scal_sig = &scaled_signal[pit_max]; /*--------------------------------------------------------* * Verification for risk of overflow. * *--------------------------------------------------------*/ Overflow = 0; sum = 0; for(i= -pit_max; i< L_frame; i+=2) sum = L_mac(sum, signal[i], signal[i]); /*--------------------------------------------------------* * Scaling of input signal. * * * * if Overflow -> scal_sig[i] = signal[i]>>3 * * else if sum < 1^20 -> scal_sig[i] = signal[i]<<3 * * else -> scal_sig[i] = signal[i] * *--------------------------------------------------------*/ if(Overflow == 1) { for(i=-pit_max; i<L_frame; i++) { scal_sig[i] = shr(signal[i], 3); } } else { L_temp = L_sub(sum, (Word32)1048576L); if ( L_temp < (Word32)0 ) /* if (sum < 2^20) */ { for(i=-pit_max; i<L_frame; i++) { scal_sig[i] = shl(signal[i], 3); } } else { for(i=-pit_max; i<L_frame; i++) { scal_sig[i] = signal[i]; } } } /*--------------------------------------------------------------------* * The pitch lag search is divided in three sections. * * Each section cannot have a pitch multiple. * * We find a maximum for each section. * * We compare the maxima of each section by favoring small lag. 
* * * * First section: lag delay = 20 to 39 * * Second section: lag delay = 40 to 79 * * Third section: lag delay = 80 to 143 * *--------------------------------------------------------------------*/ /* First section */ max = MIN_32; T1 = 20; /* Only to remove warning from some compilers */ for (i = 20; i < 40; i++) { p = scal_sig; p1 = &scal_sig[-i]; sum = 0; for (j=0; j<L_frame; j+=2, p+=2, p1+=2) sum = L_mac(sum, *p, *p1); L_temp = L_sub(sum, max); if (L_temp > 0) { max = sum; T1 = i; } } /* compute energy of maximum */ sum = 1; /* to avoid division by zero */ p = &scal_sig[-T1]; for(i=0; i<L_frame; i+=2, p+=2) sum = L_mac(sum, *p, *p); /* max1 = max/sqrt(energy) */ /* This result will always be on 16 bits !! */ sum = Inv_sqrt(sum); /* 1/sqrt(energy), result in Q30 */ L_Extract(max, &max_h, &max_l); L_Extract(sum, &ener_h, &ener_l); sum = Mpy_32(max_h, max_l, ener_h, ener_l); max1 = extract_l(sum); /* Second section */ max = MIN_32; T2 = 40; /* Only to remove warning from some compilers */ for (i = 40; i < 80; i++) { p = scal_sig; p1 = &scal_sig[-i]; sum = 0; for (j=0; j<L_frame; j+=2, p+=2, p1+=2) sum = L_mac(sum, *p, *p1); L_temp = L_sub(sum, max); if (L_temp > 0) { max = sum; T2 = i; } } /* compute energy of maximum */ sum = 1; /* to avoid division by zero */ p = &scal_sig[-T2]; for(i=0; i<L_frame; i+=2, p+=2) sum = L_mac(sum, *p, *p); /* max2 = max/sqrt(energy) */ /* This result will always be on 16 bits !! 
*/ sum = Inv_sqrt(sum); /* 1/sqrt(energy), result in Q30 */ L_Extract(max, &max_h, &max_l); L_Extract(sum, &ener_h, &ener_l); sum = Mpy_32(max_h, max_l, ener_h, ener_l); max2 = extract_l(sum); /* Third section */ max = MIN_32; T3 = 80; /* Only to remove warning from some compilers */ for (i = 80; i < 143; i+=2) { p = scal_sig; p1 = &scal_sig[-i]; sum = 0; for (j=0; j<L_frame; j+=2, p+=2, p1+=2) sum = L_mac(sum, *p, *p1); L_temp = L_sub(sum, max); if (L_temp > 0) { max = sum; T3 = i; } } /* Test around max3 */ i = T3; p = scal_sig; p1 = &scal_sig[-(i+1)]; sum = 0; for (j=0; j<L_frame; j+=2, p+=2, p1+=2) sum = L_mac(sum, *p, *p1); L_temp = L_sub(sum, max); if (L_temp > 0) { max = sum; T3 = i+(Word16)1; } p = scal_sig; p1 = &scal_sig[-(i-1)]; sum = 0; for (j=0; j<L_frame; j+=2, p+=2, p1+=2) sum = L_mac(sum, *p, *p1); L_temp = L_sub(sum, max); if (L_temp > 0) { max = sum; T3 = i-(Word16)1; } /* compute energy of maximum */ sum = 1; /* to avoid division by zero */ p = &scal_sig[-T3]; for(i=0; i<L_frame; i+=2, p+=2) sum = L_mac(sum, *p, *p); /* max1 = max/sqrt(energy) */ /* This result will always be on 16 bits !! */ sum = Inv_sqrt(sum); /* 1/sqrt(energy), result in Q30 */ L_Extract(max, &max_h, &max_l); L_Extract(sum, &ener_h, &ener_l); sum = Mpy_32(max_h, max_l, ener_h, ener_l); max3 = extract_l(sum); /*-----------------------* * Test for multiple. 
* *-----------------------*/ /* if( abs(T2*2 - T3) < 5) */ /* max2 += max3 * 0.25; */ i = sub(shl(T2,1), T3); j = sub(abs_s(i), 5); if(j < 0) max2 = add(max2, shr(max3, 2)); /* if( abs(T2*3 - T3) < 7) */ /* max2 += max3 * 0.25; */ i = add(i, T2); j = sub(abs_s(i), 7); if(j < 0) max2 = add(max2, shr(max3, 2)); /* if( abs(T1*2 - T2) < 5) */ /* max1 += max2 * 0.20; */ i = sub(shl(T1,1), T2); j = sub(abs_s(i), 5); if(j < 0) max1 = add(max1, mult(max2, 6554)); /* if( abs(T1*3 - T2) < 7) */ /* max1 += max2 * 0.20; */ i = add(i, T1); j = sub(abs_s(i), 7); if(j < 0) max1 = add(max1, mult(max2, 6554)); /*--------------------------------------------------------------------* * Compare the 3 sections maxima. * *--------------------------------------------------------------------*/ if( sub(max1, max2) < 0 ) {max1 = max2; T1 = T2; } if( sub(max1, max3) <0 ) {T1 = T3; } return T1; }
/*
 * Relspwede
 *
 * Two-stage LSP quantization, repeated for each of the MODE MA predictors.
 * For every predictor the prediction residual is formed, a first-stage
 * codebook entry is pre-selected, the lower (0..NC-1) and upper (NC..M-1)
 * halves are refined with the second-stage codebook, and the resulting
 * distortion is stored.  The predictor chosen by Lsp_last_select() is then
 * used to write the two code words and to rebuild the quantized LSPs.
 */
void Relspwede(
  Word16 lsp[],                 /* (i) Q13 : unquantized LSP parameters */
  Word16 wegt[],                /* (i) norm: weighting coefficients     */
  Word16 lspq[],                /* (o) Q13 : quantized LSP parameters   */
  Word16 lspcb1[][M],           /* (i) Q13 : first stage LSP codebook   */
  Word16 lspcb2[][M],           /* (i) Q13 : Second stage LSP codebook  */
  Word16 fg[MODE][MA_NP][M],    /* (i) Q15 : MA prediction coefficients */
  Word16 freq_prev[MA_NP][M],   /* (i) Q13 : previous LSP vector        */
  Word16 fg_sum[MODE][M],       /* (i) Q15 : present MA prediction coef.*/
  Word16 fg_sum_inv[MODE][M],   /* (i) Q12 : inverse coef.              */
  Word16 code_ana[],            /* (o)     : codes of the selected LSP  */
  Word16 freq_cur[]             /* (o) Q13 : current LSP MA vector      */
)
{
  Word16 m, k;
  Word16 stage2_idx, best_mode;
  Word16 stage1_idx[MODE], stage1_cur;
  Word16 lower_idx[MODE], upper_idx[MODE];
  Word32 L_dist[MODE];          /* Q26 */
  Word16 residual[M];           /* Q13 */
  Word16 recon[M];              /* Q13 */

  m = 0;
  while (m < MODE)
  {
    /* prediction residual for this predictor */
    Lsp_prev_extract(lsp, residual, fg[m], freq_prev, fg_sum_inv[m]);

    /* first stage */
    Lsp_pre_select(residual, lspcb1, &stage1_cur);
    stage1_idx[m] = stage1_cur;

    /* second stage, lower half */
    Lsp_select_1(residual, lspcb1[stage1_cur], wegt, lspcb2, &stage2_idx);
    lower_idx[m] = stage2_idx;

    for (k = 0; k < NC; k++)
    {
      recon[k] = add(lspcb1[stage1_cur][k], lspcb2[stage2_idx][k]);
    }

    Lsp_expand_1(recon, GAP1);

    /* second stage, upper half */
    Lsp_select_2(residual, lspcb1[stage1_cur], wegt, lspcb2, &stage2_idx);
    upper_idx[m] = stage2_idx;

    for (k = NC; k < M; k++)
    {
      recon[k] = add(lspcb1[stage1_cur][k], lspcb2[stage2_idx][k]);
    }

    Lsp_expand_2(recon, GAP1);

    Lsp_expand_1_2(recon, GAP2);

    /* distortion obtained with this predictor */
    Lsp_get_tdist(wegt, recon, &L_dist[m], residual, fg_sum[m]);

    m++;
  }

  Lsp_last_select(L_dist, &best_mode);

  /* pack: [mode | first-stage index] and [lower index | upper index] */
  code_ana[0] = shl(best_mode, NC0_B) | stage1_idx[best_mode];
  code_ana[1] = shl(lower_idx[best_mode], NC1_B) | upper_idx[best_mode];

  Lsp_get_quante(lspcb1, lspcb2,
                 stage1_idx[best_mode],
                 lower_idx[best_mode],
                 upper_idx[best_mode],
                 fg[best_mode], freq_prev, lspq,
                 fg_sum[best_mode], freq_cur);

  return;
}
Word16 Cb_gain_average( Cb_gain_averageState *st, /* i/o : State variables for CB gain averaging */ enum Mode mode, /* i : AMR mode */ Word16 gain_code, /* i : CB gain Q1 */ Word16 lsp[], /* i : The LSP for the current frame Q15 */ Word16 lspAver[], /* i : The average of LSP for 8 frames Q15 */ Word16 bfi, /* i : bad frame indication flag */ Word16 prev_bf, /* i : previous bad frame indication flag */ Word16 pdfi, /* i : potential degraded bad frame ind flag */ Word16 prev_pdf, /* i : prev pot. degraded bad frame ind flag */ Word16 inBackgroundNoise, /* i : background noise decision */ Word16 voicedHangover, /* i : # of frames after last voiced frame */ Flag *pOverflow ) { Word16 i; Word16 cbGainMix; Word16 diff; Word16 tmp_diff; Word16 bgMix; Word16 cbGainMean; Word32 L_sum; Word16 tmp[M]; Word16 tmp1; Word16 tmp2; Word16 shift1; Word16 shift2; Word16 shift; /*---------------------------------------------------------* * Compute mixed cb gain, used to make cb gain more * * smooth in background noise for modes 5.15, 5.9 and 6.7 * * states that needs to be updated by all * *---------------------------------------------------------*/ /* set correct cbGainMix for MR74, MR795, MR122 */ cbGainMix = gain_code; /*-------------------------------------------------------* * Store list of CB gain needed in the CB gain * * averaging * *-------------------------------------------------------*/ for (i = 0; i < (L_CBGAINHIST - 1); i++) { st->cbGainHistory[i] = st->cbGainHistory[i+1]; } st->cbGainHistory[L_CBGAINHIST-1] = gain_code; diff = 0; /* compute lsp difference */ for (i = 0; i < M; i++) { tmp1 = abs_s(sub(*(lspAver + i), *(lsp + i), pOverflow)); /* Q15 */ shift1 = sub(norm_s(tmp1), 1, pOverflow); /* Qn */ tmp1 = shl(tmp1, shift1, pOverflow); /* Q15+Qn */ shift2 = norm_s(*(lspAver + i)); /* Qm */ tmp2 = shl(*(lspAver + i), shift2, pOverflow); /* Q15+Qm */ tmp[i] = div_s(tmp1, tmp2); /* Q15+(Q15+Qn)-(Q15+Qm) */ shift = 2 + shift1 - shift2; if (shift >= 0) { *(tmp + i) = 
shr(*(tmp + i), shift, pOverflow); /* Q15+Qn-Qm-Qx=Q13 */ } else { *(tmp + i) = shl(*(tmp + i), negate(shift), pOverflow); /* Q15+Qn-Qm-Qx=Q13 */ } diff = add(diff, *(tmp + i), pOverflow); /* Q13 */ } /* Compute hangover */ if (diff > 5325) /* 0.65 in Q11 */ { st->hangVar += 1; } else { st->hangVar = 0; } if (st->hangVar > 10) { /* Speech period, reset hangover variable */ st->hangCount = 0; } /* Compute mix constant (bgMix) */ bgMix = 8192; /* 1 in Q13 */ if ((mode <= MR67) || (mode == MR102)) /* MR475, MR515, MR59, MR67, MR102 */ { /* if errors and presumed noise make smoothing probability stronger */ if (((((pdfi != 0) && (prev_pdf != 0)) || (bfi != 0) || (prev_bf != 0)) && (voicedHangover > 1) && (inBackgroundNoise != 0) && ((mode == MR475) || (mode == MR515) || (mode == MR59)))) { /* bgMix = min(0.25, max(0.0, diff-0.55)) / 0.25; */ tmp_diff = sub(diff, 4506, pOverflow); /* 0.55 in Q13 */ } else { /* bgMix = min(0.25, max(0.0, diff-0.40)) / 0.25; */ tmp_diff = sub(diff, 3277, pOverflow); /* 0.4 in Q13 */ } /* max(0.0, diff-0.55) or */ /* max(0.0, diff-0.40) */ if (tmp_diff > 0) { tmp1 = tmp_diff; } else { tmp1 = 0; } /* min(0.25, tmp1) */ if (2048 < tmp1) { bgMix = 8192; } else { bgMix = shl(tmp1, 2, pOverflow); } if ((st->hangCount < 40) || (diff > 5325)) /* 0.65 in Q13 */ { /* disable mix if too short time since */ bgMix = 8192; } /* Smoothen the cb gain trajectory */ /* smoothing depends on mix constant bgMix */ L_sum = L_mult(6554, st->cbGainHistory[2], pOverflow); /* 0.2 in Q15; L_sum in Q17 */ for (i = 3; i < L_CBGAINHIST; i++) { L_sum = L_mac(L_sum, 6554, st->cbGainHistory[i], pOverflow); } cbGainMean = pv_round(L_sum, pOverflow); /* Q1 */ /* more smoothing in error and bg noise (NB no DFI used here) */ if (((bfi != 0) || (prev_bf != 0)) && (inBackgroundNoise != 0) && ((mode == MR475) || (mode == MR515) || (mode == MR59))) { /* 0.143 in Q15; L_sum in Q17 */ L_sum = L_mult(4681, st->cbGainHistory[0], pOverflow); for (i = 1; i < L_CBGAINHIST; i++) { L_sum 
= L_mac(L_sum, 4681, st->cbGainHistory[i], pOverflow); } cbGainMean = pv_round(L_sum, pOverflow); /* Q1 */ } /* cbGainMix = bgMix*cbGainMix + (1-bgMix)*cbGainMean; */ /* L_sum in Q15 */ L_sum = L_mult(bgMix, cbGainMix, pOverflow); L_sum = L_mac(L_sum, 8192, cbGainMean, pOverflow); L_sum = L_msu(L_sum, bgMix, cbGainMean, pOverflow); cbGainMix = pv_round(L_shl(L_sum, 2, pOverflow), pOverflow); /* Q1 */ } st->hangCount += 1; return (cbGainMix); }
/*************************************************************************
 *
 *  FUNCTION:  Pitch_ol
 *
 *  PURPOSE: Compute the open loop pitch lag.
 *
 *  DESCRIPTION:
 *      The open-loop pitch lag is determined based on the perceptually
 *      weighted speech signal. This is done in the following steps:
 *        - find three maxima of the correlation <sw[n],sw[n-T]>,
 *          dividing the search range into three parts:
 *               pit_min ... 2*pit_min-1
 *             2*pit_min ... 4*pit_min-1
 *             4*pit_min ...   pit_max
 *        - divide each maximum by <sw[n-t], sw[n-t]> where t is the delay at
 *          that maximum correlation.
 *        - select the delay of maximum normalized correlation (among the
 *          three candidates) while favoring the lower delay ranges.
 *
 *  NOTES (from the code below):
 *        - The signal is first copied into a local buffer, scaled down by 3
 *          bits if its energy saturates, scaled up by 3 bits if the energy
 *          is below 2^20, and copied unchanged otherwise.
 *        - When dtx is set, the VAD state is updated as a side effect; which
 *          updates are performed depends on whether VAD2 is defined.
 *
 *************************************************************************/
Word16 Pitch_ol (      /* o   : open loop pitch lag                         */
    vadState *vadSt,   /* i/o : VAD state struct                            */
    enum Mode mode,    /* i   : coder mode                                  */
    Word16 signal[],   /* i   : signal used to compute the open loop pitch  */
                       /*    signal[-pit_max] to signal[-1] should be known */
    Word16 pit_min,    /* i   : minimum pitch lag                           */
    Word16 pit_max,    /* i   : maximum pitch lag                           */
    Word16 L_frame,    /* i   : length of frame to compute pitch            */
    Word16 idx,        /* i   : frame index                                 */
    Flag dtx           /* i   : dtx flag; use dtx=1, do not use dtx=0       */
    )
{
    Word16 i, j;
    Word16 max1, max2, max3;         /* normalized maxima of the three sections */
    Word16 p_max1, p_max2, p_max3;   /* lags at which those maxima occur        */
    Word16 scal_flag = 0;
    Word32 t0;
#ifdef VAD2
    Word32  r01, r02, r03;
    Word32  rmax1, rmax2, rmax3;
#else
    Word16 corr_hp_max;
#endif
    Word32 corr[PIT_MAX+1], *corr_ptr;

    /* Scaled signal */

    Word16 scaled_signal[L_FRAME + PIT_MAX];
    Word16 *scal_sig, scal_fac;

#ifndef VAD2
    if (dtx)
    {  /* no test() call since this if is only in simulation env */
       /* update tone detection; the second argument is 1 only for
          MR475 and MR515 */
       test(); test();
       if ((sub(mode, MR475) == 0) || (sub(mode, MR515) == 0))
       {
          vad_tone_detection_update (vadSt, 1);
       }
       else
       {
          vad_tone_detection_update (vadSt, 0);
       }
    }
#endif

    /* scal_sig[-pit_max .. L_frame-1] maps onto scaled_signal[] */
    scal_sig = &scaled_signal[pit_max]; move16 ();

    /* energy of the signal including the pit_max samples of history */
    t0 = 0L;                            move32 ();
    for (i = -pit_max; i < L_frame; i++)
    {
        t0 = L_mac (t0, signal[i], signal[i]);
    }

    /*--------------------------------------------------------*
     * Scaling of input signal.                               *
     *                                                        *
     *   if Overflow        -> scal_sig[i] = signal[i]>>3     *
     *   else if t0 < 2^20  -> scal_sig[i] = signal[i]<<3     *
     *   else               -> scal_sig[i] = signal[i]        *
     *--------------------------------------------------------*/

    /*--------------------------------------------------------*
     *  Verification for risk of overflow.                    *
     *--------------------------------------------------------*/

    test ();
    if (L_sub (t0, MAX_32) == 0L)               /* Test for overflow */
    {
        for (i = -pit_max; i < L_frame; i++)
        {
            scal_sig[i] = shr (signal[i], 3);   move16 ();
        }
        scal_fac = 3;                           move16 ();
    }
    else if (L_sub (t0, (Word32) 1048576L) < (Word32) 0)
        /* if (t0 < 2^20) */
    {
        test ();
        for (i = -pit_max; i < L_frame; i++)
        {
            scal_sig[i] = shl (signal[i], 3);   move16 ();
        }
        scal_fac = -3;                          move16 ();
    }
    else
    {
        test ();
        for (i = -pit_max; i < L_frame; i++)
        {
            scal_sig[i] = signal[i];            move16 ();
        }
        scal_fac = 0;                           move16 ();
    }

    /* calculate all correlations of scal_sig, from pit_min to pit_max;
       corr_ptr is offset so that it can be indexed with negative lags */
    corr_ptr = &corr[pit_max];                  move32 ();
    comp_corr (scal_sig, L_frame, pit_max, pit_min, corr_ptr);

    /*--------------------------------------------------------------------*
     *  The pitch lag search is divided in three sections.                *
     *  Each section cannot have a pitch multiple.                        *
     *  We find a maximum for each section.                               *
     *  We compare the maximum of each section by favoring small lags.    *
     *                                                                    *
     *  First section:  lag delay = pit_max     downto 4*pit_min          *
     *  Second section: lag delay = 4*pit_min-1 downto 2*pit_min          *
     *  Third section:  lag delay = 2*pit_min-1 downto pit_min            *
     *--------------------------------------------------------------------*/

    /* mode dependent scaling in Lag_max: enabled for MR122 only */
    test ();
    if (sub(mode, MR122) == 0)
    {
       scal_flag = 1;                           move16 ();
    }
    else
    {
       scal_flag = 0;                           move16 ();
    }

#ifdef VAD2
    j = shl (pit_min, 2);
    p_max1 = Lag_max (corr_ptr, scal_sig, scal_fac, scal_flag, L_frame,
                      pit_max, j, &max1, &rmax1, &r01, dtx);
                      move16 (); /* function result */

    i = sub (j, 1);
    j = shl (pit_min, 1);
    p_max2 = Lag_max (corr_ptr, scal_sig, scal_fac, scal_flag, L_frame,
                      i, j, &max2, &rmax2, &r02, dtx);
                      move16 (); /* function result */

    i = sub (j, 1);
    p_max3 = Lag_max (corr_ptr, scal_sig, scal_fac, scal_flag, L_frame,
                      i, pit_min, &max3, &rmax3, &r03, dtx);
                      move16 (); /* function result */
#else
    j = shl (pit_min, 2);
    p_max1 = Lag_max (vadSt, corr_ptr, scal_sig, scal_fac, scal_flag, L_frame,
                      pit_max, j, &max1, dtx);
                      move16 (); /* function result */

    i = sub (j, 1);
    j = shl (pit_min, 1);
    p_max2 = Lag_max (vadSt, corr_ptr, scal_sig, scal_fac, scal_flag, L_frame,
                      i, j, &max2, dtx);
                      move16 (); /* function result */

    i = sub (j, 1);
    p_max3 = Lag_max (vadSt, corr_ptr, scal_sig, scal_fac, scal_flag, L_frame,
                      i, pit_min, &max3, dtx);
                      move16 (); /* function result */

    if (dtx)
    {  /* no test() call since this if is only in simulation env */
       test ();
       if (sub(idx, 1) == 0)
       {
          /* calculate max high-passed filtered correlation of all lags */
          hp_max (corr_ptr, scal_sig, L_frame, pit_max, pit_min, &corr_hp_max);

          /* update complex background detector */
          vad_complex_detection_update(vadSt, corr_hp_max);
       }
    }
#endif

    /*--------------------------------------------------------------------*
     * Compare the 3 sections maximum, and favor small lag.               *
     * A shorter-lag section wins as soon as its maximum exceeds          *
     * THRESHOLD times the current best.                                  *
     *--------------------------------------------------------------------*/

    test ();
    if (sub (mult (max1, THRESHOLD), max2) < 0)
    {
        max1 = max2;                            move16 ();
        p_max1 = p_max2;                        move16 ();
#ifdef VAD2
        if (dtx)
        {
            rmax1 = rmax2;                      move32 ();
            r01 = r02;                          move32 ();
        }
#endif
    }
    test ();
    if (sub (mult (max1, THRESHOLD), max3) < 0)
    {
        p_max1 = p_max3;                        move16 ();
#ifdef VAD2
        if (dtx)
        {
            rmax1 = rmax3;                      move32 ();
            r01 = r03;                          move32 ();
        }
#endif
    }

#ifdef VAD2
    if (dtx)
    {
        /* accumulate the values of the selected section in the VAD state */
        vadSt->L_Rmax = L_add(vadSt->L_Rmax, rmax1);   /* Save max correlation */
        vadSt->L_R0 =   L_add(vadSt->L_R0, r01);       /* Save max energy */
    }
#endif

    return (p_max1);
}
void GPUDrawScanlineCodeGenerator::Init() { mov(eax, dword[esp + _top]); // uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left; mov(edi, eax); shl(edi, 10 + m_sel.scalex); add(edi, edx); lea(edi, ptr[edi * 2 + (size_t)m_local.gd->vm]); // int steps = pixels - 8; sub(ecx, 8); if(m_sel.dtd) { // dither = GSVector4i::load<false>(&m_dither[top & 3][left & 3]); and(eax, 3); shl(eax, 5); and(edx, 3); shl(edx, 1); movdqu(xmm0, ptr[eax + edx + (size_t)m_dither]); movdqa(ptr[&m_local.temp.dither], xmm0); } mov(edx, dword[esp + _v]); if(m_sel.tme) { mov(esi, dword[&m_local.gd->tex]); // GSVector4i vt = GSVector4i(v.t).xxzzl(); cvttps2dq(xmm4, ptr[edx + offsetof(GSVertexSW, t)]); pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); // s = vt.xxxx().add16(m_local.d.s); // t = vt.yyyy().add16(m_local.d.t); pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); paddw(xmm2, ptr[&m_local.d.s]); if(!m_sel.sprite) { paddw(xmm3, ptr[&m_local.d.t]); } else { if(m_sel.ltf) { movdqa(xmm0, xmm3); psllw(xmm0, 8); psrlw(xmm0, 1); movdqa(ptr[&m_local.temp.vf], xmm0); } } movdqa(ptr[&m_local.temp.s], xmm2); movdqa(ptr[&m_local.temp.t], xmm3); } if(m_sel.tfx != 3) // != decal { // GSVector4i vc = GSVector4i(v.c).xxzzlh(); cvttps2dq(xmm6, ptr[edx + offsetof(GSVertexSW, c)]); pshuflw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); // r = vc.xxxx(); // g = vc.yyyy(); // b = vc.zzzz(); pshufd(xmm4, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 1, 1, 1)); pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2)); if(m_sel.iip) { // r = r.add16(m_local.d.r); // g = g.add16(m_local.d.g); // b = b.add16(m_local.d.b); paddw(xmm4, ptr[&m_local.d.r]); paddw(xmm5, ptr[&m_local.d.g]); paddw(xmm6, ptr[&m_local.d.b]); } movdqa(ptr[&m_local.temp.r], xmm4); movdqa(ptr[&m_local.temp.g], xmm5); movdqa(ptr[&m_local.temp.b], xmm6); } }
/*---------------------------------------------------------------------------*
 * Function  Qua_gain                                                        *
 * ~~~~~~~~~~~~~~~~~~                                                        *
 * Jointly quantizes the pitch gain and the code gain.                       *
 *                                                                           *
 * Steps performed:                                                          *
 *   1. Gain_predict() yields the predicted code gain gcode0[exp_gcode0].    *
 *   2. The unquantized optimum best_gain[] is computed from g_coeff[] and   *
 *      Gbk_presel() picks the first candidates cand1/cand2.                 *
 *   3. NCAN1 x NCAN2 candidate pairs of gbk1/gbk2 are searched for the      *
 *      minimum distortion.                                                  *
 *   4. The selected gains are written out and Gain_update() refreshes the   *
 *      table of past quantized energies.                                    *
 *                                                                           *
 * Inputs:                                                                   *
 *   code[]     :Innovative vector.                                          *
 *   g_coeff[]  :Five correlation terms (see parameter comments).            *
 *   exp_coeff[]:Q-format of each g_coeff[] entry.                           *
 *   L_subfr    :Subframe length.                                            *
 *   tameflag   :1 clips best_gain[0] to GPCLIP2 and skips candidate pairs   *
 *               whose pitch gain is not below GP0999.                       *
 *                                                                           *
 * Outputs:                                                                  *
 *   gain_pit   :Quantized pitch gain.                                       *
 *   gain_cod   :Quantized code gain.                                        *
 *                                                                           *
 * Return:                                                                   *
 *   Index of quantization: map1[index1]*NCODE2 + map2[index2].              *
 *                                                                           *
 * Notes:                                                                    *
 *   - past_qua_en[] is a function-local static, so the predictor memory is  *
 *     shared by every caller; the function is not reentrant.                *
 *   - The distortion is accumulated with the saturating L_add(). Plain      *
 *     '+=' on Word32 is undefined behaviour on signed overflow, and the     *
 *     five terms can each be close to full scale.                           *
 *---------------------------------------------------------------------------*/
Word16 Qua_gain(
   Word16 code[],       /* (i) Q13 :Innovative vector.             */
   Word16 g_coeff[],    /* (i)     :Correlations <xn y1> -2<y1 y1> */
                        /*            <y2,y2>, -2<xn,y2>, 2<y1,y2> */
   Word16 exp_coeff[],  /* (i)     :Q-Format g_coeff[]             */
   Word16 L_subfr,      /* (i)     :Subframe length.               */
   Word16 *gain_pit,    /* (o) Q14 :Pitch gain.                    */
   Word16 *gain_cod,    /* (o) Q1  :Code gain.                     */
   Word16 tameflag      /* (i)     : set to 1 if taming is needed  */
)
{
   Word16  i, j, index1, index2;
   Word16  cand1, cand2;
   Word16  exp, gcode0, exp_gcode0, gcode0_org, e_min ;
   Word16  nume, denom, inv_denom;
   Word16  exp1,exp2,exp_nume,exp_denom,exp_inv_denom,sft,tmp;
   Word16  g_pitch, g2_pitch, g_code, g2_code, g_pit_cod;
   Word16  coeff[5], coeff_lsf[5];
   Word16  exp_min[5];
   Word32  L_gbk12;
   Word32  L_tmp, L_dist_min, L_tmp1, L_tmp2, L_acc, L_accb;
   Word16  best_gain[2];

   /* Gain predictor, Past quantized energies = -14.0 in Q10 */
   static Word16 past_qua_en[4] = { -14336, -14336, -14336, -14336 };

   /*---------------------------------------------------*
    *-  energy due to innovation                       -*
    *-  predicted energy                               -*
    *-  predicted codebook gain => gcode0[exp_gcode0]  -*
    *---------------------------------------------------*/

   Gain_predict( past_qua_en, code, L_subfr, &gcode0, &exp_gcode0 );

   /*-----------------------------------------------------------------*
    *  pre-selection                                                  *
    *                                                                 *
    *  calculate best gain                                            *
    *                                                                 *
    *  tmp = -1./(4.*coeff[0]*coeff[2]-coeff[4]*coeff[4]) ;           *
    *  best_gain[0] = (2.*coeff[2]*coeff[1]-coeff[3]*coeff[4])*tmp ;  *
    *  best_gain[1] = (2.*coeff[0]*coeff[3]-coeff[1]*coeff[4])*tmp ;  *
    *  gbk_presel(best_gain,&cand1,&cand2,gcode0) ;                   *
    *-----------------------------------------------------------------*/

   /*-----------------------------------------------------------------*
    *  tmp = -1./(4.*coeff[0]*coeff[2]-coeff[4]*coeff[4]) ;           *
    *-----------------------------------------------------------------*/
   L_tmp1 = L_mult( g_coeff[0], g_coeff[2] );
   exp1   = add( add( exp_coeff[0], exp_coeff[2] ), 1-2 );
   L_tmp2 = L_mult( g_coeff[4], g_coeff[4] );
   exp2   = add( add( exp_coeff[4], exp_coeff[4] ), 1 );

   /* bring both products to the smaller exponent before subtracting */
   if( exp1 > exp2 ){
      L_tmp = L_sub( L_shr( L_tmp1, sub(exp1,exp2) ), L_tmp2 );
      exp = exp2;
   }
   else{
      L_tmp = L_sub( L_tmp1, L_shr( L_tmp2, sub(exp2,exp1) ) );
      exp = exp1;
   }
   sft = norm_l_g729( L_tmp );
   denom = extract_h( L_shl(L_tmp, sft) );
   exp_denom = sub( add( exp, sft ), 16 );

   inv_denom = div_s_g729(16384,denom);
   inv_denom = negate( inv_denom );
   exp_inv_denom = sub( 14+15, exp_denom );

   /*-----------------------------------------------------------------*
    *  best_gain[0] = (2.*coeff[2]*coeff[1]-coeff[3]*coeff[4])*tmp ;  *
    *-----------------------------------------------------------------*/
   L_tmp1 = L_mult( g_coeff[2], g_coeff[1] );
   exp1   = add( exp_coeff[2], exp_coeff[1] );
   L_tmp2 = L_mult( g_coeff[3], g_coeff[4] );
   exp2   = add( add( exp_coeff[3], exp_coeff[4] ), 1 );

   /* both operands get one extra right shift before the subtraction */
   if( exp1 > exp2 ){
      L_tmp = L_sub( L_shr( L_tmp1, add(sub(exp1,exp2),1 )), L_shr( L_tmp2,1 ) );
      exp = sub(exp2,1);
   }
   else{
      L_tmp = L_sub( L_shr( L_tmp1,1 ), L_shr( L_tmp2, add(sub(exp2,exp1),1 )) );
      exp = sub(exp1,1);
   }
   sft = norm_l_g729( L_tmp );
   nume = extract_h( L_shl(L_tmp, sft) );
   exp_nume = sub( add( exp, sft ), 16 );

   sft = sub( add( exp_nume, exp_inv_denom ), (9+16-1) );
   L_acc = L_shr( L_mult( nume,inv_denom ), sft );
   best_gain[0] = extract_h( L_acc );             /*-- best_gain[0]:Q9 --*/

   if( tameflag == 1 ){
      if( best_gain[0] > GPCLIP2 ){
         best_gain[0] = GPCLIP2;
      }
   }

   /*-----------------------------------------------------------------*
    *  best_gain[1] = (2.*coeff[0]*coeff[3]-coeff[1]*coeff[4])*tmp ;  *
    *-----------------------------------------------------------------*/
   L_tmp1 = L_mult( g_coeff[0], g_coeff[3] );
   exp1   = add( exp_coeff[0], exp_coeff[3] ) ;
   L_tmp2 = L_mult( g_coeff[1], g_coeff[4] );
   exp2   = add( add( exp_coeff[1], exp_coeff[4] ), 1 );

   if( exp1 > exp2 ){
      L_tmp = L_sub( L_shr( L_tmp1, add(sub(exp1,exp2),1) ), L_shr( L_tmp2,1 ) );
      exp = sub(exp2,1);
   }
   else{
      L_tmp = L_sub( L_shr( L_tmp1,1 ), L_shr( L_tmp2, add(sub(exp2,exp1),1) ) );
      exp = sub(exp1,1);
   }
   sft = norm_l_g729( L_tmp );
   nume = extract_h( L_shl(L_tmp, sft) );
   exp_nume = sub( add( exp, sft ), 16 );

   sft = sub( add( exp_nume, exp_inv_denom ), (2+16-1) );
   L_acc = L_shr( L_mult( nume,inv_denom ), sft );
   best_gain[1] = extract_h( L_acc );             /*-- best_gain[1]:Q2 --*/

   /*--- Change Q-format of gcode0 ( Q[exp_gcode0] -> Q4 ) ---*/
   if( exp_gcode0 >= 4 ){
      gcode0_org = shr_g729( gcode0, sub(exp_gcode0,4) );
   }
   else{
      L_acc = L_deposit_l_g729( gcode0 );
      L_acc = L_shl( L_acc, sub( (4+16), exp_gcode0 ) );
      gcode0_org = extract_h( L_acc );              /*-- gcode0_org:Q4 --*/
   }

   /*----------------------------------------------*
    *   - presearch for gain codebook -            *
    *----------------------------------------------*/

   Gbk_presel(best_gain, &cand1, &cand2, gcode0_org );

   /*---------------------------------------------------------------------------*
    *                                                                           *
    * Find the best quantizer.                                                  *
    *                                                                           *
    *  dist_min = MAX_32;                                                       *
    *  for ( i=0 ; i<NCAN1 ; i++ ){                                             *
    *    for ( j=0 ; j<NCAN2 ; j++ ){                                           *
    *      g_pitch = gbk1[cand1+i][0] + gbk2[cand2+j][0];                       *
    *      g_code = gcode0 * (gbk1[cand1+i][1] + gbk2[cand2+j][1]);             *
    *      dist = g_pitch*g_pitch * coeff[0]                                    *
    *           + g_pitch         * coeff[1]                                    *
    *           + g_code*g_code   * coeff[2]                                    *
    *           + g_code          * coeff[3]                                    *
    *           + g_pitch*g_code  * coeff[4] ;                                  *
    *                                                                           *
    *      if (dist < dist_min){                                                *
    *        dist_min = dist;                                                   *
    *        indice1 = cand1 + i ;                                              *
    *        indice2 = cand2 + j ;                                              *
    *      }                                                                    *
    *    }                                                                      *
    *  }                                                                        *
    *                                                                           *
    * g_pitch         = Q13                                                     *
    * g_pitch*g_pitch = Q11:(13+13+1-16)                                        *
    * g_code          = Q[exp_gcode0-3]:(exp_gcode0+(13-1)+1-16)                *
    * g_code*g_code   = Q[2*exp_gcode0-21]:(exp_gcode0-3+exp_gcode0-3+1-16)     *
    * g_pitch*g_code  = Q[exp_gcode0-5]:(13+exp_gcode0-3+1-16)                  *
    *                                                                           *
    * term 0: g_pitch*g_pitch*coeff[0] ;exp_min0 = 13             +exp_coeff[0] *
    * term 1: g_pitch        *coeff[1] ;exp_min1 = 14             +exp_coeff[1] *
    * term 2: g_code*g_code  *coeff[2] ;exp_min2 = 2*exp_gcode0-21+exp_coeff[2] *
    * term 3: g_code         *coeff[3] ;exp_min3 = exp_gcode0  - 3+exp_coeff[3] *
    * term 4: g_pitch*g_code *coeff[4] ;exp_min4 = exp_gcode0  - 4+exp_coeff[4] *
    *                                                                           *
    *---------------------------------------------------------------------------*/

   exp_min[0] = add( exp_coeff[0], 13 );
   exp_min[1] = add( exp_coeff[1], 14 );
   exp_min[2] = add( exp_coeff[2], sub( shl( exp_gcode0, 1 ), 21 ) );
   exp_min[3] = add( exp_coeff[3], sub( exp_gcode0, 3 ) );
   exp_min[4] = add( exp_coeff[4], sub( exp_gcode0, 4 ) );

   e_min = exp_min[0];
   for(i=1; i<5; i++){
      if( exp_min[i] < e_min ){
         e_min = exp_min[i];
      }
   }

   /* align coeff[] and save in special 32 bit double precision */
   for(i=0; i<5; i++){
      j = sub( exp_min[i], e_min );
      /* Place g_coeff[i] in the upper 16 bits. A multiply is used because
       * left-shifting a negative value is undefined behaviour in C; the
       * product always fits in 32 bits. */
      L_tmp = (Word32)g_coeff[i] * 65536;
      L_tmp = L_shr( L_tmp, j );          /* L_tmp:Q[exp_g_coeff[i]+16-j] */
      L_Extract( L_tmp, &coeff[i], &coeff_lsf[i] );          /* DPF */
   }

   /* Codebook search */
   L_dist_min = MAX_32;

   /* defaults in case no candidate is accepted; also keeps compilers
    * from warning about uninitialized use */
   index1 = cand1;
   index2 = cand2;

   for(i=0; i<NCAN1; i++){
      for(j=0; j<NCAN2; j++){
         g_pitch = add( gbk1[cand1+i][0], gbk2[cand2+j][0] );     /* Q14 */

         /* with taming active, only pitch gains below GP0999 are eligible */
         if( (tameflag == 1) && (g_pitch >= GP0999) ){
            continue;
         }

         L_acc = L_deposit_l_g729( gbk1[cand1+i][1] );
         L_accb = L_deposit_l_g729( gbk2[cand2+j][1] );           /* Q13 */
         L_tmp = L_add( L_acc,L_accb );
         tmp = extract_l( L_shr( L_tmp,1 ) );                     /* Q12 */

         g_code   = mult( gcode0, tmp );         /*  Q[exp_gcode0+12-15] */
         g2_pitch = mult(g_pitch, g_pitch);                       /* Q13 */
         g2_code  = mult(g_code,  g_code);       /* Q[2*exp_gcode0-6-15] */
         g_pit_cod= mult(g_code,  g_pitch);      /* Q[exp_gcode0-3+14-15] */

         /* saturating accumulation of the five distortion terms */
         L_tmp = Mpy_32_16(coeff[0], coeff_lsf[0], g2_pitch);
         L_tmp = L_add(L_tmp, Mpy_32_16(coeff[1], coeff_lsf[1], g_pitch) );
         L_tmp = L_add(L_tmp, Mpy_32_16(coeff[2], coeff_lsf[2], g2_code) );
         L_tmp = L_add(L_tmp, Mpy_32_16(coeff[3], coeff_lsf[3], g_code) );
         L_tmp = L_add(L_tmp, Mpy_32_16(coeff[4], coeff_lsf[4], g_pit_cod) );

         if( L_tmp < L_dist_min ){
            L_dist_min = L_tmp;
            index1 = add(cand1,i);
            index2 = add(cand2,j);
         }
      }
   }

   /* Read the quantized gain */

   /*-----------------------------------------------------------------*
    * *gain_pit = gbk1[indice1][0] + gbk2[indice2][0];                *
    *-----------------------------------------------------------------*/
   *gain_pit = add( gbk1[index1][0], gbk2[index2][0] );      /* Q14 */

   /*-----------------------------------------------------------------*
    * *gain_code = (gbk1[indice1][1]+gbk2[indice2][1]) * gcode0;      *
    *-----------------------------------------------------------------*/
   L_gbk12 = (Word32)gbk1[index1][1] + (Word32)gbk2[index2][1]; /* Q13 */
   tmp = extract_l( L_shr( L_gbk12,1 ) );                     /* Q12 */
   L_acc = L_mult(tmp, gcode0);                /* Q[exp_gcode0+12+1] */

   L_acc = L_shl(L_acc, add( negate(exp_gcode0),(-12-1+1+16) ));
   *gain_cod = extract_h( L_acc );                             /* Q1 */

   /*----------------------------------------------*
    * update table of past quantized energies      *
    *----------------------------------------------*/
   Gain_update( past_qua_en, L_gbk12 );

   return( add( map1[index1]*(Word16)NCODE2, map2[index2] ) );
}
void GPUDrawScanlineCodeGenerator::Generate() { push(esi); push(edi); Init(); align(16); L("loop"); // GSVector4i test = m_test[7 + (steps & (steps >> 31))]; mov(edx, ecx); sar(edx, 31); and(edx, ecx); shl(edx, 4); movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); // movdqu(xmm1, ptr[edi]); movq(xmm1, qword[edi]); movhps(xmm1, qword[edi + 8]); // ecx = steps // esi = tex (tme) // edi = fb // xmm1 = fd // xmm2 = s // xmm3 = t // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test TestMask(); SampleTexture(); // xmm1 = fd // xmm3 = a // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test // xmm0, xmm2 = free ColorTFX(); AlphaBlend(); Dither(); WriteFrame(); L("step"); // if(steps <= 0) break; test(ecx, ecx); jle("exit", T_NEAR); Step(); jmp("loop", T_NEAR); L("exit"); pop(edi); pop(esi); ret(8); }
//------------------------------------------------------------------------------------------------------------------------ // Call stubs are used to call Java from C // // GR_I0 - call wrapper address : address // GR_I1 - result : intptr_t* // GR_I2 - result type : BasicType // GR_I3 - method : methodOop // GR_I4 - interpreter entry point : address // GR_I5 - parameter block : intptr_t* // GR_I6 - parameter count in words : int // GR_I7 - thread : Thread* // address generate_call_stub(address& return_address) { StubCodeMark mark(this, "StubRoutines", "call_stub"); const Register result = GR_I1; const Register type = GR_I2; const Register method = GR_I3; const Register entry_ptr = GR_I4; const Register parms = GR_I5; const Register parm_count = GR_I6; const Register thread = GR_I7; const Register parm_size = GR31_SCRATCH; const Register entry = GR30_SCRATCH; const Register arg = GR29_SCRATCH; const Register out_tos = GR49; // Equivalent of GR_Otos const Register out_parms = GR50; // Equivalent of GR_Olocals (unused) const BranchRegister entry_br = BR6_SCRATCH; const PredicateRegister no_args = PR6_SCRATCH; address start = __ emit_fd(); // Must allocate 8 output registers in case we go thru an i2c // and the callee needs 8 input registers __ alloc(GR_Lsave_PFS, 8, 9, 8, 0); // save AR_PFS __ sxt4(parm_count, parm_count); // # of parms __ mov(GR_Lsave_SP, SP); // save caller's SP __ mov(GR_entry_frame_GR5, GR5_poll_page_addr); __ mov(GR_entry_frame_GR6, GR6_caller_BSP); __ mov(GR_entry_frame_GR7, GR7_reg_stack_limit); // We can not tolerate an eager RSE cpu. 
Itanium-1 & 2 do not support // this feature but we turn it off anyway const Register RSC = GR2_SCRATCH; __ mov(RSC, AR_RSC); __ and3(RSC, -4, RSC); // Turn off two low bits __ mov(AR_RSC, RSC); // enforced lazy mode __ shladd(parm_size, parm_count, Interpreter::logStackElementSize(), GR0); // size of stack space for the parms __ mov(GR_Lsave_RP, RP); // save return address __ add(parm_size, parm_size, 15); // round up to multiple of 16 bytes. we use // caller's 16-byte scratch area for params, // so no need to add 16 to the current frame size. __ mov(GR_Lsave_LC, AR_LC); // save AR_LC __ add(out_parms, SP, Interpreter::stackElementSize()); // caller's SP+8 is 1st parm addr == target method locals addr __ and3(parm_size, parm_size, -16); __ cmp4(PR0, no_args, 0, parm_count, Assembler::less); // any parms? __ mov(GR_entry_frame_GR4, GR4_thread); // save GR4_thread: it's a preserved register __ sub(SP, SP, parm_size); // allocate the space for args + scratch __ mov(entry_br, entry_ptr); __ mov(GR27_method, method); // load method __ mov(GR4_thread, thread); // load thread if (TaggedStackInterpreter) __ shl(parm_count, parm_count, 1); // 2x tags __ sub(parm_count, parm_count, 1); // cloop counts down to zero // Initialize the register and memory stack limits for stack checking in compiled code __ add(GR7_reg_stack_limit, thread_(register_stack_limit)); __ mov(GR6_caller_BSP, AR_BSP); // load register SP __ movl(GR5_poll_page_addr, (intptr_t) os::get_polling_page() ); __ ld8(GR7_reg_stack_limit, GR7_reg_stack_limit); // load register stack limit Label exit; __ mov(AR_LC, parm_count); __ mov(out_tos, out_parms); // out_tos = &out_parms[0] __ br(no_args, exit, Assembler::dpnt); // Reverse argument list and set up sender tos Label copy_word; __ bind(copy_word); __ ld8(arg, parms, BytesPerWord); // load *parms++ __ st8(out_tos, arg, -BytesPerWord); // store *out_tos-- __ cloop(copy_word, Assembler::sptk, Assembler::few); // Bias stack for tags. 
if (TaggedStackInterpreter) __ st8(out_tos, GR0, -BytesPerWord); __ bind(exit); __ mov(GR_entry_frame_TOS, out_tos); // so entry_frame_argument_at can find TOS // call interpreter frame manager // Remember the senderSP so we interpreter can pop c2i arguments off of the stack // when called via a c2i. __ mov(GR28_sender_SP, SP); __ call(entry_br); return_address = __ pc(); // Store result depending on type. Everything that is not // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. const PredicateRegister is_obj = PR6_SCRATCH; const PredicateRegister is_flt = PR7_SCRATCH; const PredicateRegister is_dbl = PR8_SCRATCH; const PredicateRegister is_lng = PR9_SCRATCH; __ cmp4(is_obj, PR0, T_OBJECT, type, Assembler::equal); __ cmp4(is_flt, PR0, T_FLOAT, type, Assembler::equal); __ st4( result, GR_RET); __ st8( is_obj, result, GR_RET); __ stfs(is_flt, result, FR_RET); __ cmp4(is_dbl, PR0, T_DOUBLE, type, Assembler::equal); __ stfd(is_dbl, result, FR_RET); __ cmp4(is_lng, PR0, T_LONG, type, Assembler::equal); __ mov(RP, GR_Lsave_RP); __ st8( is_lng, result, GR_RET); __ mov(GR4_thread, GR_entry_frame_GR4); __ mov(GR6_caller_BSP, GR_entry_frame_GR6); __ mov(GR7_reg_stack_limit, GR_entry_frame_GR7); __ mov(GR5_poll_page_addr, GR_entry_frame_GR5); __ mov(AR_PFS, GR_Lsave_PFS); __ mov(AR_LC, GR_Lsave_LC); __ mov(SP, GR_Lsave_SP); __ ret(); return start; }
/*
 * Az_lsp: converts the predictor coefficients a[] into line spectral pairs.
 *
 * The sum and difference polynomials are built first in Q11; if any basic
 * operation raises Overflow during that pass they are rebuilt in Q10 and the
 * Q10 Chebyshev evaluator is used instead. Roots are then located by scanning
 * grid[] for sign changes, alternating between the two polynomials, refining
 * each bracket with four bisections and one linear interpolation.
 *
 * If fewer than M roots are found, lsp[] is filled from old_lsp[].
 */
void Az_lsp(
  Word16 a[],        /* (i) Q12 : predictor coefficients              */
  Word16 lsp[],      /* (o) Q15 : line spectral pairs                 */
  Word16 old_lsp[]   /* (i)     : old lsp[] (in case not found 10 roots) */
)
{
  Word16 k, grid_idx, n_found;
  Word16 x_lo, y_lo, x_hi, y_hi, x_mid, y_mid, x_root;
  Word16 half, dx, slope, dy, shift;
  Word16 a_head, a_tail;
  Word16 *poly;
  Word16 sum_pol[M/2+1], dif_pol[M/2+1];
  Word32 acc, prod;
  Flag   q11_overflowed;
  Word16 (*cheb_eval)(Word16 x, Word16 f[], Word16 n);

  /*-------------------------------------------------------------*
   * Sum and difference polynomials, with the trivial roots      *
   * divided out:                                                *
   *   F1(z) <--- F1(z)/(1+z**-1),  F2(z) <--- F2(z)/(1-z**-1)   *
   *                                                             *
   *   sum_pol[0] = dif_pol[0] = 1.0                             *
   *   sum_pol[i+1] = a[i+1] + a[M-i] - sum_pol[i]               *
   *   dif_pol[i+1] = a[i+1] - a[M-i] + dif_pol[i]               *
   *-------------------------------------------------------------*/

  q11_overflowed = 0;
  cheb_eval = Chebps_11;

  sum_pol[0] = 2048;                    /* 1.0 in Q11 */
  dif_pol[0] = 2048;                    /* 1.0 in Q11 */

  for (k = 0; k < NC; k++)
  {
    a_head = a[k+1];
    a_tail = a[M-k];

    /* Overflow is cleared before every step and sampled after it */
    Overflow = 0;
    acc  = L_mult(a_head, 16384);       /* (a[k+1] + a[M-k]) >> 1 */
    acc  = L_mac(acc, a_tail, 16384);   /* Q12 -> Q11             */
    half = extract_h(acc);
    q11_overflowed |= (Overflow != 0);

    Overflow = 0;
    sum_pol[k+1] = sub(half, sum_pol[k]);
    q11_overflowed |= (Overflow != 0);

    Overflow = 0;
    acc  = L_mult(a_head, 16384);       /* (a[k+1] - a[M-k]) >> 1 */
    acc  = L_msu(acc, a_tail, 16384);   /* Q12 -> Q11             */
    half = extract_h(acc);
    q11_overflowed |= (Overflow != 0);

    Overflow = 0;
    dif_pol[k+1] = add(half, dif_pol[k]);
    q11_overflowed |= (Overflow != 0);
  }

  if (q11_overflowed)
  {
    /* Q11 pass overflowed: redo everything with half the multiplier */
    /* (8192 instead of 16384) and evaluate with the Q10 routine.    */
    cheb_eval = Chebps_10;

    sum_pol[0] = 1024;                  /* 1.0 in Q10 */
    dif_pol[0] = 1024;                  /* 1.0 in Q10 */

    for (k = 0; k < NC; k++)
    {
      a_head = a[k+1];
      a_tail = a[M-k];

      acc  = L_mult(a_head, 8192);
      acc  = L_mac(acc, a_tail, 8192);
      half = extract_h(acc);
      sum_pol[k+1] = sub(half, sum_pol[k]);

      acc  = L_mult(a_head, 8192);
      acc  = L_msu(acc, a_tail, 8192);
      half = extract_h(acc);
      dif_pol[k+1] = add(half, dif_pol[k]);
    }
  }

  /*-------------------------------------------------------------*
   * Root search with the Chebyshev polynomial evaluation        *
   *-------------------------------------------------------------*/

  n_found  = 0;                         /* number of roots stored so far */
  poly     = sum_pol;                   /* search starts on the sum pol. */
  grid_idx = 0;

  x_lo = grid[0];
  y_lo = (*cheb_eval)(x_lo, poly, NC);

  while ((n_found < M) && (grid_idx < GRID_POINTS))
  {
    grid_idx = add(grid_idx, 1);

    x_hi = x_lo;
    y_hi = y_lo;
    x_lo = grid[grid_idx];
    y_lo = (*cheb_eval)(x_lo, poly, NC);

    prod = L_mult(y_lo, y_hi);
    if (prod > (Word32)0)
    {
      continue;                         /* same sign: no root in this cell */
    }

    /* halve the bracketing interval four times */
    for (k = 4; k > 0; k--)
    {
      x_mid = add(shr(x_lo, 1), shr(x_hi, 1));   /* (x_lo + x_hi)/2 */
      y_mid = (*cheb_eval)(x_mid, poly, NC);

      prod = L_mult(y_lo, y_mid);
      if (prod <= (Word32)0)
      {
        y_hi = y_mid;
        x_hi = x_mid;
      }
      else
      {
        y_lo = y_mid;
        x_lo = x_mid;
      }
    }

    /*-------------------------------------------------------------*
     * Linear interpolation                                        *
     *   x_root = x_lo - y_lo*(x_hi-x_lo)/(y_hi-y_lo)              *
     *-------------------------------------------------------------*/

    dx = sub(x_hi, x_lo);
    dy = sub(y_hi, y_lo);

    if (dy == 0)
    {
      x_root = x_lo;
    }
    else
    {
      slope = abs_s(dy);
      shift = norm_s(slope);
      slope = shl(slope, shift);
      slope = div_s((Word16)16383, slope);
      acc   = L_mult(dx, slope);
      acc   = L_shr(acc, sub(20, shift));
      slope = extract_l(acc);           /* (x_hi-x_lo)/(y_hi-y_lo) in Q11 */

      if (dy < 0)
      {
        slope = negate(slope);
      }

      acc    = L_mult(y_lo, slope);     /* result in Q26 */
      acc    = L_shr(acc, 11);          /* result in Q15 */
      x_root = sub(x_lo, extract_l(acc));
    }

    lsp[n_found] = x_root;
    x_lo = x_root;
    n_found = add(n_found, 1);

    /* the next root is searched on the other polynomial */
    poly = (poly == sum_pol) ? dif_pol : sum_pol;

    y_lo = (*cheb_eval)(x_lo, poly, NC);
  }

  /* Fewer than M roots: fall back to the previous LSP vector */
  if (sub(n_found, M) < 0)
  {
    for (k = 0; k < M; k++)
    {
      lsp[k] = old_lsp[k];
    }
  }

  return;
}