void dec3_10( Shortword index, /* (i) : indx of 3 pulses (10 bits) */ Shortword cod[], /* (o) Q12: algebraic (fixed) codebook excitation */ Shortword l_subfr /* (i) : lenght of subframe (53 or 54) */ ) { Shortword i, pos, gsign, sign; for (i = 0; i < l_subfr; i++) cod[i] = 0; if ((index & 0x0200) != 0) gsign = -1; else gsign = 1; for (i = 4; i >= 0; i -= 2) { pos = add(shr(extract_l(L_mult((index & 0x0007), STEP)), 1), i); if (i == 2) sign = negate(gsign); else sign = gsign; if (pos < l_subfr) cod[pos] = sign; index = shr(index, 3); } return; }

void decode_4i40_17bits( Word16 sign, /* i : signs of 4 pulses. */ Word16 index, /* i : Positions of the 4 pulses. */ Word16 cod[] /* o : algebraic (fixed) codebook excitation */ ) { Word16 i, j; Word16 pos[NB_PULSE]; /* Decode the positions */ i = index & 7; logic16 (); i = dgray[i]; move16 (); pos[0] = add(i, shl(i, 2)); /* pos0 =i*5 */ move16 (); index = shr(index, 3); i = index & 7; logic16 (); i = dgray[i]; move16 (); i = add(i, shl(i, 2)); /* pos1 =i*5+1 */ pos[1] = add(i, 1); move16 (); index = shr(index, 3); i = index & 7; logic16 (); i = dgray[i]; move16 (); i = add(i, shl(i, 2)); /* pos2 =i*5+1 */ pos[2] = add(i, 2); move16 (); index = shr(index, 3); j = index & 1; logic16 (); index = shr(index, 1); i = index & 7; logic16 (); i = dgray[i]; move16 (); i = add(i, shl(i, 2)); /* pos3 =i*5+3+j */ i = add(i, 3); pos[3] = add(i, j); move16 (); /* decode the signs and build the codeword */ for (i = 0; i < L_SUBFR; i++) { cod[i] = 0; move16 (); } for (j = 0; j < NB_PULSE; j++) { i = sign & 1; logic16 (); sign = shr(sign, 1); test (); if (i != 0) { cod[pos[j]] = 8191; move16 (); } else { cod[pos[j]] = -8192; move16 (); } } return; }

int decode64 (unsigned char *dst, int size, char *src) { unsigned char *sptr = (unsigned char *) src; unsigned char *dptr = dst; unsigned char *end = dst + size; unsigned char c1, c2, c3, c4; do { c1 = char64(*sptr); c2 = char64(*(sptr + 1)); if (c1 == 255 || c2 == 255) break; *dptr++ = shl(c1, 2) | shr((c2 & 0x30), 4); if (dptr >= end) break; c3 = char64(*(sptr + 2)); if (c3 == 255) break; *dptr++ = shl((c2 & 0x0f), 4) | shr((c3 & 0x3c), 2); if (dptr >= end) break; c4 = char64(*(sptr + 3)); if (c4 == 255) break; *dptr++ = shl((c3 & 0x03), 6) | c4; sptr += 4; } while (dptr < end); return (dptr - dst); }

Word16 sqrts(Word16 x) { Word16 xb, y, exp, idx, sub_frac, sub_tab; Word32 a0; if(x <= 0){ y = 0; } else{ exp = norm_s(x); /* use 65-entry table */ xb = shl(x, exp); // normalization of x idx = shr(xb, 9); // for 65 entry table a0 = L_deposit_h(tabsqrt[idx]); // Q31 table look-up value sub_frac = shl((Word16)(xb & 0x01FF), 6); // Q15 sub-fraction sub_tab = sub(tabsqrt[idx+1], tabsqrt[idx]); // Q15 table interval for interpolation a0 = L_mac(a0, sub_frac, sub_tab); // Q31 linear interpolation between table entries if(exp & 0x0001){ exp = shr(add(exp, 1), 1); // normalization of sqrt() a0 = L_shr(a0, exp); y = intround(a0); // Q15 a0 = L_mac(a0, 13573, y); // Q31 incorporate the missing "/sqrt(2)" } else{ exp = shr(exp, 1); // normalization of sqrt() a0 = L_shr(a0, exp); // Q31 } y = intround(a0); // Q15 } return y; }

void Int_lpc( Word16 lsp_old[], /* input : LSP vector of past frame */ Word16 lsp_new[], /* input : LSP vector of present frame */ Word16 lsf_int[], /* output: interpolated lsf coefficients */ Word16 lsf_new[], Word16 Az[] /* output: interpolated Az() for the 2 subframes */ ) { Word16 i; Word16 lsp[M]; /* lsp[i] = lsp_new[i] * 0.5 + lsp_old[i] * 0.5 */ for (i = 0; i < M; i++) { lsp[i] = add(shr(lsp_new[i], 1), shr(lsp_old[i], 1)); } Lsp_Az(lsp, Az); Lsp_lsf(lsp, lsf_int, M); /* transformation from LSP to LSF (freq.domain) */ Lsp_lsf(lsp_new, lsf_new, M); /* transformation from LSP to LSF (freq.domain) */ return; }

static inline uint64_t Bitpack_newu(uint64_t word, unsigned width, unsigned lsb, uint64_t value) { unsigned hi = lsb + width; // one beyond the most significant bit assert(hi <= 64); if (!Bitpack_fitsu(value, width)) RAISE(Bit_Overflow); return shl(shr(word, hi), hi) // high part | shr(shl(word, 64 - lsb), 64 - lsb) // low part | (value << lsb); // new part }

void Decod_ACELP( Word16 sign, /* (i) : signs of 4 pulses. */ Word16 index, /* (i) : Positions of the 4 pulses. */ Word16 cod[] /* (o) Q13 : algebraic (fixed) codebook excitation */ ) { Word16 i, j; Word16 pos[4]; /* Decode the positions */ i = index & (Word16)7; pos[0] = add(i, shl(i, 2)); /* pos0 =i*5 */ index = shr(index, 3); i = index & (Word16)7; i = add(i, shl(i, 2)); /* pos1 =i*5+1 */ pos[1] = add(i, 1); index = shr(index, 3); i = index & (Word16)7; i = add(i, shl(i, 2)); /* pos2 =i*5+1 */ pos[2] = add(i, 2); index = shr(index, 3); j = index & (Word16)1; index = shr(index, 1); i = index & (Word16)7; i = add(i, shl(i, 2)); /* pos3 =i*5+3+j */ i = add(i, 3); pos[3] = add(i, j); /* decode the signs and build the codeword */ for (i=0; i<L_SUBFR; i++) { cod[i] = 0; } for (j=0; j<4; j++) { i = sign & (Word16)1; sign = shr(sign, 1); if (i != 0) { cod[pos[j]] = 8191; /* Q13 +1.0 */ } else { cod[pos[j]] = -8192; /* Q13 -1.0 */ } } return; }

uint64_t Bitpack_newu(uint64_t word, unsigned width, unsigned lsb, uint64_t value) { unsigned hi = lsb + width; /* one beyond the most significant bit */ assert(hi <= 64); if (!Bitpack_fitsu(value, width)) RAISE(Bitpack_Overflow); return shl(shr(word, hi), hi) /* high part */ | shr(shl(word, 64 - lsb), 64 - lsb) /* low part */ | (value << lsb); /* new part */ }

/** Performs a carryless multiplication of two 128bit integers modulo \f$ x^{128} + x^7 + x^2 + x + 1 \f$ */ static __m128i gmul(__m128i v, __m128i h) { /* multiply */ __m128i z0, z1, z2, tmp; z0 = _mm_clmulepi64_si128(v, h, 0x11); z2 = _mm_clmulepi64_si128(v, h, 0x00); __m128i tmpv = _mm_srli_si128(v, 8); tmpv = _mm_xor_si128(tmpv, v); __m128i tmph = _mm_srli_si128(h, 8); tmph = _mm_xor_si128(tmph, h); z1 = _mm_clmulepi64_si128(tmpv, tmph, 0x00); z1 = _mm_xor_si128(z1, z0); z1 = _mm_xor_si128(z1, z2); tmp = _mm_srli_si128(z1, 8); __m128i pl = _mm_xor_si128(z0, tmp); tmp = _mm_slli_si128(z1, 8); __m128i ph = _mm_xor_si128(z2, tmp); tmp = _mm_srli_epi64(ph, 63); tmp = _mm_srli_si128(tmp, 8); pl = shl(pl, 1); pl = _mm_xor_si128(pl, tmp); ph = shl(ph, 1); /* reduce */ __m128i b, c; b = c = _mm_slli_si128(ph, 8); b = _mm_slli_epi64(b, 62); c = _mm_slli_epi64(c, 57); tmp = _mm_xor_si128(b, c); __m128i d = _mm_xor_si128(ph, tmp); __m128i e = shr(d, 1); __m128i f = shr(d, 2); __m128i g = shr(d, 7); pl = _mm_xor_si128(pl, d); pl = _mm_xor_si128(pl, e); pl = _mm_xor_si128(pl, f); pl = _mm_xor_si128(pl, g); return pl; }

void WebRtcG729fix_Lsf_lsp2( int16_t lsf[], /* (i) Q13 : lsf[m] (range: 0.0<=val<PI) */ int16_t lsp[], /* (o) Q15 : lsp[m] (range: -1<=val<1) */ int16_t m /* (i) : LPC order */ ) { int16_t i, ind; int16_t offset; /* in Q8 */ int16_t freq; /* normalized frequency in Q15 */ int32_t L_tmp; for(i=0; i<m; i++) { /* freq = abs_s(freq);*/ freq = mult(lsf[i], 20861); /* 20861: 1.0/(2.0*PI) in Q17 */ ind = shr(freq, 8); /* ind = b8-b15 of freq */ offset = freq & (int16_t)0x00ff; /* offset = b0-b7 of freq */ if (ind > 63){ ind = 63; /* 0 <= ind <= 63 */ } /* lsp[i] = table2[ind]+ (slope_cos[ind]*offset >> 12) */ L_tmp = L_mult(WebRtcG729fix_slope_cos[ind], offset); /* L_tmp in Q28 */ lsp[i] = WebRtcSpl_AddSatW16(WebRtcG729fix_table2[ind], extract_l(L_shr(L_tmp, 13))); } }

void get_pq_polynomials( Word32 *f, /* Q23 */ Word16 *lsp) /* Q15 */ { Word16 i, n, hi, lo; Word16 index, offset, coslsp, c; Word32 a0; f[0] = L_mult(2048, 2048); // 1.0 Q23 for(i = 1; i <= LPCO ; i++) f[i]= 0; for(n=1; n<=(LPCO>>1); n++) { /* cosine mapping */ index = shr(lsp[2*n-2],9); // Q6 offset = lsp[2*n-2]&(Word16)0x01ff; // Q9 a0 = L_mult(sub(costable[index+1], costable[index]), offset); // Q10 coslsp = add(costable[index], intround(L_shl(a0, 6))); // Q15 cos((double)PI*lsp[2*n-2]) c = coslsp; // Q14 c = 2. * cos((double)PI*lsp[2*n-2]) for(i = 2*n; i >= 2; i--) { L_Extract(f[i-1], &hi, &lo); f[i] = L_add(f[i], f[i-2]); // Q23 f[i] += f[i-2] a0 = Mpy_32_16(hi, lo, c); // Q22 f[i] = L_sub(f[i], L_shl(a0,1)); // Q23 f[i] += f[i-2] - c*f[i-1]; } f[1] = L_msu(f[1], c, 256); // Q23 f[1] -= c; } return; }

void ngen_CC_param(shil_opcode& op, shil_param& prm, CanonicalParamType tp) { switch (tp) { case CPT_u32: case CPT_ptr: case CPT_f32: { CC_PS t = { tp, &prm }; CC_pars.push_back(t); } break; //store from EAX case CPT_u64rvL: case CPT_u32rv: mov(rcx, rax); reg_to_sh(prm, ecx); break; case CPT_u64rvH: shr(rcx, 32); reg_to_sh(prm, ecx); break; //Store from ST(0) case CPT_f32rv: reg_to_sh_ss(prm, xmm0); break; } }

/*************************************************************************** * * Function : build_CN_code * ***************************************************************************/ void build_CN_code ( Word32 *seed, /* i/o : Old CN generator shift register state */ Word16 cod[] /* o : Generated CN fixed codebook vector */ ) { Word16 i, j, k; for (i = 0; i < L_SUBFR; i++) { cod[i] = 0; } for (k = 0; k < NB_PULSE10; k++) { i = pseudonoise (seed, 2); /* generate pulse position */ i = shr (extract_l (L_mult (i, 10)), 1); i = i + k; j = pseudonoise (seed, 1); /* generate sign */ if (j > 0) { cod[i] = 4096; } else { cod[i] = -4096; } } return; }

Code() { Xbyak::Label label; cmpss(xmm0, ptr[rip + label], 0); test(dword[rip + label], 33); bt(dword[rip + label ], 3); vblendpd(xmm0, dword[rip + label], 3); vpalignr(xmm0, qword[rip + label], 4); vextractf128(dword[rip + label], ymm3, 12); vperm2i128(ymm0, ymm1, qword[rip + label], 13); vcvtps2ph(ptr[rip + label], xmm2, 44); mov(dword[rip + label], 0x1234); shl(dword[rip + label], 3); shr(dword[rip + label], 1); shld(qword[rip + label], rax, 3); imul(rax, qword[rip + label], 21); rorx(rax, qword[rip + label], 21); test(dword[rip + label], 5); pextrq(ptr[rip + label], xmm0, 3); pinsrq(xmm2, ptr[rip + label], 5); pextrw(ptr[rip + label], xmm1, 4); adc(dword[rip + label], 0x12345); bt(byte[rip + label], 0x34); btc(word[rip + label], 0x34); btr(dword[rip + label], 0x34); rcl(dword[rip + label], 4); shld(qword[rip + label], rax, 4); palignr(mm0, ptr[rip + label], 4); aeskeygenassist(xmm3, ptr[rip + label], 4); vpcmpestrm(xmm2, ptr[rip + label], 7); ret(); L(label); dq(0x123456789abcdef0ull); };

static inline uint32_t get_op_code(uint32_t word) { return shr(shl(word, 64 - 32), 64 - 4); /*Bitpack_getu(word, OP_CODE_LENGTH, (WORD_LENGTH - OP_CODE_LENGTH));*/ }

static void Int2bin ( Word16 value, /* input : value to be converted to binary */ Word16 no_of_bits, /* input : number of bits associated with value */ Word16 *bitstream /* output: address where bits are written */ ) { Word16 *pt_bitstream, i, bit; pt_bitstream = &bitstream[no_of_bits]; for (i = 0; i < no_of_bits; i++) { bit = value & MASK; if (bit == 0) { *--pt_bitstream = BIT_0; } else { *--pt_bitstream = BIT_1; } value = shr (value, 1); } }

long interpolation_cos129( short freq ) { short sin_data,cos_data,count,temp ; long Ltemp,Lresult; /* cos(x)=cos(a)+(x-a)sin(a)-pow((a-x),2)*cos(a) */ count=shr(abs_s(freq ),7 ); temp=sub( extract_l(L_mult( count,64)) , freq ); /* (a-x)sin a */ /* Scale factor for (a-x): 3217=pi2/64 */ sin_data=sin129_table [ count]; cos_data=cos129_table [count]; Ltemp=L_mpy_ls(L_mult(3217,temp),sin_data); /* (a-x) sin(a) - [(a-x)*(a-x)*cos(a)] /2 */ /* Scale factor for (a-x)*(a-x): 20213=pi2*pi2/64 */ Ltemp=L_sub(Ltemp, L_mpy_ls(L_mult(mult_r(10106,temp),temp),cos_data)); /* Scaled up by 64/2 times */ Ltemp=L_shl( Ltemp ,6 ); Lresult= L_add(L_deposit_h(cos_data), (Ltemp)) ; return(Lresult); }

void Lsf_lsp2( Word16 lsf[], /* (i) Q13 : lsf[m] (range: 0.0<=val<PI) */ Word16 lsp[], /* (o) Q15 : lsp[m] (range: -1<=val<1) */ Word16 m /* (i) : LPC order */ ) { Word16 i, ind; Word16 offset; /* in Q8 */ Word16 freq; /* normalized frequency in Q15 */ Word32 L_tmp; for(i=0; i<m; i++) { /* freq = abs_s(freq);*/ freq = mult(lsf[i], 20861); /* 20861: 1.0/(2.0*PI) in Q17 */ ind = shr(freq, 8); /* ind = b8-b15 of freq */ offset = freq & (Word16)0x00ff; /* offset = b0-b7 of freq */ if ( sub(ind, 63)>0 ){ ind = 63; /* 0 <= ind <= 63 */ } /* lsp[i] = table2[ind]+ (slope_cos[ind]*offset >> 12) */ L_tmp = L_mult(slope_cos[ind], offset); /* L_tmp in Q28 */ lsp[i] = add(table2[ind], extract_l(L_shr(L_tmp, 13))); } return; }

/*************************************************************************** Function: compute_raw_pow_categories Syntax: void compute_raw_pow_categories(Word16 *power_categories, Word16 *rms_index, Word16 number_of_regions, Word16 offset) inputs: *rms_index number_of_regions offset outputs: *power_categories Description: This function computes the power categories given the offset This is kind of redundant since they were already computed in calc_offset to determine the offset. WMOPS: | 24kbit | 32kbit -------|--------------|---------------- AVG | 0.01 | 0.01 -------|--------------|---------------- MAX | 0.01 | 0.01 -------|--------------|---------------- 14kHz | 24kbit | 32kbit | 48kbit -------|--------------|----------------|---------------- AVG | 0.01 | 0.01 | 0.01 -------|--------------|----------------|---------------- MAX | 0.01 | 0.01 | 0.01 -------|--------------|----------------|---------------- ***************************************************************************/ void compute_raw_pow_categories(Word16 *power_categories,Word16 *rms_index,Word16 number_of_regions,Word16 offset) { Word16 region; Word16 j; Word16 temp; for (region=0; region<number_of_regions; region++) { j = sub(offset,rms_index[region]); j = shr(j,1); /* make sure j is between 0 and NUM_CAT-1 */ test(); if (j < 0) { j = 0; move16(); } temp = sub(j,(NUM_CATEGORIES-1)); test(); if (temp > 0) j = sub(NUM_CATEGORIES,1); power_categories[region] = j; move16(); } }

void Bitpack( INT16 in, UINT16 *TrWords, INT16 NoOfBits, INT16 *ptr ) { INT16 temp; UINT16 *WordPtr; WordPtr = TrWords + ptr[1]; *ptr = sub(*ptr, NoOfBits); if (*ptr >= 0) { /* NOTE: Creating *WordPtr requires an unsigned shift. There isn't a fixed * point prototype for an unsigned shift, so I kept the C version. */ *WordPtr = *WordPtr | (in << *ptr); } else { temp = shr(in, negate(*ptr)); *WordPtr = *WordPtr | temp; WordPtr++; *ptr = add(*ptr, 16); *WordPtr = (INT16) ((INT32) (L_shl((INT32) in, *ptr)) & 0xffff); ptr[1] += 1; } }

Word32 Inv_sqrt( /* (o) Q30 : output value (range: 0<=val<1) */ Word32 L_x /* (i) Q0 : input value (range: 0<=val<=7fffffff) */ ) { Word16 exp, i, a, tmp; Word32 L_y; if( L_x <= (Word32)0) return ( (Word32)0x3fffffffL); exp = norm_l(L_x); L_x = L_shl(L_x, exp ); /* L_x is normalize */ exp = sub(30, exp); if( (exp & 1) == 0 ) /* If exponent even -> shift right */ L_x = L_shr(L_x, 1); exp = shr(exp, 1); exp = add(exp, 1); L_x = L_shr(L_x, 9); i = extract_h(L_x); /* Extract b25-b31 */ L_x = L_shr(L_x, 1); a = extract_l(L_x); /* Extract b10-b24 */ a = a & (Word16)0x7fff; i = sub(i, 16); L_y = L_deposit_h(tabsqr[i]); /* tabsqr[i] << 16 */ tmp = sub(tabsqr[i], tabsqr[i+1]); /* tabsqr[i] - tabsqr[i+1]) */ L_y = L_msu(L_y, tmp, a); /* L_y -= tmp*a*2 */ L_y = L_shr(L_y, exp); /* denormalization */ return(L_y); }

int32_t WebRtcG729fix_Inv_sqrt( /* (o) Q30 : output value (range: 0<=val<1) */ int32_t L_x /* (i) Q0 : input value (range: 0<=val<=7fffffff) */ ) { int16_t exp, i, a, tmp; int32_t L_y; if( L_x <= (int32_t)0) return ( (int32_t)0x3fffffffL); exp = WebRtcSpl_NormW32(L_x); L_x = L_shl(L_x, exp ); /* L_x is normalize */ exp = WebRtcSpl_SubSatW16(30, exp); if( (exp & 1) == 0 ) /* If exponent even -> shift right */ L_x = L_shr(L_x, 1); exp = shr(exp, 1); exp = WebRtcSpl_AddSatW16(exp, 1); L_x = L_shr(L_x, 9); i = extract_h(L_x); /* Extract b25-b31 */ L_x = L_shr(L_x, 1); a = extract_l(L_x); /* Extract b10-b24 */ a = a & (int16_t)0x7fff; i = WebRtcSpl_SubSatW16(i, 16); L_y = L_deposit_h(WebRtcG729fix_tabsqr[i]); /* tabsqr[i] << 16 */ tmp = WebRtcSpl_SubSatW16(WebRtcG729fix_tabsqr[i], WebRtcG729fix_tabsqr[i+1]); /* tabsqr[i] - tabsqr[i+1]) */ L_y = L_msu(L_y, tmp, a); /* L_y -= tmp*a*2 */ L_y = L_shr(L_y, exp); /* denormalization */ return(L_y); }

uint64_t Bitpack_getu(uint64_t word, unsigned width, unsigned lsb) { unsigned hi = lsb + width; /* one beyond the most significant bit */ assert(hi <= 64); /* different type of right shift */ return shr(shl(word, 64 - hi), 64 - width); }

bool Bitpack_fitsu(uint64_t n, unsigned width) { if (width >= 64) return true; /* thanks to Jai Karve and John Bryan */ return shr(n, width) == 0; // clever shortcut instead of 2 shifts }

void energy_computation ( Word16 r_h[], Word16 scal_acf, Word16 rvad[], Word16 scal_rvad, Pfloat * acf0, Pfloat * pvad ) { Word16 i, temp, norm_prod; Word32 L_temp; /* r[0] is always greater than zero (no need to test for r[0] == 0) */ /* Computation of acf0 (exponent and mantissa) */ acf0->e = sub (32, scal_acf); acf0->m = r_h[0] & 0x7ff8; /* Computation of pvad (exponent and mantissa) */ pvad->e = add (acf0->e, 14); pvad->e = sub (pvad->e, scal_rvad); L_temp = 0L; for (i = 1; i <= 8; i++) { temp = shr (r_h[i], 3); L_temp = L_mac (L_temp, temp, rvad[i]); } temp = shr (r_h[0], 3); L_temp = L_add (L_temp, L_shr (L_mult (temp, rvad[0]), 1)); if (L_temp <= 0L) { L_temp = 1L; } norm_prod = norm_l (L_temp); pvad->e = sub (pvad->e, norm_prod); if(norm_prod<=0) pvad->m = extract_h (L_shr (L_temp, -norm_prod)); else pvad->m = extract_h (L_shl (L_temp, norm_prod)); return; }

void Corr_xy2( Word16 xn[], /* (i) Q0 :Target vector. */ Word16 y1[], /* (i) Q0 :Adaptive codebook. */ Word16 y2[], /* (i) Q12 :Filtered innovative vector. */ Word16 g_coeff[], /* (o) Q[exp]:Correlations between xn,y1,y2 */ Word16 exp_g_coeff[] /* (o) :Q-format of g_coeff[] */ ) { Word16 i,exp; Word16 exp_y2y2,exp_xny2,exp_y1y2; Word16 y2y2, xny2, y1y2; Word32 L_acc; Word16 scaled_y2[L_SUBFR]; /* Q9 */ /*------------------------------------------------------------------* * Scale down y2[] from Q12 to Q9 to avoid overflow * *------------------------------------------------------------------*/ for(i=0; i<L_SUBFR; i++) { scaled_y2[i] = shr(y2[i], 3); } /* Compute scalar product <y2[],y2[]> */ L_acc = 1; /* Avoid case of all zeros */ for(i=0; i<L_SUBFR; i++) L_acc = L_mac(L_acc, scaled_y2[i], scaled_y2[i]); /* L_acc:Q19 */ exp = norm_l(L_acc); y2y2 = round( L_shl(L_acc, exp) ); exp_y2y2 = add(exp, 19-16); /* Q[19+exp-16] */ g_coeff[2] = y2y2; exp_g_coeff[2] = exp_y2y2; /* Compute scalar product <xn[],y2[]> */ L_acc = 1; /* Avoid case of all zeros */ for(i=0; i<L_SUBFR; i++) L_acc = L_mac(L_acc, xn[i], scaled_y2[i]); /* L_acc:Q10 */ exp = norm_l(L_acc); xny2 = round( L_shl(L_acc, exp) ); exp_xny2 = add(exp, 10-16); /* Q[10+exp-16] */ g_coeff[3] = negate(xny2); exp_g_coeff[3] = sub(exp_xny2,1); /* -2<xn,y2> */ /* Compute scalar product <y1[],y2[]> */ L_acc = 1; /* Avoid case of all zeros */ for(i=0; i<L_SUBFR; i++) L_acc = L_mac(L_acc, y1[i], scaled_y2[i]); /* L_acc:Q10 */ exp = norm_l(L_acc); y1y2 = round( L_shl(L_acc, exp) ); exp_y1y2 = add(exp, 10-16); /* Q[10+exp-16] */ g_coeff[4] = y1y2; exp_g_coeff[4] = sub(exp_y1y2,1); ; /* 2<y1,y2> */ return; }

/*-----------------------------------------------------* * Function Autocorr() * * * * Compute autocorrelations of signal with windowing * * * *-----------------------------------------------------*/ void Autocorr( Word16 x[], /* (i) : Input signal */ Word16 m, /* (i) : LPC order */ Word16 r_h[], /* (o) : Autocorrelations (msb) */ Word16 r_l[] /* (o) : Autocorrelations (lsb) */ ) { Word16 i, j, norm; Word16 y[L_WINDOW]; Word32 sum; extern Flag Overflow; /* Windowing of signal */ for(i=0; i<L_WINDOW; i++) { y[i] = mult_r(x[i], hamwindow[i]); } /* Compute r[0] and test for overflow */ do { Overflow = 0; sum = 1; /* Avoid case of all zeros */ for(i=0; i<L_WINDOW; i++) sum = L_mac(sum, y[i], y[i]); /* If overflow divide y[] by 4 */ if(Overflow != 0) { for(i=0; i<L_WINDOW; i++) { y[i] = shr(y[i], 2); } } }while (Overflow != 0); /* Normalization of r[0] */ norm = norm_l(sum); sum = L_shl(sum, norm); L_Extract(sum, &r_h[0], &r_l[0]); /* Put in DPF format (see oper_32b) */ /* r[1] to r[m] */ for (i = 1; i <= m; i++) { sum = 0; for(j=0; j<L_WINDOW-i; j++) sum = L_mac(sum, y[j], y[j+i]); sum = L_shl(sum, norm); L_Extract(sum, &r_h[i], &r_l[i]); } return; }

/************************************************************************* * * FUNCTION: compress_code() * * PURPOSE: compression of the linear codewords to 4+three indeces * one bit from each pulse is made robust to errors by * minimizing the phase shift of a bit error. * 4 signs (one for each track) * i0,i4,i1 => one index (7+3) bits, 3 LSBs more robust * i2,i6,i5 => one index (7+3) bits, 3 LSBs more robust * i3,i7 => one index (5+2) bits, 2-3 LSbs more robust * *************************************************************************/ void compress_code ( Word16 sign_indx[], /* i : signs of 4 pulses (signs only) */ Word16 pos_indx[], /* i : position index of 8 pulses (position only) */ Word16 indx[]) /* o : position and sign of 8 pulses (compressed) */ { Word16 i, ia, ib, ic; for (i = 0; i < NB_TRACK_MR102; i++) { indx[i] = sign_indx[i]; } /* First index indx[NB_TRACK] = (ia/2+(ib/2)*5 +(ic/2)*25)*8 + ia%2 + (ib%2)*2 + (ic%2)*4; */ indx[NB_TRACK_MR102] = compress10(pos_indx[0],pos_indx[4],pos_indx[1]); /* Second index indx[NB_TRACK+1] = (ia/2+(ib/2)*5 +(ic/2)*25)*8 + ia%2 + (ib%2)*2 + (ic%2)*4; */ indx[NB_TRACK_MR102+1]= compress10(pos_indx[2],pos_indx[6],pos_indx[5]); /* Third index if ((ib/2)%2 == 1) indx[NB_TRACK+2] = ((((4-ia/2) + (ib/2)*5)*32+12)/25)*4 + ia%2 + (ib%2)*2; else indx[NB_TRACK+2] = ((((ia/2) + (ib/2)*5)*32+12)/25)*4 + ia%2 + (ib%2)*2; */ ib = shr(pos_indx[7], 1) & 1; if (sub(ib, 1) == 0) ia = sub(4, shr(pos_indx[3], 1)); else ia = shr(pos_indx[3], 1); ib = extract_l(L_shr(L_mult(shr(pos_indx[7], 1), 5), 1)); ib = add(shl(add(ia, ib), 5), 12); ic = shl(mult(ib, 1311), 2); ia = pos_indx[3] & 1; ib = shl((pos_indx[7] & 1), 1); indx[NB_TRACK_MR102+2] = add(ia, add(ib, ic)); }

int encode64 (char *dst, unsigned char *src, int size) { char *dptr = dst; unsigned char *sptr = src; unsigned char *end = sptr + size; unsigned char c1, c2; do { c1 = *sptr++; *dptr++ = itoa64[shr(c1, 2)]; c1 = shl((c1 & 0x03), 4); if (sptr >= end) { *dptr++ = itoa64[c1]; break; } c2 = *sptr++; c1 |= shr(c2, 4) & 0x0f; *dptr++ = itoa64[c1]; c1 = shl((c2 & 0x0f), 2); if (sptr >= end) { *dptr++ = itoa64[c1]; break; } c2 = *sptr++; c1 |= shr(c2, 6) & 0x03; *dptr++ = itoa64[c1]; *dptr++ = itoa64[c2 & 0x3f]; } while (sptr < end); *dptr = '\0'; return (dptr - dst); }

static __inline u_int64_t s64_3( u_int64_t x) { u_int64_t ans = shl(x, 2); u_int64_t roll_temp = rotl64(x, 28); ans ^= roll_temp; ans ^= shr(x, 2); ans ^= rotl64(roll_temp, 31); return ans; //(shr((x), 2) ^ shl((x), 2) ^ rotl64((x), 28) ^ rotl64((x), 59)); }