/*
 * voice_factor: derive a voicing factor from the energies of the two
 * excitation contributions of a subframe.
 *
 * The visible part of the routine builds two (mantissa, exponent) pairs:
 *   ener1/exp1 - energy of the pitch excitation exc[] multiplied by gain_pit^2
 *   ener2/exp2 - energy of the fixed codebook vector code[] multiplied by
 *                gain_code^2
 * and then starts aligning the two mantissas on a common exponent.
 *
 * NOTE(review): this excerpt ends inside the if/else that aligns the
 * mantissas; the else branch, the final computation and the return
 * statement are not visible here. The stated output range (-1 = unvoiced
 * to 1 = voiced, Q15) comes from the signature comment below.
 *
 * NOTE(review): Dot_product12(), norm_l(), norm_s(), extract_h(), vo_mult()
 * and vo_L_mult() are defined elsewhere; the comments below assume the usual
 * fixed-point basic-operator semantics for them - confirm against their
 * definitions.
 */
Word16 voice_factor(                                  /* (o) Q15   : factor (-1=unvoiced to 1=voiced) */
        Word16 exc[],                         /* (i) Q_exc : pitch excitation                 */
        Word16 Q_exc,                         /* (i)       : exc format                       */
        Word16 gain_pit,                      /* (i) Q14   : gain of pitch                    */
        Word16 code[],                        /* (i) Q9    : Fixed codebook excitation        */
        Word16 gain_code,                     /* (i) Q0    : gain of code                     */
        Word16 L_subfr                        /* (i)       : subframe length                  */
        )
{
    Word16 tmp, exp, ener1, exp1, ener2, exp2;
    Word32 i, L_tmp;

    /* --- Pitch contribution: energy of exc[] over L_subfr samples --- */
    /* Presumably the dot product returns a normalized 32-bit value and stores
     * its exponent in exp1; only the upper 16 bits are kept as the mantissa. */
#ifdef ASM_OPT               /* asm optimization branch */
    ener1 = extract_h(Dot_product12_asm(exc, exc, L_subfr, &exp1));
#else
    ener1 = extract_h(Dot_product12(exc, exc, L_subfr, &exp1));
#endif
    /* exc[] is scaled by Q_exc, so its squared energy carries 2*Q_exc. */
    exp1 = exp1 - (Q_exc + Q_exc);
    /* Square the pitch gain, normalize the 32-bit product and keep its
     * high word as a 16-bit mantissa. */
    L_tmp = vo_L_mult(gain_pit, gain_pit);
    exp = norm_l(L_tmp);
    tmp = extract_h(L_tmp << exp);
    /* Fold gain_pit^2 into the pitch energy mantissa ... */
    ener1 = vo_mult(ener1, tmp);
    /* ... and account for the normalization shift in the exponent. */
    exp1 = exp1 - exp - 10;        /* 10 -> gain_pit Q14 to Q9 */

    /* --- Code contribution: energy of code[] over L_subfr samples --- */
#ifdef ASM_OPT                /* asm optimization branch */
    ener2 = extract_h(Dot_product12_asm(code, code, L_subfr, &exp2));
#else
    ener2 = extract_h(Dot_product12(code, code, L_subfr, &exp2));
#endif

    /* Normalize gain_code to 16 bits, square it, and fold it into the code
     * energy mantissa. */
    exp = norm_s(gain_code);
    tmp = gain_code << exp;
    tmp = vo_mult(tmp, tmp);
    ener2 = vo_mult(ener2, tmp);
    /* The gain was shifted by exp before squaring, hence 2*exp here. */
    exp2 = exp2 - (exp + exp);

    /* Exponent difference between the pitch and the code energies. */
    i = exp1 - exp2;

    if (i >= 0)
    {
        /* exp1 is the larger (or equal) exponent: ener1 is halved and ener2
         * is shifted right by the difference plus one. Presumably this puts
         * both mantissas on a common scale with one bit of headroom for the
         * computation that follows (not visible in this excerpt). */
        ener1 = ener1 >> 1;
        ener2 = ener2 >> (i + 1);
    } else
Exemplo n.º 2
0
void ACELP_4t64_fx(
		Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
		Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
		Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
		Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
		Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
		Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
		Word16 ser_size,                      /* (i) : bit rate                                         */
		Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
		/* (o) : index (36): 9+9+9+9 = 36 bits.                   */
		/* (o) : index (44): 13+9+13+9 = 44 bits.                 */
		/* (o) : index (52): 13+13+13+13 = 52 bits.               */
		/* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
		/* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
		/* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
		)
{
	Word32 i, j, k;
	Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
	Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
	Word16 *p0, *p1, *p2, *p3, *psign;
	Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
	Word32 s, cor, L_tmp, L_index;
	Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
	Word16 ind[NPMAXPT * NB_TRACK];
	Word16 codvec[NB_PULSE_MAX], nbpos[10];
	Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
	Word16 h_buf[4 * L_SUBFR];
	Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
	Word16 ipos[NB_PULSE_MAX];

	switch (nbbits)
	{
		case 20:                               /* 20 bits, 4 pulses, 4 tracks */
			nbiter = 4;                          /* 4x16x16=1024 loop */
			alp = 8192;                          /* alp = 2.0 (Q12) */
			nb_pulse = 4;                      
			nbpos[0] = 4;                      
			nbpos[1] = 8;                      
			break;
		case 36:                               /* 36 bits, 8 pulses, 4 tracks */
			nbiter = 4;                          /* 4x20x16=1280 loop */
			alp = 4096;                          /* alp = 1.0 (Q12) */
			nb_pulse = 8;                      
			nbpos[0] = 4;                      
			nbpos[1] = 8;                      
			nbpos[2] = 8;                      
			break;
		case 44:                               /* 44 bits, 10 pulses, 4 tracks */
			nbiter = 4;                          /* 4x26x16=1664 loop */
			alp = 4096;                          /* alp = 1.0 (Q12) */
			nb_pulse = 10;                     
			nbpos[0] = 4;                      
			nbpos[1] = 6;                      
			nbpos[2] = 8;                      
			nbpos[3] = 8;                      
			break;
		case 52:                               /* 52 bits, 12 pulses, 4 tracks */
			nbiter = 4;                          /* 4x26x16=1664 loop */
			alp = 4096;                          /* alp = 1.0 (Q12) */
			nb_pulse = 12;                     
			nbpos[0] = 4;                      
			nbpos[1] = 6;                      
			nbpos[2] = 8;                      
			nbpos[3] = 8;                      
			break;
		case 64:                               /* 64 bits, 16 pulses, 4 tracks */
			nbiter = 3;                          /* 3x36x16=1728 loop */
			alp = 3277;                          /* alp = 0.8 (Q12) */
			nb_pulse = 16;                     
			nbpos[0] = 4;                      
			nbpos[1] = 4;                      
			nbpos[2] = 6;                      
			nbpos[3] = 6;                      
			nbpos[4] = 8;                      
			nbpos[5] = 8;                      
			break;
		case 72:                               /* 72 bits, 18 pulses, 4 tracks */
			nbiter = 3;                          /* 3x35x16=1680 loop */
			alp = 3072;                          /* alp = 0.75 (Q12) */
			nb_pulse = 18;                     
			nbpos[0] = 2;                      
			nbpos[1] = 3;                      
			nbpos[2] = 4;                      
			nbpos[3] = 5;                      
			nbpos[4] = 6;                      
			nbpos[5] = 7;                      
			nbpos[6] = 8;                      
			break;
		case 88:                               /* 88 bits, 24 pulses, 4 tracks */
			if(ser_size > 462)
				nbiter = 1;
			else
				nbiter = 2;                    /* 2x53x16=1696 loop */

			alp = 2048;                          /* alp = 0.5 (Q12) */
			nb_pulse = 24;                     
			nbpos[0] = 2;                      
			nbpos[1] = 2;                      
			nbpos[2] = 3;                      
			nbpos[3] = 4;                      
			nbpos[4] = 5;                      
			nbpos[5] = 6;                      
			nbpos[6] = 7;                      
			nbpos[7] = 8;                      
			nbpos[8] = 8;                      
			nbpos[9] = 8;                      
			break;
		default:
			nbiter = 0;
			alp = 0;
			nb_pulse = 0;
	}

	for (i = 0; i < nb_pulse; i++)
	{
		codvec[i] = i;                     
	}

	/*----------------------------------------------------------------*
	 * Find sign for each pulse position.                             *
	 *----------------------------------------------------------------*/
	/* calculate energy for normalization of cn[] and dn[] */
	/* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
#ifdef ASM_OPT                  /* asm optimization branch */
	s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
#else
	s = Dot_product12(cn, cn, L_SUBFR, &exp);
#endif

	Isqrt_n(&s, &exp);
	s = L_shl(s, (exp + 5)); 
	k_cn = extract_h(L_add(s, 0x8000));

	/* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
#ifdef ASM_OPT                      /* asm optimization branch */
	s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
#else
	s = Dot_product12(dn, dn, L_SUBFR, &exp);
#endif

	Isqrt_n(&s, &exp);
	k_dn = (L_shl(s, (exp + 5 + 3)) + 0x8000) >> 16;    /* k_dn = 256..4096 */
	k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */

	/* mix normalized cn[] and dn[] */
	p0 = cn;
	p1 = dn;
	p2 = dn2;

	for (i = 0; i < L_SUBFR/4; i++)
	{
		s = (k_cn* (*p0++))+(k_dn * (*p1++));
		*p2++ = s >> 7;
		s = (k_cn* (*p0++))+(k_dn * (*p1++));
		*p2++ = s >> 7;
		s = (k_cn* (*p0++))+(k_dn * (*p1++));
		*p2++ = s >> 7;
		s = (k_cn* (*p0++))+(k_dn * (*p1++));
		*p2++ = s >> 7; 
	}

	/* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
	for(i = 0; i < L_SUBFR; i++)
	{
		val = dn[i];                   
		ps = dn2[i];                   
		if (ps >= 0)
		{
			sign[i] = 32767;             /* sign = +1 (Q12) */
			vec[i] = -32768;           
		} else
		{
			sign[i] = -32768;            /* sign = -1 (Q12) */
			vec[i] = 32767;            
			dn[i] = -val;
			dn2[i] = -ps;
		}
	}
	/*----------------------------------------------------------------*
	 * Select NB_MAX position per track according to max of dn2[].    *
	 *----------------------------------------------------------------*/
	pos = 0;
	for (i = 0; i < NB_TRACK; i++)
	{
		for (k = 0; k < NB_MAX; k++)
		{
			ps = -1;                       
			for (j = i; j < L_SUBFR; j += STEP)
			{
				if(dn2[j] > ps)
				{
					ps = dn2[j];          
					pos = j;               
				}
			}
			dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
			if (k == 0)
			{
				pos_max[i] = pos;          
			}
		}
	}

	/*--------------------------------------------------------------*
	 * Scale h[] to avoid overflow and to get maximum of precision  *
	 * on correlation.                                              *
	 *                                                              *
	 * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
	 *  ==> This allow addition of 16 pulses without saturation.    *
	 *                                                              *
	 * Energy worst case (on resonant impulse response),            *
	 * - energy of h[] is approximately MAX/16.                     *
	 * - During search, the energy is divided by 8 to avoid         *
	 *   overflow on "alp". (energy of h[] = MAX/128).              *
	 *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
	 *--------------------------------------------------------------*/

	/* impulse response buffer for fast computation */

	h = h_buf;                             
	h_inv = h_buf + (2 * L_SUBFR);   
	L_tmp = 0;
	for (i = 0; i < L_SUBFR; i++)
	{
		*h++ = 0;                          
		*h_inv++ = 0;   
		L_tmp += (H[i] * H[i]) << 1;
	}
	/* scale h[] down (/2) when energy of h[] is high with many pulses used */
	val = extract_h(L_tmp);
	h_shift = 0;                           

	if ((nb_pulse >= 12) && (val > 1024))
	{
		h_shift = 1;                       
	}
	p0 = H;
	p1 = h;
	p2 = h_inv;

	for (i = 0; i < L_SUBFR/4; i++)
	{
		*p1 = *p0++ >> h_shift;         
		*p2++ = -(*p1++);  
		*p1 = *p0++ >> h_shift;         
		*p2++ = -(*p1++); 
		*p1 = *p0++ >> h_shift;         
		*p2++ = -(*p1++); 
		*p1 = *p0++ >> h_shift;         
		*p2++ = -(*p1++); 
	}

	/*------------------------------------------------------------*
	 * Compute rrixix[][] needed for the codebook search.         *
	 * This algorithm compute impulse response energy of all      *
	 * positions (16) in each track (4).       Total = 4x16 = 64. *
	 *------------------------------------------------------------*/

	/* storage order --> i3i3, i2i2, i1i1, i0i0 */

	/* Init pointers to last position of rrixix[] */
	p0 = &rrixix[0][NB_POS - 1];           
	p1 = &rrixix[1][NB_POS - 1];           
	p2 = &rrixix[2][NB_POS - 1];           
	p3 = &rrixix[3][NB_POS - 1];           

	ptr_h1 = h;                            
	cor = 0x00008000L;                             /* for rounding */
	for (i = 0; i < NB_POS; i++)
	{
		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
		ptr_h1++;
		*p3-- = extract_h(cor);            
		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
		ptr_h1++;
		*p2-- = extract_h(cor);            
		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
		ptr_h1++;
		*p1-- = extract_h(cor);            
		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
		ptr_h1++;
		*p0-- = extract_h(cor);            
	}

	/*------------------------------------------------------------*
	 * Compute rrixiy[][] needed for the codebook search.         *
	 * This algorithm compute correlation between 2 pulses        *
	 * (2 impulses responses) in 4 possible adjacents tracks.     *
	 * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
	 *------------------------------------------------------------*/

	/* storage order --> i2i3, i1i2, i0i1, i3i0 */

	pos = MSIZE - 1;                       
	ptr_hf = h + 1;                        

	for (k = 0; k < NB_POS; k++)
	{
		p3 = &rrixiy[2][pos];              
		p2 = &rrixiy[1][pos];              
		p1 = &rrixiy[0][pos];              
		p0 = &rrixiy[3][pos - NB_POS];     

		cor = 0x00008000L;                   /* for rounding */
		ptr_h1 = h;                        
		ptr_h2 = ptr_hf;                   

		for (i = k + 1; i < NB_POS; i++)
		{
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p3 = extract_h(cor);          
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p2 = extract_h(cor);          
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p1 = extract_h(cor);          
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p0 = extract_h(cor);         

			p3 -= (NB_POS + 1);
			p2 -= (NB_POS + 1);
			p1 -= (NB_POS + 1);
			p0 -= (NB_POS + 1);
		}
		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
		ptr_h1++;
		ptr_h2++;
		*p3 = extract_h(cor);              
		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
		ptr_h1++;
		ptr_h2++;
		*p2 = extract_h(cor);              
		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
		ptr_h1++;
		ptr_h2++;
		*p1 = extract_h(cor);              

		pos -= NB_POS;
		ptr_hf += STEP;
	}

	/* storage order --> i3i0, i2i3, i1i2, i0i1 */

	pos = MSIZE - 1;                       
	ptr_hf = h + 3;                        

	for (k = 0; k < NB_POS; k++)
	{
		p3 = &rrixiy[3][pos];              
		p2 = &rrixiy[2][pos - 1];          
		p1 = &rrixiy[1][pos - 1];          
		p0 = &rrixiy[0][pos - 1];          

		cor = 0x00008000L;								/* for rounding */
		ptr_h1 = h;                        
		ptr_h2 = ptr_hf;                   

		for (i = k + 1; i < NB_POS; i++)
		{
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p3 = extract_h(cor);          
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p2 = extract_h(cor);          
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p1 = extract_h(cor);          
			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
			ptr_h1++;
			ptr_h2++;
			*p0 = extract_h(cor);          

			p3 -= (NB_POS + 1);
			p2 -= (NB_POS + 1);
			p1 -= (NB_POS + 1);
			p0 -= (NB_POS + 1);
		}
		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
		ptr_h1++;
		ptr_h2++;
		*p3 = extract_h(cor);              

		pos--;
		ptr_hf += STEP;
	}

	/*------------------------------------------------------------*
	 * Modification of rrixiy[][] to take signs into account.     *
	 *------------------------------------------------------------*/

	p0 = &rrixiy[0][0];                    

	for (k = 0; k < NB_TRACK; k++)
	{
		j_temp = (k + 1)&0x03;
		for (i = k; i < L_SUBFR; i += STEP)
		{
			psign = sign;                  
			if (psign[i] < 0)
			{
				psign = vec;               
			}
			j = j_temp;
			for (; j < L_SUBFR; j += STEP)
			{
				*p0 = vo_mult(*p0, psign[j]);    
				p0++;
			}
		}
	}

	/*-------------------------------------------------------------------*
	 *                       Deep first search                           *
	 *-------------------------------------------------------------------*/

	psk = -1;                              
	alpk = 1;                              

	for (k = 0; k < nbiter; k++)
	{
		j_temp = k<<2;
		for (i = 0; i < nb_pulse; i++)
			ipos[i] = tipos[j_temp + i];

		if(nbbits == 20)
		{
			pos = 0;                       
			ps = 0;                        
			alp = 0;                       
			for (i = 0; i < L_SUBFR; i++)
			{
				vec[i] = 0;                
			}
		} else if ((nbbits == 36) || (nbbits == 44))
		{
			/* first stage: fix 2 pulses */
			pos = 2;

			ix = ind[0] = pos_max[ipos[0]];
			iy = ind[1] = pos_max[ipos[1]];
			ps = dn[ix] + dn[iy];
			i = ix >> 2;                /* ix / STEP */
			j = iy >> 2;                /* iy / STEP */
			s = rrixix[ipos[0]][i] << 13;
			s += rrixix[ipos[1]][j] << 13;
			i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
			s += rrixiy[ipos[0]][i] << 14;
			alp = (s + 0x8000) >> 16;
			if (sign[ix] < 0)
				p0 = h_inv - ix;
			else
				p0 = h - ix;
			if (sign[iy] < 0)
				p1 = h_inv - iy;
			else
				p1 = h - iy;

			for (i = 0; i < L_SUBFR; i++)
			{
				vec[i] = (*p0++) + (*p1++);
			}

			if(nbbits == 44)
			{
				ipos[8] = 0;               
				ipos[9] = 1;               
			}
		} else
		{