Exemple #1
0
/* Multiply M blocks of two packed spectra bin by bin and accumulate into one 16-bit spectrum.
 *
 * X and Y each hold M consecutive blocks of N values; block j starts at index j*N.
 * Inside a block, element 0 and element N-1 are multiplied as plain values, while
 * each pair (i, i+1) with odd i is multiplied as a (real, imaginary) complex number.
 * The sums over all M blocks are scaled with PSHR32(..., WEIGHT_SHIFT) and stored
 * in acc[0..N-1].
 */
static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
{
   int bin, blk;
   spx_word32_t re, im;

   /* Bin 0: single real product per block */
   re = 0;
   for (blk=0;blk<M;blk++)
      re = MAC16_16(re, X[blk*N], Y[blk*N]);
   acc[0] = PSHR32(re,WEIGHT_SHIFT);

   /* Interior bins: complex product on each (bin, bin+1) pair */
   for (bin=1;bin<N-1;bin+=2)
   {
      re = 0;
      im = 0;
      for (blk=0;blk<M;blk++)
      {
         const spx_word16_t *x = &X[blk*N+bin];
         const spx_word16_t *y = &Y[blk*N+bin];
         /* re += xr*yr - xi*yi ; im += xi*yr + xr*yi */
         re = SUB32(MAC16_16(re, x[0],y[0]), MULT16_16(x[1],y[1]));
         im = MAC16_16(MAC16_16(im, x[1],y[0]), x[0], y[1]);
      }
      acc[bin] = PSHR32(re,WEIGHT_SHIFT);
      acc[bin+1] = PSHR32(im,WEIGHT_SHIFT);
   }

   /* Last element of every block: single real product again */
   re = 0;
   for (blk=0;blk<M;blk++)
   {
      const int last = (blk+1)*N-1;
      re = MAC16_16(re, X[last], Y[last]);
   }
   acc[N-1] = PSHR32(re,WEIGHT_SHIFT);
}
/* Filter every entry of a shape codebook through the response r and record its energy.
 *
 * For entry e, resp[e*subvect_size + n] receives the (rescaled) convolution
 * sum_{k<=n} shape_cb[e*subvect_size + k] * r[n-k], and E[e] receives the sum of
 * the squares of those rescaled outputs. resp2 is not touched by this function.
 */
static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
   int entry, n, tap;
   VARDECL(spx_word16_t *shape);
   ALLOC(shape, subvect_size, spx_word16_t);
   for (entry=0;entry<shape_cb_size;entry++)
   {
      const signed char *codeword = shape_cb + entry*subvect_size;
      spx_word16_t *out = resp + entry*subvect_size;

      /* Widen the 8-bit codeword into the working sample type */
      for (tap=0;tap<subvect_size;tap++)
         shape[tap] = (spx_word16_t)codeword[tap];

      E[entry]=0;

      /* Convolve the codeword with the impulse response, one output sample at a time */
      for (n=0;n<subvect_size;n++)
      {
         spx_word32_t conv=0;
         spx_word16_t sample;
         for (tap=0;tap<=n;tap++)
            conv = MAC16_16(conv,shape[tap],r[n-tap]);
#ifdef FIXED_POINT
         sample = EXTRACT16(SHR32(conv, 13));
#else
         sample = 0.03125f*conv;
#endif
         /* Accumulate the energy of the filtered codeword */
         E[entry]=MAC16_16(E[entry],sample,sample);
         out[n] = sample;
      }
   }

}
Exemple #3
0
/* Expand a mono frame in place into interleaved stereo.
 *
 * On entry data[0..frame_size-1] holds the mono samples; on return data[2*i] and
 * data[2*i+1] hold the two channel samples derived from the original data[i].
 * The per-channel gains come from the balance and e_ratio fields of the stereo
 * state and are smoothed sample by sample through smooth_left / smooth_right,
 * which are updated in the state.
 */
void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState *_stereo)
{
   int n;
   spx_word32_t balance;
   spx_word16_t gain_left, gain_right, e_ratio;
   RealSpeexStereoState *stereo = (RealSpeexStereoState*)_stereo;

   /* COMPATIBILITY_HACK(stereo); */

   balance = stereo->balance;
   e_ratio = stereo->e_ratio;

   /* These two are Q14, with max value just below 2. */
   gain_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance))));
   gain_left = SHR32(MULT16_16(spx_sqrt(balance), gain_right), 8);

   /* Go from the last sample to the first: writing data[2*n] and data[2*n+1]
      must not overwrite a mono sample that has not been read yet. */
   n = frame_size;
   while (n-- > 0)
   {
      const spx_int16_t mono = data[n];

      /* One-pole smoothing of each gain: 0.98*previous + 0.02*target */
      stereo->smooth_left = EXTRACT16(PSHR32(
            MAC16_16(MULT16_16(stereo->smooth_left, QCONST16(0.98, 15)), gain_left, QCONST16(0.02, 15)),
            15));
      stereo->smooth_right = EXTRACT16(PSHR32(
            MAC16_16(MULT16_16(stereo->smooth_right, QCONST16(0.98, 15)), gain_right, QCONST16(0.02, 15)),
            15));

      data[2*n] = (spx_int16_t)MULT16_16_P14(stereo->smooth_left, mono);
      data[2*n+1] = (spx_int16_t)MULT16_16_P14(stereo->smooth_right, mono);
   }
}
Exemple #4
0
/* In-place 5-tap filter over x[0..N-1].
 *
 * Each output is the current sample (shifted up by SIG_SHIFT) plus
 * num[0..4] times the five previous *input* samples, rounded back with
 * ROUND16(..., SIG_SHIFT). The history starts at zero.
 */
static void celt_fir5(opus_val16 *x,
         const opus_val16 *num,
         int N)
{
   int i, k;
   opus_val16 tap[5];
   opus_val32 hist[5];

   for (k=0;k<5;k++)
   {
      tap[k] = num[k];
      hist[k] = 0;
   }
   for (i=0;i<N;i++)
   {
      opus_val32 acc = SHL32(EXTEND32(x[i]), SIG_SHIFT);
      for (k=0;k<5;k++)
         acc = MAC16_16(acc,tap[k],hist[k]);
      /* Age the history by one sample; the newest slot takes the unfiltered input */
      for (k=4;k>0;k--)
         hist[k] = hist[k-1];
      hist[0] = x[i];
      x[i] = ROUND16(acc, SIG_SHIFT);
   }
}
Exemple #5
0
/* SSE computation of two inner products that share the left operand:
 * *xy1 = sum x[i]*y01[i] and *xy2 = sum x[i]*y02[i] for i in [0, N).
 * The operands are read with _mm_loadu_ps, i.e. as packed floats with no
 * alignment requirement; elements left over after the 4-wide loop are added
 * with MAC16_16.
 */
void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
      int N, opus_val32 *xy1, opus_val32 *xy2)
{
   int i;
   __m128 acc1 = _mm_setzero_ps();
   __m128 acc2 = _mm_setzero_ps();

   /* Vector part: four products per accumulator per iteration */
   for (i=0;i<N-3;i+=4)
   {
      const __m128 xv = _mm_loadu_ps(x+i);
      const __m128 y1v = _mm_loadu_ps(y01+i);
      const __m128 y2v = _mm_loadu_ps(y02+i);
      acc1 = _mm_add_ps(acc1, _mm_mul_ps(xv, y1v));
      acc2 = _mm_add_ps(acc2, _mm_mul_ps(xv, y2v));
   }

   /* Horizontal sum of the first accumulator: (l0+l2) + (l1+l3) ends up in lane 0 */
   acc1 = _mm_add_ps(acc1, _mm_movehl_ps(acc1, acc1));
   acc1 = _mm_add_ss(acc1, _mm_shuffle_ps(acc1, acc1, 0x55));
   _mm_store_ss(xy1, acc1);

   /* Same reduction for the second accumulator */
   acc2 = _mm_add_ps(acc2, _mm_movehl_ps(acc2, acc2));
   acc2 = _mm_add_ss(acc2, _mm_shuffle_ps(acc2, acc2, 0x55));
   _mm_store_ss(xy2, acc2);

   /* Scalar tail for the last N%4 elements */
   while (i<N)
   {
      *xy1 = MAC16_16(*xy1, x[i], y01[i]);
      *xy2 = MAC16_16(*xy2, x[i], y02[i]);
      i++;
   }
}
/* Run one frame (L_FRAME samples) through the encoder's second-order recursive input filter.
 *
 * Each output combines two feedback terms (A1, A2 applied to the two previous
 * outputs kept in the context) and three feed-forward terms (B0, B1, B2 applied
 * to the current and two previous inputs). The input and output histories live
 * in encoderChannelContext and are updated here, so consecutive frames chain.
 * signal is read, preProcessedSignal receives L_FRAME filtered samples.
 */
void preProcessing(bcg729EncoderChannelContextStruct *encoderChannelContext, word16_t signal[], word16_t preProcessedSignal[]) {
	int n;

	for (n=0; n<L_FRAME; n++) {
		word32_t acc; /* in Q12 */
		/* slide the input history: X1 becomes X2 (kept locally), X0 becomes X1, new sample becomes X0 */
		const word16_t inputX2 = encoderChannelContext->inputX1;
		encoderChannelContext->inputX1 = encoderChannelContext->inputX0;
		encoderChannelContext->inputX0 = signal[n];

		/* feedback part, acc and coefficients in Q12 */
		/* Y1 in Q15.12 * A1 in Q1.12 -> acc in Q17.12 */
		acc = MULT16_32_Q12(A1, encoderChannelContext->outputY1);
		/* Y2 in Q15.12 * A2 in Q0.12 -> Q15.12 + acc in Q17.12 -> acc in Q18.12 */
		acc = MAC16_32_Q12(acc, A2, encoderChannelContext->outputY2);

		/* feed-forward part: 3*(Xi in Q15.0 * Bi in Q0.12)->Q17.12 + acc in Q18.12 -> acc in Q19.12 */
		acc = MAC16_16(acc, encoderChannelContext->inputX0, B0);
		acc = MAC16_16(acc, encoderChannelContext->inputX1, B1);
		acc = MAC16_16(acc, inputX2, B2);

		/* acc is in Q19.12 here: saturate to 28 bits (Q15.12) so that neither the
		   stored Y nor the Q15.0 value extracted from it below can overflow */
		acc = SATURATE(acc, MAXINT28);

		/* integer part of the Q15.12 value, with rounding */
		preProcessedSignal[n] = PSHR(acc,12);

		/* slide the output history */
		encoderChannelContext->outputY2 = encoderChannelContext->outputY1;
		encoderChannelContext->outputY1 = acc;
	}
}
Exemple #7
0
/* Apply a 2-D rotation (cosine c, sine s) to element pairs of X that are `stride` apart.
 *
 * A forward sweep rotates the pairs (i, i+stride) for i = 0 .. len-stride-1, then a
 * backward sweep rotates the pairs for i = len-2*stride-1 down to 0. X is modified
 * in place, so later pairs see the values written by earlier ones.
 */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
    int i;
    const opus_val16 ms = NEG16(s);

    /* Forward sweep */
    for (i=0; i<len-stride; i++)
    {
        const celt_norm lo = X[i];
        const celt_norm hi = X[i+stride];
        X[i+stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, hi),  s, lo), 15));
        X[i]        = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, lo), ms, hi), 15));
    }

    /* Backward sweep */
    for (i=len-2*stride-1; i>=0; i--)
    {
        const celt_norm lo = X[i];
        const celt_norm hi = X[i+stride];
        X[i+stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, hi),  s, lo), 15));
        X[i]        = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, lo), ms, hi), 15));
    }
}
/* Build the adaptive-codebook vector of one subframe by interpolating the past excitation.
 *
 * excitationVector points at the current subframe: samples at negative indices
 * (the past excitation) are read, and excitationVector[0..L_SUBFRAME-1] is
 * overwritten in increasing order with the interpolated result.
 * intPitchDelay is the integer pitch delay, fracPitchDelay its fractional part in [-1, 1].
 */
void generateAdaptativeCodebookVector(word16_t excitationVector[], int16_t intPitchDelay, int16_t fracPitchDelay)
{
	int n, tap;
	word16_t *delayed;
	word16_t *coefUp;
	word16_t *coefDown;

	/* fracPitchDelay is in range [-1, 1], convert it to [0,2] needed by eqA.8 */
	fracPitchDelay = -fracPitchDelay;
	if (fracPitchDelay < 0) { /* original fraction was 1: int+(1/3) is rewritten as (int+1)-(2/3) */
		intPitchDelay++;
		fracPitchDelay = 2;
	}

	/* excitation seen intPitchDelay samples in the past (-k in eq40) */
	delayed = &(excitationVector[-intPitchDelay]);
	/* b30 in Q15: coefUp walks b30[t], b30[t+3], ... ; coefDown walks b30[3-t], b30[6-t], ... (eq40) */
	coefUp = &(b30[fracPitchDelay]);
	coefDown = &(b30[3-fracPitchDelay]);

	for (n=0; n<L_SUBFRAME; n++) {
		word32_t acc = 0; /* acc in Q15 */
		for (tap=0; tap<10; tap++) {
			const int coefIndex = 3*tap;
			/* WARNING: spec 3.7.1 and A.8 give an equation leading to delayed[n+tap] but the ITU code uses delayed[n-tap]; this follows the code */
			acc = MAC16_16(acc, delayed[n-tap], coefUp[coefIndex]);
			acc = MAC16_16(acc, delayed[n+1+tap], coefDown[coefIndex]);
		}
		/* acc in Q15: shift/round to an unscaled value and saturate on 16 bits */
		excitationVector[n] = SATURATE(PSHR(acc, 15), MAXINT16);
	}
}
Exemple #9
0
/* FIR filter with state carried across calls.
 *
 *   _x   : N input samples
 *   num  : ord filter coefficients
 *   _y   : N output samples
 *   mem  : ord samples of input history; read on entry, then overwritten with
 *          the last ord input samples (mem[0] = _x[N-1], mem[1] = _x[N-2], ...)
 *   arch : forwarded to xcorr_kernel; unused when SMALL_FOOTPRINT is defined
 *
 * Each output is the input sample plus sum_j rnum[j]*x[i+j], where the sum is
 * scaled down by SIG_SHIFT and the result saturated with SATURATE16.
 * NOTE(review): `mem[i] = _x[N-i-1]` indexes before the start of _x when
 * N < ord -- confirm that callers always pass N >= ord.
 */
void celt_fir_c(
         const opus_val16 *_x,
         const opus_val16 *num,
         opus_val16 *_y,
         int N,
         int ord,
         opus_val16 *mem,
         int arch)
{
   int i,j;
   VARDECL(opus_val16, rnum);
   VARDECL(opus_val16, x);
   SAVE_STACK;

   ALLOC(rnum, ord, opus_val16);
   ALLOC(x, N+ord, opus_val16);
   /* rnum is num reversed, so the filter can be evaluated as a correlation */
   for(i=0;i<ord;i++)
      rnum[i] = num[ord-i-1];
   /* x = [history in chronological order | current input] */
   for(i=0;i<ord;i++)
      x[i] = mem[ord-i-1];
   for (i=0;i<N;i++)
      x[i+ord]=_x[i];
   /* Save the newest ord inputs as the history for the next call (newest first) */
   for(i=0;i<ord;i++)
      mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
   (void)arch;
   /* Plain double loop: the input sample is folded into the accumulator before the shift */
   for (i=0;i<N;i++)
   {
      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
      for (j=0;j<ord;j++)
      {
         sum = MAC16_16(sum,rnum[j],x[i+j]);
      }
      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
   }
#else
   /* Four outputs at a time through xcorr_kernel; the input sample is added after the shift */
   for (i=0;i<N-3;i+=4)
   {
      opus_val32 sum[4]={0,0,0,0};
      xcorr_kernel(rnum, x+i, sum, ord, arch);
      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
   }
   /* Scalar tail for the last N%4 samples */
   for (;i<N;i++)
   {
      opus_val32 sum = 0;
      for (j=0;j<ord;j++)
         sum = MAC16_16(sum,rnum[j],x[i+j]);
      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
   }
#endif
   RESTORE_STACK;
}
Exemple #10
0
/* Scaled inner product of x and y.
 *
 * The vectors are consumed four elements at a time; each group of four products
 * is summed and shifted right by 6 before being added to the total, so the
 * result is the dot product scaled down by 64 (with per-group truncation).
 * Only len/4 complete groups are processed: a remainder of len%4 trailing
 * elements is ignored.
 */
spx_word32_t inner_prod(const spx_word16_t* x, const spx_word16_t* y, int len) {
    spx_word32_t total = 0;
    int groups;

    for (groups = len >> 2; groups != 0; groups--) {
        spx_word32_t part = 0;
        part = MAC16_16(part, x[0], y[0]);
        part = MAC16_16(part, x[1], y[1]);
        part = MAC16_16(part, x[2], y[2]);
        part = MAC16_16(part, x[3], y[3]);
        x += 4;
        y += 4;
        /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
        total = ADD32(total, SHR32(part, 6));
    }
    return total;
}
Exemple #11
0
/** Weighted cross-power spectrum conj(X)*Y of two packed half-complex vectors.
 *
 * prod[0] and the last bin hold a single real product; each interior pair
 * (i, i+1) holds the real and imaginary parts of conj(X)*Y for that frequency.
 * Frequency j is scaled by p*w[j], so w is indexed once per pair while X, Y and
 * prod advance two elements per pair.
 */
static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
{
   int bin, freq;
   spx_float_t scale;

   /* First bin: real only */
   scale = FLOAT_AMULT(p, w[0]);
   prod[0] = FLOAT_MUL32(scale,MULT16_16(X[0],Y[0]));

   freq = 1;
   for (bin=1;bin<N-1;bin+=2)
   {
      scale = FLOAT_AMULT(p, w[freq]);
      /* real: Xr*Yr + Xi*Yi ; imaginary: -Xi*Yr + Xr*Yi */
      prod[bin] = FLOAT_MUL32(scale,MAC16_16(MULT16_16(X[bin],Y[bin]), X[bin+1],Y[bin+1]));
      prod[bin+1] = FLOAT_MUL32(scale,MAC16_16(MULT16_16(-X[bin+1],Y[bin]), X[bin],Y[bin+1]));
      freq++;
   }

   /* Bin left over after the pair loop: real only */
   scale = FLOAT_AMULT(p, w[freq]);
   prod[bin] = FLOAT_MUL32(scale,MULT16_16(X[bin],Y[bin]));
}
Exemple #12
0
/* Nearest-neighbour search of x in a codebook of nbVec vectors of nbDim signed bytes.
 *
 * Codebook values are scaled up by 5 bits before comparison. The entry with the
 * smallest squared error wins (the first one on ties); it is then subtracted
 * from x in place, leaving the quantisation residual. Returns the winning index.
 */
static int lsp_quant(spx_word16_t *x, const signed char *cdbk, int nbVec, int nbDim)
{
   int vec, d;
   int best_id=0;
   spx_word32_t best_dist=VERY_LARGE32;

   for (vec=0;vec<nbVec;vec++)
   {
      spx_word32_t dist=0;
      for (d=0;d<nbDim;d++)
      {
         spx_word16_t err=SUB16(x[d],SHL16((spx_word16_t)cdbk[vec*nbDim+d],5));
         dist=MAC16_16(dist,err,err);
      }
      if (dist<best_dist)
      {
         best_dist=dist;
         best_id=vec;
      }
   }

   /* Replace x by its residual with respect to the selected entry */
   for (d=0;d<nbDim;d++)
      x[d] = SUB16(x[d],SHL16((spx_word16_t)cdbk[best_id*nbDim+d],5));

   return best_id;
}
Exemple #13
0
/* Find the indices of the N best entries in a codebook.
 *
 * The codebook holds `entries` vectors of `len` values stored back to back.
 * Each entry is scored as E[i]/2 - <in, entry> (lower is better). nbest[] and
 * best_dist[] are kept sorted by increasing score through insertion. `stack`
 * is not used by this function.
 */
void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
   int entry, d, slot, filled;
   filled = 0;
   for (entry=0;entry<entries;entry++)
   {
      spx_word32_t score=0;
      for (d=0;d<len;d++)
      {
         score = MAC16_16(score,in[d],*codebook);
         codebook++;
      }
#ifdef FIXED_POINT
      score=SUB32(SHR32(E[entry],1),score);
#else
      score=.5f*E[entry]-score;
#endif
      /* The first N entries always get in; later ones must beat the current worst */
      if (entry<N || score<best_dist[N-1])
      {
         /* Shift worse (or still unused) slots down to open the insertion point */
         slot = N-1;
         while ((slot >= 1) && (slot > filled || score < best_dist[slot-1]))
         {
            best_dist[slot]=best_dist[slot-1];
            nbest[slot] = nbest[slot-1];
            slot--;
         }
         best_dist[slot]=score;
         nbest[slot]=entry;
         filled++;
      }
   }
}
Exemple #14
0
/* Compute the amplitude (square root of the energy) of each band, for bands
   0..end-1 of each of the C channels. Band i of channel c covers
   X[c*N + M*eBands[i]] up to (not including) X[c*N + M*eBands[i+1]], and its
   result goes to bandE[i + c*m->nbEBands]. */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
{
   int band, ch, N;
   const opus_int16 *eBands = m->eBands;
   N = M*m->shortMdctSize;
   ch=0; do {
      const celt_sig *Xc = X + ch*N;
      celt_ener *Ec = bandE + ch*m->nbEBands;
      for (band=0;band<end;band++)
      {
         const int lo = M*eBands[band];
         const int hi = M*eBands[band+1];
         int j;
         opus_val32 peak=0;
         opus_val32 energy = 0;

         /* Largest absolute value in the band */
         j=lo; do {
            peak = MAX32(peak, Xc[j]);
            peak = MAX32(peak, -Xc[j]);
         } while (++j<hi);

         if (peak > 0)
         {
            /* Normalisation shift derived from the peak, applied before squaring
               and undone after the square root */
            int shift = celt_ilog2(peak)-10;
            j=lo; do {
               energy = MAC16_16(energy, EXTRACT16(VSHR32(Xc[j],shift)),
                                         EXTRACT16(VSHR32(Xc[j],shift)));
            } while (++j<hi);
            /* EPSILON is added on top: per the original note, this is to make sure we
               never end up with a pitch vector that's larger than unity norm */
            Ec[band] = EPSILON+VSHR32(EXTEND32(celt_sqrt(energy)),-shift);
         } else {
            Ec[band] = EPSILON;
         }
      }
   } while (++ch<C);
}
Exemple #15
0
/* Pick the two best pitch candidates from a cross-correlation curve.
 *
 *   xcorr      : max_pitch correlation values, one per candidate lag
 *   y          : signal whose sliding energy normalises the correlation;
 *                y[0 .. len+max_pitch-1] is read
 *   best_pitch : receives the best lag in [0] and the runner-up in [1]
 *   yshift, maxcorr (FIXED_POINT only) : scaling for the energy update and
 *                for reducing xcorr to 16 bits
 *
 * A candidate's score is xcorr[i]^2 / Syy, where Syy is the energy of
 * y[i .. i+len-1]. Scores are compared by cross-multiplication so no division
 * is needed. Only strictly positive correlations are considered.
 * NOTE(review): xshift and yshift are declared only under FIXED_POINT but are
 * referenced unconditionally below; presumably VSHR32/SHR32 discard their
 * shift argument in non-FIXED_POINT builds -- confirm against the macro definitions.
 */
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int i, j;
   /* Starts at 1 (not 0) and is clamped to >= 1 below, so the energy is never zero */
   opus_val32 Syy=1;
   /* Numerator (squared correlation) and denominator (energy) of the two best scores so far */
   opus_val16 best_num[2];
   opus_val32 best_den[2];
#ifdef FIXED_POINT
   int xshift;

   /* Shift derived from the largest correlation, used to bring xcorr down to 16 bits */
   xshift = celt_ilog2(maxcorr)-14;
#endif

   /* num = -1 with den = 0 makes the initial entries lose against any real candidate */
   best_num[0] = -1;
   best_num[1] = -1;
   best_den[0] = 0;
   best_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;
   /* Energy of the first window y[0 .. len-1] */
   for (j=0;j<len;j++)
      Syy = MAC16_16(Syy, y[j],y[j]);
   for (i=0;i<max_pitch;i++)
   {
      if (xcorr[i]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
         num = MULT16_16_Q15(xcorr16,xcorr16);
         /* num/Syy > best_num/best_den, written as a cross-multiplication; test against the runner-up first */
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               /* New best: the previous best is demoted to runner-up */
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               best_num[0] = num;
               best_den[0] = Syy;
               best_pitch[0] = i;
            } else {
               /* Better than the runner-up only */
               best_num[1] = num;
               best_den[1] = Syy;
               best_pitch[1] = i;
            }
         }
      }
      /* Slide the energy window by one sample: add y[i+len]^2, drop y[i]^2 */
      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
      Syy = MAX32(1, Syy);
   }
}
Exemple #16
0
/* Evaluate a two-layer perceptron: in -> hidden layer -> out.
 *
 * m->topo[0], m->topo[1] and m->topo[2] are the input, hidden and output sizes.
 * m->weights is consumed sequentially: for every unit, one bias followed by one
 * weight per input of that layer. Both layers use tansig_approx as activation.
 * NOTE(review): hidden[] has MAX_NEURONS slots and topo[1] is not checked
 * against it here -- confirm the limit is enforced where the MLP is defined.
 */
void mlp_process(const MLP * m, const opus_val16 * in, opus_val16 * out)
{
	int unit, src;
	opus_val16 hidden[MAX_NEURONS];
	const opus_val16 *W = m->weights;

	/* Hidden layer: bias shifted up by 8 bits, then the weighted inputs */
	for (unit = 0; unit < m->topo[1]; unit++) {
		opus_val32 acc = SHL32(EXTEND32(*W), 8);
		W++;
		for (src = 0; src < m->topo[0]; src++) {
			acc = MAC16_16(acc, in[src], *W);
			W++;
		}
		hidden[unit] = tansig_approx(acc);
	}

	/* Output layer: bias shifted up by 14 bits, sum rounded down by 17 bits before the activation */
	for (unit = 0; unit < m->topo[2]; unit++) {
		opus_val32 acc = SHL32(EXTEND32(*W), 14);
		W++;
		for (src = 0; src < m->topo[1]; src++) {
			acc = MAC16_16(acc, hidden[src], *W);
			W++;
		}
		out[unit] = tansig_approx(EXTRACT16(PSHR32(acc, 17)));
	}
}
/* Squared difference between two sets of band energies, over bands start..end-1
   of each of the C channels (channel c starts at index c*len). Both values are
   shifted right by 3 before subtraction; the accumulated sum is shifted by
   2*DB_SHIFT-6 and capped at 200. */
static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
{
   int ch, band;
   opus_val32 acc = 0;
   ch=0; do {
      const int base = ch*len;
      for (band=start;band<end;band++)
      {
         const opus_val16 delta = SUB16(SHR16(eBands[band+base], 3), SHR16(oldEBands[band+base], 3));
         acc = MAC16_16(acc, delta,delta);
      }
   } while (++ch<C);
   return MIN32(200,SHR32(acc,2*DB_SHIFT-6));
}
Exemple #18
0
/* Cross-fade from in1 to in2 over `overlap` samples of interleaved audio.
 *
 * For sample i the weight is w = window[i*inc]^2 with inc = 48000/Fs, and
 * out = w*in2 + (Q15ONE - w)*in1, shifted down by 15 bits. All channels of a
 * sample share the same weight.
 */
static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
                        opus_val16 *out, int overlap, int channels,
                        const opus_val16 *window, opus_int32 Fs)
{
    int n, ch;
    int step = 48000/Fs;
    for (ch=0; ch<channels; ch++)
    {
        for (n=0; n<overlap; n++)
        {
            const int idx = n*channels+ch;
            opus_val16 w = MULT16_16_Q15(window[n*step], window[n*step]);
            out[idx] = SHR32(MAC16_16(MULT16_16(w,in2[idx]),
                                      Q15ONE-w, in1[idx]), 15);
        }
    }
}
Exemple #19
0
/* Compute the amplitude (square root of the energy) of each of the m->nbEBands
   bands, for every channel. Band i of channel c covers X[c*N + eBands[i]] up to
   (not including) X[c*N + eBands[i+1]], and its result goes to
   bank[i + c*m->nbEBands]. */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int _C)
{
   int band, ch, N;
   const celt_int16 *eBands = m->eBands;
   const int C = CHANNELS(_C);
   N = FRAMESIZE(m);
   for (ch=0;ch<C;ch++)
   {
      const celt_sig *Xc = X + ch*N;
      celt_ener *out = bank + ch*m->nbEBands;
      for (band=0;band<m->nbEBands;band++)
      {
         const int lo = eBands[band];
         const int hi = eBands[band+1];
         int j;
         celt_word32 peak=0;
         celt_word32 energy = 0;

         /* Largest absolute value in the band */
         j=lo; do {
            peak = MAX32(peak, Xc[j]);
            peak = MAX32(peak, -Xc[j]);
         } while (++j<hi);

         if (peak > 0)
         {
            /* Normalisation shift derived from the peak, applied before squaring
               and undone after the square root */
            int shift = celt_ilog2(peak)-10;
            j=lo; do {
               energy = MAC16_16(energy, EXTRACT16(VSHR32(Xc[j],shift)),
                                         EXTRACT16(VSHR32(Xc[j],shift)));
            } while (++j<hi);
            /* EPSILON is added on top: per the original note, this is to make sure we
               never end up with a pitch vector that's larger than unity norm */
            out[band] = EPSILON+VSHR32(EXTEND32(celt_sqrt(energy)),-shift);
         } else {
            out[band] = EPSILON;
         }
      }
   }
}
Exemple #20
0
/* Find the indices of the N best entries in a codebook, allowing sign inversion.
 *
 * Each entry is scored as E[i]/2 - |<in, entry>| (lower is better). When the
 * inner product is not strictly positive the entry is flagged as sign-flipped
 * and `entries` is added to its stored index. nbest[] and best_dist[] are kept
 * sorted by increasing score. `stack` is not used by this function.
 */
void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
   int entry, d, slot, flipped, filled;
   filled=0;
   for (entry=0;entry<entries;entry++)
   {
      spx_word32_t score=0;
      for (d=0;d<len;d++)
      {
         score = MAC16_16(score,in[d],*codebook);
         codebook++;
      }
      /* Work with -|correlation|, remembering whether the sign had to be flipped */
      flipped = !(score>0);
      if (!flipped)
         score=-score;
#ifdef FIXED_POINT
      score = ADD32(score,SHR32(E[entry],1));
#else
      score = ADD32(score,.5f*E[entry]);
#endif
      /* The first N entries always get in; later ones must beat the current worst */
      if (entry<N || score<best_dist[N-1])
      {
         /* Shift worse (or still unused) slots down to open the insertion point */
         slot = N-1;
         while ((slot >= 1) && (slot > filled || score < best_dist[slot-1]))
         {
            best_dist[slot]=best_dist[slot-1];
            nbest[slot] = nbest[slot-1];
            slot--;
         }
         best_dist[slot]=score;
         nbest[slot] = flipped ? entry+entries : entry;
         filled++;
      }
   }
}
Exemple #21
0
/* SSE inner product of x and y over N elements.
 * The operands are read with _mm_loadu_ps, i.e. as packed floats with no
 * alignment requirement; the last N%4 elements are added with MAC16_16.
 */
opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
      int N)
{
   int i;
   float result;
   __m128 acc = _mm_setzero_ps();

   /* FIXME: We should probably go 8-way and use 2 sums. */
   for (i=0;i<N-3;i+=4)
   {
      const __m128 xv = _mm_loadu_ps(x+i);
      const __m128 yv = _mm_loadu_ps(y+i);
      acc = _mm_add_ps(acc, _mm_mul_ps(xv, yv));
   }

   /* Horizontal sum: (l0+l2) + (l1+l3) ends up in lane 0 */
   acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
   acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
   _mm_store_ss(&result, acc);

   /* Scalar tail */
   while (i<N)
   {
      result = MAC16_16(result, x[i], y[i]);
      i++;
   }
   return result;
}
Exemple #22
0
/* All-pole (IIR) filter with state carried across calls.
 *
 *   _x   : N input samples
 *   den  : ord feedback coefficients
 *   _y   : N output samples, _y[i] = _x[i] - sum_j den[j]*r[i-1-j], where r is
 *          the output rounded with ROUND16(..., SIG_SHIFT)
 *   mem  : ord rounded past outputs, newest first (mem[0] is the most recent);
 *          read on entry and replaced by the last ord rounded outputs on return
 *   arch : forwarded to xcorr_kernel; unused when SMALL_FOOTPRINT is defined
 *
 * The non-SMALL_FOOTPRINT path requires ord to be a multiple of 4 (asserted).
 * It keeps the rounded outputs NEGATED in the work buffer y[], because
 * xcorr_kernel can only add products: adding den*(-r) implements the
 * subtraction. Every read and write of y[] below follows that convention,
 * including the scalar tail and the final state update, so both build variants
 * produce the same outputs and the same mem[] for any N.
 */
void celt_iir(const opus_val32 *_x,
         const opus_val16 *den,
         opus_val32 *_y,
         int N,
         int ord,
         opus_val16 *mem,
         int arch)
{
#ifdef SMALL_FOOTPRINT
   int i,j;
   (void)arch;
   for (i=0;i<N;i++)
   {
      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
      {
         sum -= MULT16_16(den[j],mem[j]);
      }
      /* Shift the history and insert the new rounded output at the front */
      for (j=ord-1;j>=1;j--)
      {
         mem[j]=mem[j-1];
      }
      mem[0] = ROUND16(sum,SIG_SHIFT);
      _y[i] = sum;
   }
#else
   int i,j;
   VARDECL(opus_val16, rden);
   VARDECL(opus_val16, y);
   SAVE_STACK;

   celt_assert((ord&3)==0);
   ALLOC(rden, ord, opus_val16);
   ALLOC(y, N+ord, opus_val16);
   /* Reversed coefficients, so the feedback sum can be evaluated as a correlation */
   for(i=0;i<ord;i++)
      rden[i] = den[ord-i-1];
   /* y = [negated history in chronological order | zeros for the outputs to come] */
   for(i=0;i<ord;i++)
      y[i] = -mem[ord-i-1];
   for(;i<N+ord;i++)
      y[i]=0;
   for (i=0;i<N-3;i+=4)
   {
      /* Unroll by 4 as if it were an FIR filter */
      opus_val32 sum[4];
      sum[0]=_x[i];
      sum[1]=_x[i+1];
      sum[2]=_x[i+2];
      sum[3]=_x[i+3];
      xcorr_kernel(rden, y+i, sum, ord, arch);

      /* Patch up the result to compensate for the fact that this is an IIR:
         outputs 1..3 of the group also depend on the outputs computed just
         before them, which were still zero in y[] when the kernel ran */
      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
      _y[i  ] = sum[0];
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
      _y[i+1] = sum[1];
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
      _y[i+2] = sum[2];

      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
      _y[i+3] = sum[3];
   }
   /* Scalar tail for the last N%4 samples. y[] holds negated outputs, so the
      products are ADDED and the new output is stored negated, exactly as in
      the unrolled loop above. */
   for (;i<N;i++)
   {
      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
         sum = MAC16_16(sum, rden[j], y[i+j]);
      y[i+ord] = -ROUND16(sum,SIG_SHIFT);
      _y[i] = sum;
   }
   /* New state: the last ord rounded outputs, newest first. Reading them back
      from y[] (undoing the negation) gives the same values the SMALL_FOOTPRINT
      path stores, and stays in bounds even when N < ord. */
   for(i=0;i<ord;i++)
      mem[i] = -y[N+ord-1-i];
   RESTORE_STACK;
#endif
}
Exemple #23
0
/* Autocorrelation of x for lags 0..lag, with optional windowing of both ends.
 *
 * When overlap > 0, the first and last `overlap` samples are multiplied by
 * window[] (mirrored at the end) in a scratch copy before correlating.
 * In OPUS_FIXED_POINT builds the input is pre-scaled and the result
 * re-normalised; the return value is the net shift that was applied (it is
 * always 0 otherwise).
 */
int _celt_autocorr(
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,  /* out: [0...lag] ac values (lag+1 entries are written) */
                   const opus_val16       *window,
                   int          overlap,
                   int          lag,
                   int          n,
                   int          arch
                  )
{
   opus_val32 d;
   int i, k;
   /* Number of products per lag handed to celt_pitch_xcorr; the rest is added by the scalar loop below */
   int fastN=n-lag;
   int shift;
   const opus_val16 *xptr;
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);
   if (overlap == 0)
   {
      /* No windowing: correlate the caller's buffer directly */
      xptr = x;
   } else {
      /* Work on a copy whose two ends are tapered by the window */
      for (i=0;i<n;i++)
         xx[i] = x[i];
      for (i=0;i<overlap;i++)
      {
         xx[i] = MULT16_16_Q15(x[i],window[i]);
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
      }
      xptr = xx;
   }
   shift=0;
#ifdef OPUS_FIXED_POINT
   {
      /* Coarse energy estimate (each square shifted down by 9 bits) used to pick a pre-scaling shift */
      opus_val32 ac0;
      ac0 = 1+(n<<7);
      /* Odd n: take sample 0 alone so the loop below can go two samples at a time */
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
      for(i=(n&1);i<n;i+=2)
      {
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
      }

      shift = celt_ilog2(ac0)-30+10;
      /* Halved because the shift is applied to the samples, and the products square it */
      shift = (shift)/2;
      if (shift>0)
      {
         /* Scale the samples down into the scratch buffer */
         for(i=0;i<n;i++)
            xx[i] = PSHR32(xptr[i], shift);
         xptr = xx;
      } else
         shift = 0;
   }
#endif
   /* First fastN products of every lag 0..lag */
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
   /* Remaining products (i from k+fastN to n-1) of every lag */
   for (k=0;k<=lag;k++)
   {
      for (i = k+fastN, d = 0; i < n; i++)
         d = MAC16_16(d, xptr[i], xptr[i-k]);
      ac[k] += d;
   }
#ifdef OPUS_FIXED_POINT
   /* The sample shift counts twice in the products */
   shift = 2*shift;
   if (shift<=0)
      ac[0] += SHL32((opus_int32)1, -shift);
   /* Re-normalise so that ac[0] lies in [2^28, 2^29), adjusting the reported shift accordingly */
   if (ac[0] < 268435456)
   {
      int shift2 = 29 - EC_ILOG(ac[0]);
      for (i=0;i<=lag;i++)
         ac[i] = SHL32(ac[i], shift2);
      shift -= shift2;
   } else if (ac[0] >= 536870912)
   {
      int shift2=1;
      if (ac[0] >= 1073741824)
         shift2++;
      for (i=0;i<=lag;i++)
         ac[i] = SHR32(ac[i], shift2);
      shift += shift2;
   }
#endif

   RESTORE_STACK;
   return shift;
}
Exemple #24
0
void bcg729Encoder(bcg729EncoderChannelContextStruct *encoderChannelContext, int16_t inputFrame[], uint8_t bitStream[])
{
	int i;
	uint16_t parameters[NB_PARAMETERS]; /* the output parameters in an array */

	/* internal buffers which we do not need to keep between calls */
	word16_t LPCoefficients[NB_LSP_COEFF]; /* the LP coefficients in Q3.12 */
	word16_t qLPCoefficients[2*NB_LSP_COEFF]; /* the quantized LP coefficients in Q3.12 computed from the qLSP one after interpolation: two sets, one for each subframe */
	word16_t weightedqLPCoefficients[2*NB_LSP_COEFF]; /* the qLP coefficients in Q3.12 weighted according to spec A3.3.3 */
	word16_t LSPCoefficients[NB_LSP_COEFF]; /* the LSP coefficients in Q15 */
	word16_t qLSPCoefficients[NB_LSP_COEFF]; /* the quantized LSP coefficients in Q15 */
	word16_t interpolatedqLSP[NB_LSP_COEFF]; /* the interpolated qLSP used for first subframe in Q15 */


	/*****************************************************************************************/
	/*** on frame basis : preProcessing, LP Analysis, Open-loop pitch search               ***/
	preProcessing(encoderChannelContext, inputFrame, encoderChannelContext->signalLastInputFrame); /* output of the function in the signal buffer */

	computeLP(encoderChannelContext->signalBuffer, LPCoefficients); /* use the whole signal Buffer for windowing and autocorrelation */
	/*** compute LSP: it might fail, get the previous one in this case ***/
	if (!LP2LSPConversion(LPCoefficients, LSPCoefficients)) {
		/* unable to find the 10 roots repeat previous LSP */
		memcpy(LSPCoefficients, encoderChannelContext->previousLSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));
	}

	/*** LSPQuantization and compute L0, L1, L2, L3: the first four parameters ***/
	LSPQuantization(encoderChannelContext, LSPCoefficients, qLSPCoefficients, parameters);
	
	/*** interpolate qLSP and convert to LP ***/
	interpolateqLSP(encoderChannelContext->previousqLSPCoefficients, qLSPCoefficients, interpolatedqLSP);
	/* copy the currentqLSP to previousqLSP buffer */
	for (i=0; i<NB_LSP_COEFF; i++) {
		encoderChannelContext->previousqLSPCoefficients[i] = qLSPCoefficients[i];
	}

	/* first subframe */
	qLSP2LP(interpolatedqLSP, qLPCoefficients);
	/* second subframe */
	qLSP2LP(qLSPCoefficients, &(qLPCoefficients[NB_LSP_COEFF]));

	/*** Compute the weighted Quantized LP Coefficients according to spec A3.3.3 ***/
	/*  weightedqLPCoefficients[0] = qLPCoefficients[0]*Gamma^(i+1) (i=0..9) with Gamma = 0.75 in Q15 */
	weightedqLPCoefficients[0] = MULT16_16_P15(qLPCoefficients[0], GAMMA_E1);
	weightedqLPCoefficients[1] = MULT16_16_P15(qLPCoefficients[1], GAMMA_E2);
	weightedqLPCoefficients[2] = MULT16_16_P15(qLPCoefficients[2], GAMMA_E3);
	weightedqLPCoefficients[3] = MULT16_16_P15(qLPCoefficients[3], GAMMA_E4);
	weightedqLPCoefficients[4] = MULT16_16_P15(qLPCoefficients[4], GAMMA_E5);
	weightedqLPCoefficients[5] = MULT16_16_P15(qLPCoefficients[5], GAMMA_E6);
	weightedqLPCoefficients[6] = MULT16_16_P15(qLPCoefficients[6], GAMMA_E7);
	weightedqLPCoefficients[7] = MULT16_16_P15(qLPCoefficients[7], GAMMA_E8);
	weightedqLPCoefficients[8] = MULT16_16_P15(qLPCoefficients[8], GAMMA_E9);
	weightedqLPCoefficients[9] = MULT16_16_P15(qLPCoefficients[9], GAMMA_E10);
	weightedqLPCoefficients[10] = MULT16_16_P15(qLPCoefficients[10], GAMMA_E1);
	weightedqLPCoefficients[11] = MULT16_16_P15(qLPCoefficients[11], GAMMA_E2);
	weightedqLPCoefficients[12] = MULT16_16_P15(qLPCoefficients[12], GAMMA_E3);
	weightedqLPCoefficients[13] = MULT16_16_P15(qLPCoefficients[13], GAMMA_E4);
	weightedqLPCoefficients[14] = MULT16_16_P15(qLPCoefficients[14], GAMMA_E5);
	weightedqLPCoefficients[15] = MULT16_16_P15(qLPCoefficients[15], GAMMA_E6);
	weightedqLPCoefficients[16] = MULT16_16_P15(qLPCoefficients[16], GAMMA_E7);
	weightedqLPCoefficients[17] = MULT16_16_P15(qLPCoefficients[17], GAMMA_E8);
	weightedqLPCoefficients[18] = MULT16_16_P15(qLPCoefficients[18], GAMMA_E9);
	weightedqLPCoefficients[19] = MULT16_16_P15(qLPCoefficients[19], GAMMA_E10);

	/*** Compute weighted signal according to spec A3.3.3, this function also set LPResidualSignal(entire frame values) as specified in eq A.3 in excitationVector[L_PAST_EXCITATION] ***/
	computeWeightedSpeech(encoderChannelContext->signalCurrentFrame, qLPCoefficients, weightedqLPCoefficients, &(encoderChannelContext->weightedInputSignal[MAXIMUM_INT_PITCH_DELAY]), &(encoderChannelContext->excitationVector[L_PAST_EXCITATION])); /* weightedInputSignal contains MAXIMUM_INT_PITCH_DELAY values from previous frame, points to current frame  */

	/*** find the open loop pitch delay ***/
	uint16_t openLoopPitchDelay = findOpenLoopPitchDelay(&(encoderChannelContext->weightedInputSignal[MAXIMUM_INT_PITCH_DELAY]));

	/* define boundaries for closed loop pitch delay search as specified in 3.7 */
	int16_t intPitchDelayMin = openLoopPitchDelay-3;
	if (intPitchDelayMin < 20) {
		intPitchDelayMin = 20;
	}
	int16_t intPitchDelayMax = intPitchDelayMin + 6;
	if (intPitchDelayMax > MAXIMUM_INT_PITCH_DELAY) {
		intPitchDelayMax = MAXIMUM_INT_PITCH_DELAY;
		intPitchDelayMin = MAXIMUM_INT_PITCH_DELAY - 6;
	}

	/*****************************************************************************************/
	/* loop over the two subframes: Closed-loop pitch search(adaptative codebook), fixed codebook, memory update */
	/* set index and buffers */
	int subframeIndex;
	int LPCoefficientsIndex = 0;
	int parametersIndex = 4; /* index to insert parameters in the parameters output array */
	word16_t impulseResponseInput[L_SUBFRAME]; /* input buffer for the impulse response computation: in Q12, 1 followed by all zeros see spec A3.5*/
	impulseResponseInput[0] = ONE_IN_Q12;
	memset(&(impulseResponseInput[1]), 0, (L_SUBFRAME-1)*sizeof(word16_t));

	for (subframeIndex=0; subframeIndex<L_FRAME; subframeIndex+=L_SUBFRAME) {
		/*** Compute the impulse response : filter a subframe long buffer filled with unit and only zero through the 1/weightedqLPCoefficients as in spec A.3.5 ***/
		word16_t impulseResponseBuffer[NB_LSP_COEFF+L_SUBFRAME]; /* impulseResponseBuffer in Q12, need NB_LSP_COEFF as past value to go through filtering function */
		memset(impulseResponseBuffer, 0, (NB_LSP_COEFF)*sizeof(word16_t)); /* set the past values to zero */
		synthesisFilter(impulseResponseInput, &(weightedqLPCoefficients[LPCoefficientsIndex]), &(impulseResponseBuffer[NB_LSP_COEFF]));
	
		/*** Compute the target signal (x[n]) as in spec A.3.6 in Q0 ***/
		/* excitationVector[L_PAST_EXCITATION+subframeIndex] currently store in Q0 the LPResidualSignal as in spec A.3.3 eq A.3*/
		synthesisFilter( &(encoderChannelContext->excitationVector[L_PAST_EXCITATION+subframeIndex]), &(weightedqLPCoefficients[LPCoefficientsIndex]), &(encoderChannelContext->targetSignal[NB_LSP_COEFF]));

		/*** Adaptative Codebook search : compute the intPitchDelay, fracPitchDelay and associated parameter, compute also the adaptative codebook vector used to generate the excitation ***/
		/* after this call, the excitationVector[L_PAST_EXCITATION + subFrameIndex] contains the adaptative codebook vector as in spec 3.7.1 */
		int16_t intPitchDelay, fracPitchDelay;
		adaptativeCodebookSearch(&(encoderChannelContext->excitationVector[L_PAST_EXCITATION + subframeIndex]), &intPitchDelayMin, &intPitchDelayMax, &(impulseResponseBuffer[NB_LSP_COEFF]), &(encoderChannelContext->targetSignal[NB_LSP_COEFF]),
			&intPitchDelay, &fracPitchDelay, &(parameters[parametersIndex]), subframeIndex);

		/*** Compute adaptative codebook gain spec 3.7.3, result in Q14 ***/
		/* compute the filtered adaptative codebook vector spec 3.7.3 */
		/* this computation makes use of two partial results used for gainQuantization too (yy and xy in eq63), they are part of the function output */
		/* note spec 3.7.3 eq44 make use of convolution of impulseResponse and adaptative codebook vector to compute the filtered version */
		/* in the Annex A, the filter being simpler, it's faster to directly filter the the vector using the  weightedqLPCoefficients */
		word16_t filteredAdaptativeCodebookVector[NB_LSP_COEFF+L_SUBFRAME]; /* in Q0, the first NB_LSP_COEFF words are set to zero and used by filter only */
		memset(filteredAdaptativeCodebookVector, 0, NB_LSP_COEFF*sizeof(word16_t));
		synthesisFilter(&(encoderChannelContext->excitationVector[L_PAST_EXCITATION + subframeIndex]), &(weightedqLPCoefficients[LPCoefficientsIndex]), &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]));

		word64_t gainQuantizationXy, gainQuantizationYy; /* used to store in Q0 values reused in gain quantization */

		word16_t adaptativeCodebookGain = computeAdaptativeCodebookGain(&(encoderChannelContext->targetSignal[NB_LSP_COEFF]), &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]), &gainQuantizationXy, &gainQuantizationYy); /* gain in Q14 */
		
		/* increase parameters index and compute P0 if needed */
		parametersIndex++;
		if (subframeIndex==0) { /* first subframe compute P0, the parity bit of P1 */
			parameters[parametersIndex] = computeParity(parameters[parametersIndex-1]);
			parametersIndex++;
		}

		/*** Fixed Codebook Search : compute the parameters for fixed codebook and the regular and convolved version of the fixed codebook vector ***/
		word16_t fixedCodebookVector[L_SUBFRAME]; /* in Q13 */
		word16_t convolvedFixedCodebookVector[L_SUBFRAME]; /* in Q12 */
		fixedCodebookSearch(&(encoderChannelContext->targetSignal[NB_LSP_COEFF]), &(impulseResponseBuffer[NB_LSP_COEFF]), intPitchDelay, encoderChannelContext->lastQuantizedAdaptativeCodebookGain, &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]), adaptativeCodebookGain,
			&(parameters[parametersIndex]), &(parameters[parametersIndex+1]), fixedCodebookVector, convolvedFixedCodebookVector);
		parametersIndex+=2;

		/*** gains Quantization ***/
		word16_t quantizedAdaptativeCodebookGain; /* in Q14 */
		word16_t quantizedFixedCodebookGain; /* in Q1 */
		gainQuantization(encoderChannelContext, &(encoderChannelContext->targetSignal[NB_LSP_COEFF]), &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]), convolvedFixedCodebookVector, fixedCodebookVector, gainQuantizationXy, gainQuantizationYy,
			&quantizedAdaptativeCodebookGain, &quantizedFixedCodebookGain, &(parameters[parametersIndex]), &(parameters[parametersIndex+1]));
		parametersIndex+=2;
		
		/*** subframe basis indexes and memory updates ***/
		LPCoefficientsIndex+= NB_LSP_COEFF;
		encoderChannelContext->lastQuantizedAdaptativeCodebookGain = quantizedAdaptativeCodebookGain;
		if (encoderChannelContext->lastQuantizedAdaptativeCodebookGain>ONE_POINT_2_IN_Q14) encoderChannelContext->lastQuantizedAdaptativeCodebookGain = ONE_POINT_2_IN_Q14;
		if (encoderChannelContext->lastQuantizedAdaptativeCodebookGain<O2_IN_Q14) encoderChannelContext->lastQuantizedAdaptativeCodebookGain = O2_IN_Q14;
		/* compute excitation for current subframe as in spec A.3.10 */
		/* excitationVector[L_PAST_EXCITATION + subframeIndex] currently contains in Q0 the adaptative codebook vector, quantizedAdaptativeCodebookGain in Q14 */
		/* fixedCodebookVector in Q13, quantizedFixedCodebookGain in Q1 */
		for (i=0; i<L_SUBFRAME; i++) {
			encoderChannelContext->excitationVector[L_PAST_EXCITATION + subframeIndex + i] = (word16_t)(SATURATE(PSHR(ADD32(MULT16_16(encoderChannelContext->excitationVector[L_PAST_EXCITATION + subframeIndex + i], quantizedAdaptativeCodebookGain),
											MULT16_16(fixedCodebookVector[i], quantizedFixedCodebookGain)), 14), MAXINT16)); /* result in Q0 */
		}

		/* update targetSignal memory as in spec A.3.10 */
		quantizedAdaptativeCodebookGain = PSHR(quantizedAdaptativeCodebookGain, 1); /* quantizedAdaptativeCodebookGain in Q13 */
		for (i=0; i<NB_LSP_COEFF; i++) {
			/* targetSignal[i] = targetSignal[L_SUBFRAME+i] - quantizedAdaptativeCodebookGain*filteredAdaptativeCodebookVector[L_SUBFRAME+i] - quantizedFixedCodebookGain*convolvedFixedCodebookVector[L_SUBFRAME-NB_LSP_COEFF+i]*/
			word32_t acc = MAC16_16(MULT16_16(quantizedAdaptativeCodebookGain, filteredAdaptativeCodebookVector[L_SUBFRAME+i]), quantizedFixedCodebookGain, convolvedFixedCodebookVector[L_SUBFRAME-NB_LSP_COEFF+i]); /* acc in Q13 */
			encoderChannelContext->targetSignal[i] = (word16_t)(SATURATE(SUB32(encoderChannelContext->targetSignal[L_SUBFRAME+i], PSHR(acc, 13)), MAXINT16));
			
		}
	}

	/*****************************************************************************************/
	/*** frame basis memory updates                                                        ***/
	/* shift left by L_FRAME the signal buffer */
	memmove(encoderChannelContext->signalBuffer, &(encoderChannelContext->signalBuffer[L_FRAME]), (L_LP_ANALYSIS_WINDOW-L_FRAME)*sizeof(word16_t)); 
	/* update previousLSP coefficient buffer */
	memcpy(encoderChannelContext->previousLSPCoefficients, LSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));
	memcpy(encoderChannelContext->previousqLSPCoefficients, qLSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));
	/* shift left by L_FRAME the weightedInputSignal buffer */
	memmove(encoderChannelContext->weightedInputSignal, &(encoderChannelContext->weightedInputSignal[L_FRAME]), MAXIMUM_INT_PITCH_DELAY*sizeof(word16_t));
	/* shift left by L_FRAME the excitationVector */
	memmove(encoderChannelContext->excitationVector, &(encoderChannelContext->excitationVector[L_FRAME]), L_PAST_EXCITATION*sizeof(word16_t));

	/*** Convert array of parameters into bitStream ***/
	parametersArray2BitStream(parameters, bitStream);

	return;
}
Exemple #25
0
/**
 * Finds the best quantized 3-tap pitch predictor by analysis by synthesis.
 *
 * Three candidate contributions x[0..2] (one per tap) are built from the past
 * excitation exc2.  x[2] is the past excitation at lag (pitch - 1), repeated
 * once more at an extra `pitch` samples and zero beyond that, run through
 * iir_mem16(ak) and then filter_mem16(awk1, awk2) with cleared memories.
 * x[1] and x[0] are derived from the next-higher candidate shifted by one
 * sample plus r[] scaled by one extra past sample, so no further filtering is
 * needed.  The correlations of the candidates with the target and with each
 * other are passed to pitch_gain_search_3tap_vq(), which picks the gain
 * codebook entry.
 *
 * On return:
 *   - exc[0..nsf-1] holds the pitch contribution built with the chosen gains;
 *   - new_target[0..nsf-1] holds the target (halved first when `scaledown`
 *     is set in fixed-point builds) minus the gain-weighted candidates;
 *   - *cdbk_index holds the selected codebook entry.
 * The `bits` argument is not used by this function.
 *
 * Returns the inner product of new_target with itself.
 */
static spx_word32_t pitch_gain_search_3tap(
    const spx_word16_t target[],       /* Target vector */
    const spx_coef_t ak[],          /* LPCs for this subframe */
    const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
    const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
    spx_sig_t exc[],                /* Excitation */
    const signed char* gain_cdbk,
    int gain_cdbk_size,
    int   pitch,                    /* Pitch value */
    int   p,                        /* Number of LPC coeffs */
    int   nsf,                      /* Number of samples in subframe */
    SpeexBits* bits,
    char* stack,
    const spx_word16_t* exc2,
    const spx_word16_t* r,
    spx_word16_t* new_target,
    int*  cdbk_index,
    int plc_tuning,
    spx_word32_t cumul_gain,
    int scaledown
) {
    int i, j;
    VARDECL(spx_word16_t *cand_buf);
    VARDECL(spx_word16_t *past);
    spx_word16_t *x[3];
    spx_word32_t corr[3];
    spx_word32_t A[3][3];
    spx_word16_t gain[3];
    spx_word16_t max_gain;
    int best_cdbk = 0;

    ALLOC(cand_buf, 3 * nsf, spx_word16_t);
    ALLOC(past, nsf, spx_word16_t);

    /* A large cumulative gain lowers the ceiling given to the gain search. */
    max_gain = (cumul_gain > 262144) ? 31 : 128;

    /* The three candidates live back to back in one scratch buffer. */
    for (i = 0; i < 3; i++) {
        x[i] = cand_buf + i * nsf;
    }

    for (j = 0; j < nsf; j++) {
        new_target[j] = target[j];
    }

    {
        VARDECL(spx_mem_t *mm);
        int pp = pitch - 1;
        ALLOC(mm, p, spx_mem_t);

        /* Past excitation at lag pp, then once more at lag pp + pitch, then zero. */
        for (j = 0; j < nsf; j++) {
            int lag = j - pp;
            if (lag >= 0) {
                lag -= pitch;
            }
            past[j] = (lag < 0) ? exc2[lag] : 0;
        }
#ifdef FIXED_POINT
        /* Scale target and excitation down if needed (avoiding overflow) */
        if (scaledown) {
            for (j = 0; j < nsf; j++) {
                past[j] = SHR16(past[j], 1);
                new_target[j] = SHR16(new_target[j], 1);
            }
        }
#endif
        /* Both filters start from cleared memory. */
        for (j = 0; j < p; j++) {
            mm[j] = 0;
        }
        iir_mem16(past, ak, past, nsf, p, mm, stack);
        for (j = 0; j < p; j++) {
            mm[j] = 0;
        }
        filter_mem16(past, awk1, awk2, past, nsf, p, mm, stack);
        for (j = 0; j < nsf; j++) {
            x[2][j] = past[j];
        }
    }

    /* Derive x[1] then x[0] from the next-higher candidate: shift it by one
       sample and add r[] scaled by the one additional past sample. */
    for (i = 1; i >= 0; i--) {
        spx_word16_t seed = exc2[-pitch - 1 + i];
#ifdef FIXED_POINT
        /* Scale excitation down if needed (avoiding overflow) */
        if (scaledown) {
            seed = SHR16(seed, 1);
        }
#endif
        x[i][0] = MULT16_16_Q14(r[0], seed);
        for (j = 1; j < nsf; j++) {
            x[i][j] = ADD32(x[i + 1][j - 1], MULT16_16_P14(r[j], seed));
        }
    }

    /* Correlation of each candidate with the target, and the symmetric
       candidate-to-candidate correlation matrix. */
    for (i = 0; i < 3; i++) {
        corr[i] = inner_prod(x[i], new_target, nsf);
    }
    for (i = 0; i < 3; i++) {
        for (j = 0; j <= i; j++) {
            spx_word32_t dot = inner_prod(x[i], x[j], nsf);
            A[j][i] = dot;
            A[i][j] = dot;
        }
    }

    {
        spx_word32_t C[9];
        const signed char *entry;
#ifdef FIXED_POINT
        spx_word16_t C16[9];
#else
        spx_word16_t *C16 = C;
#endif
        /* Pack the statistics in the order the gain search expects:
           target correlations, cross terms, then candidate energies. */
        for (j = 0; j < 3; j++) {
            C[j] = corr[2 - j];
            C[6 + j] = A[2 - j][2 - j];
        }
        C[3] = A[1][2];
        C[4] = A[0][1];
        C[5] = A[0][2];

        /*plc_tuning *= 2;*/
        if (plc_tuning < 2) {
            plc_tuning = 2;
        } else if (plc_tuning > 30) {
            plc_tuning = 30;
        }
#ifdef FIXED_POINT
        /* Double the correlation terms and inflate the energy terms by a
           plc_tuning-dependent factor before normalising to 16 bits. */
        for (j = 0; j < 6; j++) {
            C[j] = SHL32(C[j], 1);
        }
        for (j = 6; j < 9; j++) {
            C[j] = MAC16_32_Q15(C[j], MULT16_16_16(plc_tuning, 655), C[j]);
        }
        normalize16(C, C16, 32767, 9);
#else
        /* Float build: scale the energy terms instead of the correlations. */
        for (j = 6; j < 9; j++) {
            C[j] *= .5 * (1 + .02 * plc_tuning);
        }
#endif

        best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain);

        /* Each codebook entry is 4 bytes; the first three are the tap gains. */
        entry = gain_cdbk + best_cdbk * 4;
        for (j = 0; j < 3; j++) {
#ifdef FIXED_POINT
            gain[j] = ADD16(32, (spx_word16_t)entry[j]);
            /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
#else
            gain[j] = 0.015625 * entry[j] + .5;
#endif
        }
        *cdbk_index = best_cdbk;
    }

    /* Rebuild the pitch contribution in exc from the unfiltered past
       excitation, one tap at a time. */
    SPEEX_MEMSET(exc, 0, nsf);
    for (i = 0; i < 3; i++) {
        int n;
        int lag = pitch + 1 - i;
        int first_end = (nsf > lag) ? lag : nsf;
        int second_end = (nsf > lag + pitch) ? lag + pitch : nsf;

        /* Samples reachable at the plain lag. */
        for (n = 0; n < first_end; n++) {
            exc[n] = MAC16_16(exc[n], SHL16(gain[2 - i], 7), exc2[n - lag]);
        }
        /* Lag shorter than the subframe: continue one more pitch period back. */
        for (n = first_end; n < second_end; n++) {
            exc[n] = MAC16_16(exc[n], SHL16(gain[2 - i], 7), exc2[n - lag - pitch]);
        }
    }

    /* Remove the gain-weighted candidates from the target. */
    for (i = 0; i < nsf; i++) {
        spx_word32_t contrib = ADD32(ADD32(MULT16_16(gain[0], x[2][i]), MULT16_16(gain[1], x[1][i])),
                                     MULT16_16(gain[2], x[0][i]));
        new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(contrib, 6)));
    }

    return inner_prod(new_target, new_target, nsf);
}
/*
 * N-best search of a split shape (optionally shape+sign) codebook.
 *
 * The number of survivors N is derived from `complexity` (capped at 10, then
 * scaled by 2/3, minimum 1).  N == 1 is delegated to
 * split_cb_search_shape_sign_N1().  Otherwise, for every sub-vector the
 * function extends each surviving partial solution with the N best codewords
 * returned by vq_nbest()/vq_nbest_sign(), keeps the N lowest accumulated
 * distances, and propagates each chosen codeword's effect into the remaining
 * part of that survivor's target via target_update().
 *
 * The indices of the best survivor are written to `bits`, the matching
 * codewords are added to exc[0..nsf-1], and, only when `update_target` is
 * non-zero, the synthesized contribution is subtracted from target[].
 */
void split_cb_search_shape_sign(
   spx_word16_t target[],     /* target vector */
   spx_coef_t ak[],           /* LPCs for this subframe */
   spx_coef_t awk1[],         /* Weighted LPCs for this subframe */
   spx_coef_t awk2[],         /* Weighted LPCs for this subframe */
   const void *par,           /* Codebook/search parameters*/
   int   p,                   /* number of LPC coeffs */
   int   nsf,                 /* number of samples in subframe */
   spx_sig_t *exc,
   spx_word16_t *r,
   SpeexBits *bits,
   char *stack,
   int   complexity,
   int   update_target
)
{
   int i, j, k, m, n, q;
   VARDECL(spx_word16_t *resp);
#ifdef _USE_SSE
   VARDECL(__m128 *resp2);
   VARDECL(__m128 *E);
#else
   spx_word16_t *resp2;
   VARDECL(spx_word32_t *E);
#endif
   VARDECL(spx_word16_t *base_target);
   VARDECL(spx_sig_t *innov);
   VARDECL(spx_word16_t *target_pool);
   VARDECL(spx_word32_t *ndist);
   VARDECL(spx_word32_t *odist);
   VARDECL(int *index_pool);
   VARDECL(spx_word16_t **ot2);
   VARDECL(spx_word16_t **nt2);
   spx_word16_t **ot, **nt;
   VARDECL(int **nind);
   VARDECL(int **oind);
   VARDECL(int *ind);
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   const split_cb_params *params;
   int N;
   VARDECL(int *best_index);
   VARDECL(spx_word32_t *best_dist);
   VARDECL(int *best_nind);
   VARDECL(int *best_ntarget);
   int have_sign;

   /* Complexity isn't as important for the codebooks as it is for the pitch */
   N = (complexity > 10) ? 10 : complexity;
   N = (2*N)/3;
   if (N < 1)
   {
      N = 1;
   }
   if (N == 1)
   {
      /* Single-survivor case has its own dedicated routine. */
      split_cb_search_shape_sign_N1(target, ak, awk1, awk2, par, p, nsf, exc, r, bits, stack, update_target);
      return;
   }

   ALLOC(ot2, N, spx_word16_t*);
   ALLOC(nt2, N, spx_word16_t*);
   ALLOC(oind, N, int*);
   ALLOC(nind, N, int*);

   params = (const split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
   ALLOC(E, shape_cb_size>>2, __m128);
#else
   resp2 = resp;
   ALLOC(E, shape_cb_size, spx_word32_t);
#endif
   ALLOC(base_target, nsf, spx_word16_t);
   ALLOC(innov, nsf, spx_sig_t);
   ALLOC(ind, nb_subvect, int);

   /* One pool holds the "old" and "new" target of every survivor, interleaved. */
   ALLOC(target_pool, 2*N*nsf, spx_word16_t);
   for (i = 0; i < N; i++)
   {
      ot2[i] = target_pool + 2*i*nsf;
      nt2[i] = target_pool + (2*i+1)*nsf;
   }
   ot = ot2;
   nt = nt2;
   ALLOC(best_index, N, int);
   ALLOC(best_dist, N, spx_word32_t);
   ALLOC(best_nind, N, int);
   ALLOC(best_ntarget, N, int);
   ALLOC(ndist, N, spx_word32_t);
   ALLOC(odist, N, spx_word32_t);

   /* Same interleaving for the per-survivor index histories. */
   ALLOC(index_pool, 2*N*nb_subvect, int);
   for (i = 0; i < N; i++)
   {
      nind[i] = index_pool + 2*i*nb_subvect;
      oind[i] = index_pool + (2*i+1)*nb_subvect;
   }

   SPEEX_COPY(base_target, target, nsf);

   /* Every survivor starts from the same target. */
   for (j = 0; j < N; j++)
   {
      SPEEX_COPY(&ot[j][0], base_target, nsf);
   }

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   for (j = 0; j < N; j++)
   {
      odist[j] = 0;
   }

   /*For all subvectors*/
   for (i = 0; i < nb_subvect; i++)
   {
      /* All survivors are still identical at the first sub-vector, so only
         the first one needs to be expanded there. */
      int survivors = (i == 0) ? 1 : N;
      /* Offset of the part of the target that follows this sub-vector. */
      int next_off = subvect_size*(i+1);

      /* "erase" nbest list; zeroing the index/target tables is not strictly
         necessary, but it provides additional safety to prevent crashes in
         case something goes wrong in the previous steps (e.g. NaNs) */
      for (j = 0; j < N; j++)
      {
         ndist[j] = VERY_LARGE32;
         best_nind[j] = 0;
         best_ntarget[j] = 0;
      }

      /*For all n-bests of previous subvector*/
      for (j = 0; j < survivors; j++)
      {
         spx_word16_t *x = ot[j] + subvect_size*i;
         spx_word32_t half_energy = 0;

         /* Half the energy of this survivor's current sub-vector target. */
         for (m = 0; m < subvect_size; m++)
         {
            half_energy = MAC16_16(half_energy, x[m], x[m]);
         }
#ifdef FIXED_POINT
         half_energy = SHR32(half_energy, 1);
#else
         half_energy *= .5;
#endif
         /*Find new n-best based on previous n-best j*/
#ifndef DISABLE_WIDEBAND
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
#endif /* DISABLE_WIDEBAND */
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k = 0; k < N; k++)
         {
            /* Compute total distance (including previous sub-vectors */
            spx_word32_t total = ADD32(ADD32(odist[j], best_dist[k]), half_energy);

            /* Not better than the current worst entry: nothing to insert. */
            if (!(total < ndist[N-1]))
            {
               continue;
            }

            /* First slot that this candidate beats; the test above
               guarantees the scan stops at N-1 at the latest. */
            m = 0;
            while (!(total < ndist[m]))
            {
               m++;
            }
            /* Shift the worse entries down by one and insert. */
            for (n = N-1; n > m; n--)
            {
               ndist[n] = ndist[n-1];
               best_nind[n] = best_nind[n-1];
               best_ntarget[n] = best_ntarget[n-1];
            }
            ndist[m] = total;
            best_nind[m] = best_index[k];
            best_ntarget[m] = j;
         }
      }

      /* Build the new survivors from the selected (parent, codeword) pairs. */
      for (j = 0; j < N; j++)
      {
         int rind = best_nind[j];
         spx_word16_t sign = 1;
         spx_word16_t *rest = nt[j] + next_off;
         const signed char *codeword;

         /* Indices at or above the codebook size select the negated codeword. */
         if (rind >= shape_cb_size)
         {
            sign = -1;
            rind -= shape_cb_size;
         }
         codeword = shape_cb + rind*subvect_size;

         /*previous target (we don't care what happened before*/
         for (m = next_off; m < nsf; m++)
         {
            nt[j][m] = ot[best_ntarget[j]][m];
         }

         /* New code: update the rest of the target only if it's worth it */
         for (m = 0; m < subvect_size; m++)
         {
            spx_word16_t g;
#ifdef FIXED_POINT
            g = sign*codeword[m];
#else
            g = sign*0.03125*codeword[m];
#endif
            target_update(rest, g, r+(subvect_size-m), nsf-next_off);
         }

         /* Inherit the parent's index history, then record this choice. */
         for (q = 0; q < nb_subvect; q++)
         {
            nind[j][q] = oind[best_ntarget[j]][q];
         }
         nind[j][i] = best_nind[j];
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **swap = ot;
         ot = nt;
         nt = swap;
      }
      for (j = 0; j < N; j++)
      {
         for (m = 0; m < nb_subvect; m++)
         {
            oind[j][m] = nind[j][m];
         }
      }
      for (j = 0; j < N; j++)
      {
         odist[j] = ndist[j];
      }
   }

   /*save indices*/
   for (i = 0; i < nb_subvect; i++)
   {
      ind[i] = nind[0][i];
      speex_bits_pack(bits, ind[i], params->shape_bits+have_sign);
   }

   /* Put everything back together */
   for (i = 0; i < nb_subvect; i++)
   {
      int rind = ind[i];
      spx_word16_t sign = 1;
      spx_sig_t *dst = innov + subvect_size*i;
      const signed char *codeword;

      if (rind >= shape_cb_size)
      {
         sign = -1;
         rind -= shape_cb_size;
      }
      codeword = shape_cb + rind*subvect_size;
#ifdef FIXED_POINT
      for (j = 0; j < subvect_size; j++)
      {
         spx_sig_t mag = SHL32(EXTEND32(codeword[j]), SIG_SHIFT-5);
         dst[j] = (sign == 1) ? mag : NEG32(mag);
      }
#else
      for (j = 0; j < subvect_size; j++)
      {
         dst[j] = sign*0.03125*codeword[j];
      }
#endif
   }

   /* Update excitation */
   for (j = 0; j < nsf; j++)
   {
      exc[j] = ADD32(exc[j], innov[j]);
   }

   /* Update target: only update target if necessary */
   if (update_target)
   {
      VARDECL(spx_word16_t *r2);
      ALLOC(r2, nsf, spx_word16_t);
      for (j = 0; j < nsf; j++)
      {
         r2[j] = EXTRACT16(PSHR32(innov[j], 6));
      }
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf, p, stack);
      for (j = 0; j < nsf; j++)
      {
         target[j] = SUB16(target[j], PSHR16(r2[j], 2));
      }
   }
}
Exemple #27
0
/*
 * Single-best split-codebook search specialised for one fixed configuration:
 * the exc_10_32_table codebook (32 codewords of 10 samples), 4 sub-vectors,
 * 5 bits per index, fixed-point arithmetic.
 *
 * For each of the 4 sub-vectors the best codeword is found with vq_nbest(),
 * its 5-bit index is written to `bits`, its response is removed from the
 * working copy of the target, and its effect on the following samples is
 * propagated with target_update().  The selected codewords are finally added
 * to exc[0..nsf-1].
 *
 * Notes:
 *   - The scratch buffers hold 40 samples, so nsf must not exceed 40
 *     (presumably it is always exactly 40 for this configuration -- confirm
 *     against the caller).
 *   - ak, awk1, awk2, par, p, complexity and update_target are accepted for
 *     interface compatibility but are not used; target[] itself is never
 *     modified, only a local copy is.
 */
void split_cb_search_shape_sign(
   spx_word16_t target[],     /* target vector */
   spx_coef_t ak[],           /* LPCs for this subframe */
   spx_coef_t awk1[],         /* Weighted LPCs for this subframe */
   spx_coef_t awk2[],         /* Weighted LPCs for this subframe */
   const void *par,           /* Codebook/search parameters*/
   int   p,                   /* number of LPC coeffs */
   int   nsf,                 /* number of samples in subframe */
   spx_sig_t *exc,
   spx_word16_t *r,
   SpeexBits *bits,
   char *stack,
   int   complexity,
   int   update_target
)
{
   int sub, j, m;
   const signed char *shape_cb = exc_10_32_table;
   int shape_cb_size = 32, subvect_size = 10;
   int best_index;
   spx_word32_t best_dist;
   spx_word16_t resp[320];      /* filtered response of every codeword */
   spx_word16_t *resp2 = resp;
   spx_word32_t E[32];          /* energy of every filtered codeword */
   spx_word16_t work[40];       /* working copy of the target */
   spx_sig_t innov[40];         /* selected codewords, scaled to signal domain */

   /* FIXME: Do we still need to copy the target? */
   SPEEX_COPY(work, target, nsf);

   /* Pre-compute each codeword's response (codeword convolved with r,
      truncated to the sub-vector length) and that response's energy. */
   {
      int cw, n, k;
      spx_word16_t shape[10];

      for (cw = 0; cw < shape_cb_size; cw++)
      {
         spx_word16_t *res = resp + cw*subvect_size;

         for (k = 0; k < subvect_size; k++)
         {
            shape[k] = (spx_word16_t)shape_cb[cw*subvect_size+k];
         }
         E[cw] = 0;

         /* Compute codeword response using convolution with impulse response */
         for (n = 0; n < subvect_size; n++)
         {
            spx_word32_t resj = MULT16_16(shape[0], r[n]);
            spx_word16_t res16;

            for (k = 1; k <= n; k++)
            {
               resj = MAC16_16(resj, shape[k], r[n-k]);
            }
            res16 = EXTRACT16(SHR32(resj, 13));
            /* Compute codeword energy */
            E[cw] = MAC16_16(E[cw], res16, res16);
            res[n] = res16;
         }
      }
   }

   for (sub = 0; sub < 4; sub++)
   {
      spx_word16_t *x = work + subvect_size*sub;
      spx_word16_t *rest = work + subvect_size*(sub+1);
      int rest_len = nsf - subvect_size*(sub+1);
      spx_sig_t *dst = innov + subvect_size*sub;
      const signed char *codeword;
      spx_word16_t *res;
      spx_word16_t sign = 1;
      int rind;

      /* Best single codeword for this sub-vector. */
      vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);

      speex_bits_pack(bits, best_index, 5);

      /* Indices at or above the codebook size select the negated codeword. */
      rind = best_index;
      if (rind >= shape_cb_size)
      {
         sign = -1;
         rind -= shape_cb_size;
      }
      res = resp + rind*subvect_size;
      codeword = shape_cb + rind*subvect_size;

      /* Remove the codeword's response from the current sub-vector. */
      for (m = 0; m < subvect_size; m++)
      {
         if (sign > 0)
            x[m] = SUB16(x[m], res[m]);
         else
            x[m] = ADD16(x[m], res[m]);
      }

      /* Store the (signed) codeword scaled to the excitation domain. */
      for (j = 0; j < subvect_size; j++)
      {
         spx_sig_t mag = SHL32(EXTEND32(codeword[j]), SIG_SHIFT-5);
         dst[j] = (sign == 1) ? mag : NEG32(mag);
      }

      /* Propagate the codeword's tail into the samples after this sub-vector. */
      for (m = 0; m < subvect_size; m++)
      {
         spx_word16_t g = sign*codeword[m];
         target_update(rest, g, r+(subvect_size-m), rest_len);
      }
   }

   /* Update excitation */
   /* FIXME: We could update the excitation directly above */
   for (j = 0; j < nsf; j++)
   {
      exc[j] = ADD32(exc[j], innov[j]);
   }
}
/*
 * Decode the adaptive codebook index of one subframe into a pitch delay and
 * build the adaptive codebook vector from the past excitation.
 *
 * decoderChannelContext  : previousIntPitchDelay is read for concealment and
 *                          updated on every call
 * subFrameIndex          : 0 for the first subframe, anything else is handled
 *                          as the second subframe
 * adaptativeCodebookIndex: received index (P1 or P2)
 * parityFlag             : non-zero on parity error (first subframe only)
 * frameErasureFlag       : non-zero when the frame is erased
 * intPitchDelay          : in/out. For the second subframe it must hold the
 *                          integer delay of the first subframe on entry; on
 *                          return it holds this subframe's integer delay
 * excitationVector       : points to the current subframe inside the
 *                          excitation buffer; samples at negative offsets are
 *                          read as past excitation and [0, L_SUBFRAME) is
 *                          overwritten with the adaptive codebook vector
 */
void decodeAdaptativeCodeVector(bcg729DecoderChannelContextStruct *decoderChannelContext, int subFrameIndex, uint16_t adaptativeCodebookIndex, uint8_t parityFlag, uint8_t frameErasureFlag,
				int16_t *intPitchDelay, word16_t *excitationVector)
{
	int16_t fracPitchDelay; /* first in -1,0,1, later remapped to 0,1,2 */
	/* a parity error only matters for the first subframe; for the second one it was already handled there */
	int useConcealment = (subFrameIndex == 0) ? (parityFlag|frameErasureFlag) : frameErasureFlag;

	/*** Compute the Pitch Delay from the Codebook index ***/
	if (useConcealment) {
		/* reuse the last stored integer delay (spec 4.1.2), no fractional part */
		/* Note: the increment of the stored delay is not described in the spec, it follows the ITU source code */
		*intPitchDelay = decoderChannelContext->previousIntPitchDelay;
		fracPitchDelay = 0;
		decoderChannelContext->previousIntPitchDelay++;
		if (decoderChannelContext->previousIntPitchDelay>MAXIMUM_INT_PITCH_DELAY) {
			decoderChannelContext->previousIntPitchDelay=MAXIMUM_INT_PITCH_DELAY;
		}
	} else if (subFrameIndex == 0) { /* first subframe, normal computation (doc 4.1.3) */
		if (adaptativeCodebookIndex>=197) {
			/* integer resolution only */
			*intPitchDelay = SUB16(adaptativeCodebookIndex, 112);
			fracPitchDelay = 0;
		} else {
			/* intPitchDelay = (P1 + 2)/3 + 19, the division being done by multiplying by 1/3 in Q15 (10923) */
			*intPitchDelay = ADD16(MULT16_16_Q15(ADD16(adaptativeCodebookIndex,2), 10923), 19);
			/* fracPitchDelay = P1 - 3*intPitchDelay + 58 : in -1, 0, 1 */
			fracPitchDelay = ADD16(SUB16(adaptativeCodebookIndex, MULT16_16(*intPitchDelay, 3)), 58);
		}

		/* backup the intPitchDelay */
		decoderChannelContext->previousIntPitchDelay = *intPitchDelay;
	} else { /* second subframe, normal computation (doc 4.1.3) */
		/* lower bound of the search range, derived from the first subframe delay still stored in *intPitchDelay */
		int16_t tMin = SUB16(*intPitchDelay,5);
		if (tMin<20) {
			tMin = 20;
		} else if (tMin>134) {
			tMin = 134;
		}

		/* delay relative to tMin: (P2 + 2)/3 - 1 */
		int16_t relativeDelay = SUB16(MULT16_16_Q15(ADD16(adaptativeCodebookIndex, 2), 10923), 1);
		/* fracPitchDelay = P2 - 2 - 3*((P2 + 2)/3 - 1) */
		fracPitchDelay = SUB16(SUB16(adaptativeCodebookIndex, MULT16_16(relativeDelay, 3)), 2);
		/* intPitchDelay = (P2 + 2)/3 - 1 + tMin */
		*intPitchDelay = ADD16(relativeDelay,tMin);

		/* backup the intPitchDelay */
		decoderChannelContext->previousIntPitchDelay = *intPitchDelay;
	}

	/*** Compute the adaptative codebook vector from the pitch delay and the past excitation (spec 4.1.3 and 3.7.1) ***/
	/* v(n) = sum u(n-k-i)*b30(t+3i) + sum u(n-k+1+i)*b30(3-t+3i) for i=0,...,9 and n=0,...,39 */
	/* with k the integer delay, t the fractional delay in 0,1,2, u the past excitation, b30 the interpolation filter */
	/* Note: the spec says u(n-k+i) in the first sum but the ITU code (and this one) uses u(n-k-i) */

	word16_t *excitationVectorMinusK; /* pointer to u(-k) */

	/* remap fracPitchDelay from -1,0,1 to 0,1,2 */
	if (fracPitchDelay==1) {
		/* positive fraction: -(k+1/3) -> -(k+1)+2/3, so go one more sample back and use t=2 */
		excitationVectorMinusK = &(excitationVector[-(*intPitchDelay+1)]);
		fracPitchDelay = 2;
	} else {
		/* 0 stays 0, -1 becomes +1 : -(k-1/3) -> -k+1/3 */
		fracPitchDelay = -fracPitchDelay;
		excitationVectorMinusK = &(excitationVector[-(*intPitchDelay)]);
	}

	/* the filter phases only depend on the fractional delay, fetch them once */
	word16_t *b301 = &(b30[fracPitchDelay]); /* b30(t) in Q0.15 : sums of all b30 coeffs is < 2, no overflow possible on 32 bits */
	word16_t *b302 = &(b30[3-fracPitchDelay]); /* b30(3-t) in Q0.15 */

	/* samples are produced in increasing order: later outputs may read earlier ones through the delayed pointers */
	int n;
	for (n=0; n<L_SUBFRAME; n++) {
		word16_t *pastSample = &(excitationVectorMinusK[n]); /* u(n-k), unscaled value, full range */
		word16_t *nextSample = &(excitationVectorMinusK[n+1]); /* u(n-k+1), unscaled value, full range */
		word32_t acc = 0; /* in Q15 */
		int tap;
		for (tap=0; tap<10; tap++) {
			acc = MAC16_16(acc, pastSample[-tap], b301[3*tap]); /* u(n-k-i)b30(t+3i) */
			acc = MAC16_16(acc, nextSample[tap], b302[3*tap]); /* u(n-k+1+i)b30(3-t+3i) */
		}
		/* acc in Q15, shift/round to unscaled value and check overflow on 16 bits */
		excitationVector[n] = SATURATE(PSHR(acc, 15), MAXINT16);
	}
	return;
}
Exemple #29
0
        /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
        sum = ADD32(sum, SHR32(part, 6));
    }
    return sum;
}
#endif

#ifndef OVERRIDE_PITCH_XCORR
#if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
/*
 * Cross-correlate _x against _y at nb_pitch consecutive offsets, treating four
 * offsets per outer pass so that each x sample feeds four products.
 *
 * The result for offset i is stored at corr[nb_pitch-1-i].  Products are summed
 * over groups of four x samples and every group total is shifted right by 6
 * before being added to the running sum.
 *
 * Both loops advance in steps of 4 with no tail handling, and the y window is
 * refilled ahead of its use, so the last y sample loaded on each pass is never
 * multiplied.
 * NOTE(review): callers presumably guarantee that len and nb_pitch are
 * multiples of 4 and that _y is long enough for the look-ahead -- confirm.
 * The stack argument is not used by this implementation.
 */
void pitch_xcorr(const spx_word16_t* _x, const spx_word16_t* _y, spx_word32_t* corr, int len, int nb_pitch, char* stack) {
    int lag, n;

    for (lag = 0; lag < nb_pitch; lag += 4) {
        const spx_word16_t* xs = _x;
        const spx_word16_t* ys = _y + lag;
        spx_word32_t acc_a = 0;
        spx_word32_t acc_b = 0;
        spx_word32_t acc_c = 0;
        spx_word32_t acc_d = 0;
        spx_word16_t w0, w1, w2, w3;

        /* Prime the 4-sample sliding window over y. */
        w0 = ys[0];
        w1 = ys[1];
        w2 = ys[2];
        w3 = ys[3];
        ys += 4;

        for (n = 0; n < len; n += 4) {
            spx_word32_t pa, pb, pc, pd;

            /* x[n]: the window is aligned as (w0, w1, w2, w3). */
            pa = MULT16_16(xs[0], w0);
            pb = MULT16_16(xs[0], w1);
            pc = MULT16_16(xs[0], w2);
            pd = MULT16_16(xs[0], w3);
            w0 = ys[0]; /* oldest slot is free: reuse it for the next y sample */

            /* x[n+1]: the window has rotated to (w1, w2, w3, w0). */
            pa = MAC16_16(pa, xs[1], w1);
            pb = MAC16_16(pb, xs[1], w2);
            pc = MAC16_16(pc, xs[1], w3);
            pd = MAC16_16(pd, xs[1], w0);
            w1 = ys[1];

            /* x[n+2]: (w2, w3, w0, w1). */
            pa = MAC16_16(pa, xs[2], w2);
            pb = MAC16_16(pb, xs[2], w3);
            pc = MAC16_16(pc, xs[2], w0);
            pd = MAC16_16(pd, xs[2], w1);
            w2 = ys[2];

            /* x[n+3]: (w3, w0, w1, w2). */
            pa = MAC16_16(pa, xs[3], w3);
            pb = MAC16_16(pb, xs[3], w0);
            pc = MAC16_16(pc, xs[3], w1);
            pd = MAC16_16(pd, xs[3], w2);
            w3 = ys[3];

            xs += 4;
            ys += 4;

            /* Scale each 4-sample partial sum before accumulating it. */
            acc_a = ADD32(acc_a, SHR32(pa, 6));
            acc_b = ADD32(acc_b, SHR32(pb, 6));
            acc_c = ADD32(acc_c, SHR32(pc, 6));
            acc_d = ADD32(acc_d, SHR32(pd, 6));
        }

        /* Results are written in reverse offset order. */
        corr[nb_pitch - 1 - lag] = acc_a;
        corr[nb_pitch - 2 - lag] = acc_b;
        corr[nb_pitch - 3 - lag] = acc_c;
        corr[nb_pitch - 4 - lag] = acc_d;
    }

}
Exemple #30
0
/*
 * Decode the 3-tap pitch predictor parameters from the bit-stream and build
 * the resulting pitch contribution in exc_out.
 *
 * The pitch lag is read on params->pitch_bits bits and offset by start; the
 * gain index is read on params->gain_bits bits and selects a 4-byte entry of
 * the gain codebook, of which the first three bytes are used here.
 * cdbk_offset selects which of the consecutive codebook tables is used.
 *
 * When count_lost is non-zero and the lag exceeds subframe_offset, the three
 * gains are scaled down so that their combined gain does not exceed
 * last_pitch_gain (halved once count_lost reaches 4, and capped at 62 in the
 * fixed-point build / .95 in the float build).
 *
 * Outputs: *pitch_val receives the lag, gain_val[0..2] the gains as they are
 * before the final left shift by 7, and exc_out the accumulated contribution.
 * exc is read at negative indices, so it must point inside a buffer that
 * holds past samples.  The end, pitch_coef and stack arguments are not used
 * by this function.
 */
void pitch_unquant_3tap(
    spx_word16_t exc[],             /* Input excitation */
    spx_word32_t exc_out[],         /* Output excitation */
    int   start,                    /* Smallest pitch value allowed */
    int   end,                      /* Largest pitch value allowed */
    spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
    const void* par,
    int   nsf,                      /* Number of samples in subframe */
    int* pitch_val,
    spx_word16_t* gain_val,
    SpeexBits* bits,
    char* stack,
    int count_lost,
    int subframe_offset,
    spx_word16_t last_pitch_gain,
    int cdbk_offset
) {
    const ltp_params* ltp;
    const signed char* table;
    const signed char* entry;
    spx_word16_t taps[3];
    int lag;
    int idx;
    int k;

    ltp = (const ltp_params*) par;
    /* Each table holds (1 << gain_bits) entries of 4 bytes. */
    table = ltp->gain_cdbk + 4 * (1 << ltp->gain_bits) * cdbk_offset;

    /* The lag is read first, then the gain index. */
    lag = speex_bits_unpack_unsigned(bits, ltp->pitch_bits);
    lag += start;
    idx = speex_bits_unpack_unsigned(bits, ltp->gain_bits);

    entry = table + idx * 4;
#ifdef FIXED_POINT
    for (k = 0; k < 3; k++)
        taps[k] = ADD16(32, (spx_word16_t)entry[k]);
#else
    for (k = 0; k < 3; k++)
        taps[k] = 0.015625 * entry[k] + .5;
#endif

    if (count_lost && lag > subframe_offset) {
        spx_word16_t gain_sum;
#ifdef FIXED_POINT
        spx_word16_t ceiling = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain, 1);
        if (ceiling > 62)
            ceiling = 62;
#else
        spx_word16_t ceiling = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain;
        if (ceiling > .95)
            ceiling = .95;
#endif
        gain_sum = gain_3tap_to_1tap(taps);

        /* Shrink all three taps by the same factor so the sum meets the ceiling. */
        if (gain_sum > ceiling) {
            spx_word16_t fact = DIV32_16(SHL32(EXTEND32(ceiling), 14), gain_sum);
            for (k = 0; k < 3; k++)
                taps[k] = MULT16_16_Q14(fact, taps[k]);
        }
    }

    *pitch_val = lag;
    for (k = 0; k < 3; k++) {
        gain_val[k] = taps[k];          /* reported before the shift below */
        taps[k] = SHL16(taps[k], 7);
    }

    SPEEX_MEMSET(exc_out, 0, nsf);

    /* Taps are applied from the last one (delay lag+1) to the first (delay lag-1). */
    for (k = 0; k < 3; k++) {
        const spx_word16_t g = taps[2 - k];
        const int delay = lag + 1 - k;
        const int first_end = nsf > delay ? delay : nsf;
        const int second_end = nsf > delay + lag ? delay + lag : nsf;
        int j;

        /* Samples that lie within one delay of the subframe start. */
        for (j = 0; j < first_end; j++)
            exc_out[j] = MAC16_16(exc_out[j], g, exc[j - delay]);

        /* Past that point the source index is moved back by one more lag. */
        for (j = first_end; j < second_end; j++)
            exc_out[j] = MAC16_16(exc_out[j], g, exc[j - delay - lag]);
    }
}