Example #1
1
bool SamplerJitCache::Jit_Decode5551() {
	MOV(32, R(tempReg2), R(resultReg));
	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg2), Imm32(0x0000001F));
	AND(32, R(tempReg1), Imm32(0x000003E0));
	SHL(32, R(tempReg1), Imm8(3));
	OR(32, R(tempReg2), R(tempReg1));

	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg1), Imm32(0x00007C00));
	SHL(32, R(tempReg1), Imm8(6));
	OR(32, R(tempReg2), R(tempReg1));

	// Expand 5 -> 8.  After this, only A remains.
	MOV(32, R(tempReg1), R(tempReg2));
	SHL(32, R(tempReg2), Imm8(3));
	SHR(32, R(tempReg1), Imm8(2));
	// Chop off the bits that were shifted out.
	AND(32, R(tempReg1), Imm32(0x00070707));
	OR(32, R(tempReg2), R(tempReg1));

	// For A, we shift it to a single bit, and then subtract and XOR.
	// That's probably the simplest way to expand it...
	SHR(32, R(resultReg), Imm8(15));
	// If it was 0, it's now -1, otherwise it's 0.  Easy.
	SUB(32, R(resultReg), Imm8(1));
	XOR(32, R(resultReg), Imm32(0xFF000000));
	AND(32, R(resultReg), Imm32(0xFF000000));
	OR(32, R(resultReg), R(tempReg2));

	return true;
}
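For reference, a minimal scalar sketch of what the emitted code above computes for one RGBA5551 texel (editorial; the helper name is hypothetical, not part of the JIT):

#include <stdint.h>

static uint32_t Decode5551Ref(uint16_t px) {
	uint32_t r = px & 0x1F;
	uint32_t g = (px >> 5) & 0x1F;
	uint32_t b = (px >> 10) & 0x1F;
	// Expand 5 -> 8 bits: shift left 3, then OR in the top 2 bits.
	r = (r << 3) | (r >> 2);
	g = (g << 3) | (g >> 2);
	b = (b << 3) | (b >> 2);
	// Alpha: bit 15 becomes either 0x00 or 0xFF, which is what the SUB/XOR trick achieves.
	uint32_t a = (px >> 15) ? 0xFFu : 0x00u;
	return (a << 24) | (b << 16) | (g << 8) | r;
}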
Example #2
1
bool SamplerJitCache::Jit_Decode5650() {
	MOV(32, R(tempReg2), R(resultReg));
	AND(32, R(tempReg2), Imm32(0x0000001F));

	// B (we do R and B at the same time, they're both 5.)
	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg1), Imm32(0x0000F800));
	SHL(32, R(tempReg1), Imm8(5));
	OR(32, R(tempReg2), R(tempReg1));

	// Expand 5 -> 8.  At this point we have 00BB00RR.
	MOV(32, R(tempReg1), R(tempReg2));
	SHL(32, R(tempReg2), Imm8(3));
	SHR(32, R(tempReg1), Imm8(2));
	OR(32, R(tempReg2), R(tempReg1));
	AND(32, R(tempReg2), Imm32(0x00FF00FF));

	// Now's as good a time to put in A as any.
	OR(32, R(tempReg2), Imm32(0xFF000000));

	// Last, we need to align, extract, and expand G.
	// 3 to align to G, and then 2 to expand to 8.
	SHL(32, R(resultReg), Imm8(3 + 2));
	AND(32, R(resultReg), Imm32(0x0000FC00));
	MOV(32, R(tempReg1), R(resultReg));
	// 2 to account for resultReg being preshifted, 4 for expansion.
	SHR(32, R(tempReg1), Imm8(2 + 4));
	OR(32, R(resultReg), R(tempReg1));
	AND(32, R(resultReg), Imm32(0x0000FF00));
	OR(32, R(resultReg), R(tempReg2));

	return true;
}
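And the matching scalar sketch for RGB565 (editorial; hypothetical helper): G is 6 bits, so it expands with shifts of 2 and 4 rather than 3 and 2, and A is forced to 0xFF.

#include <stdint.h>

static uint32_t Decode5650Ref(uint16_t px) {
	uint32_t r = px & 0x1F;
	uint32_t g = (px >> 5) & 0x3F;
	uint32_t b = (px >> 11) & 0x1F;
	r = (r << 3) | (r >> 2);  // 5 -> 8
	g = (g << 2) | (g >> 4);  // 6 -> 8
	b = (b << 3) | (b >> 2);  // 5 -> 8
	return 0xFF000000u | (b << 16) | (g << 8) | r;
}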
Example #3
word32_t ChebyshevPolynomial(word16_t x, word32_t f[])
{
    /* bk in Q15*/
    word32_t bk;
    word32_t bk1 = ADD32(SHL(x,1), f[1]); /* init: b4=2x+f1 */
    word32_t bk2 = ONE_IN_Q15; /* init: b5=1 */
    uint8_t k;
    for (k=3; k>0; k--) { /* at the end of loop execution we have b1 in bk1 and b2 in bk2 */
        bk = SUB32(ADD32(SHL(MULT16_32_Q15(x,bk1), 1), f[5-k]), bk2); /* bk = 2*x*bk1 - bk2 + f(5-k), all in Q15 */
        bk2 = bk1;
        bk1 = bk;
    }

    return SUB32(ADD32(MULT16_32_Q15(x,bk1), SHR(f[5],1)), bk2); /* C(x) = x*b1 - b2 + f(5)/2 */
}
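In LaTeX form, the recurrence the loop implements (an editorial transcription of the comments, with f_i = f[i]; effectively Clenshaw evaluation of a degree-5 Chebyshev series, all in Q15):

$$b_5 = 1,\qquad b_4 = 2x + f_1,\qquad b_k = 2x\,b_{k+1} - b_{k+2} + f_{5-k}\quad(k = 3, 2, 1),$$
$$C(x) = x\,b_1 - b_2 + \tfrac{f_5}{2}.$$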
Example #4
0
static void kf_bfly2(
    kiss_fft_cpx * Fout,
    const size_t fstride,
    const kiss_fft_cfg st,
    int m
)
{
    kiss_fft_cpx * Fout2;
    kiss_fft_cpx * tw1 = st->twiddles;
    kiss_fft_cpx t;
    Fout2 = Fout + m;
    if (!st->inverse) {
        int i;
        kiss_fft_cpx *x=Fout;
        for (i=0; i<2*m; i++)
        {
            x[i].r = SHR(x[i].r,1);
            x[i].i = SHR(x[i].i,1);
        }
    }

    do {
        C_MUL (t,  *Fout2 , *tw1);
        tw1 += fstride;
        C_SUB( *Fout2 ,  *Fout , t );
        C_ADDTO( *Fout ,  t );
        ++Fout2;
        ++Fout;
    } while (--m);
}
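For clarity, a floating-point sketch of the same radix-2 butterfly (editorial; the cpx type and names are illustrative, and the fixed-point pre-scaling by SHR above, applied only on the forward transform to prevent overflow, is omitted):

#include <stddef.h>

typedef struct { float r, i; } cpx;

static void bfly2_ref(cpx *Fout, const cpx *twiddles, size_t fstride, int m) {
    cpx *Fout2 = Fout + m;
    const cpx *tw1 = twiddles;
    while (m--) {
        // t = *Fout2 * *tw1 (complex multiply), as C_MUL does.
        cpx t = { Fout2->r * tw1->r - Fout2->i * tw1->i,
                  Fout2->r * tw1->i + Fout2->i * tw1->r };
        tw1 += fstride;
        // *Fout2 = *Fout - t; *Fout += t (C_SUB / C_ADDTO).
        Fout2->r = Fout->r - t.r;  Fout2->i = Fout->i - t.i;
        Fout->r += t.r;            Fout->i += t.i;
        ++Fout2; ++Fout;
    }
}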
Example #5
0
void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_sig_t *y1, spx_sig_t *y2, int N, int M, spx_word16_t *mem, char *stack)
{
   int i,j,k,M2;
   spx_word16_t *a;
   spx_word16_t *x;
   spx_word16_t *x2;
   
   a = PUSH(stack, M, spx_word16_t);
   x = PUSH(stack, N+M-1, spx_word16_t);
   x2=x+M-1;
   M2=M>>1;
   for (i=0;i<M;i++)
      a[M-i-1]= aa[i];

   for (i=0;i<M-1;i++)
      x[i]=mem[M-i-2];
   for (i=0;i<N;i++)
      x[i+M-1]=SATURATE(PSHR(xx[i],1),16383);
   for (i=0,k=0;i<N;i+=2,k++)
   {
      y1[k]=0;
      y2[k]=0;
      for (j=0;j<M2;j++)
      {
         y1[k]+=SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1);
         y2[k]-=SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1);
         j++;
         y1[k]+=SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1);
         y2[k]+=SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1);
      }
   }
   for (i=0;i<M-1;i++)
     mem[i]=SATURATE(PSHR(xx[N-i-1],1),16383);
}
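Reading the inner loop (editorial): with x2 = x + M - 1, x2[i-j] is x[i+M-1-j], so each output pair folds the length-M convolution in half, pairing tap j with tap M-1-j; the alternating sign on y2 modulates the filter up to the high band. With a the time-reversed aa and indices relative to the delay-line buffer x:

$$y_1[k] = \tfrac12\sum_{j=0}^{M/2-1} a_j\,\big(x_{2k+j} + x_{2k+M-1-j}\big),\qquad
y_2[k] = \tfrac12\sum_{j=0}^{M/2-1} (-1)^{j+1}\,a_j\,\big(x_{2k+j} - x_{2k+M-1-j}\big).$$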
Example #6
0
bool SamplerJitCache::Jit_GetTexDataSwizzled4() {
	// Get the horizontal tile pos into tempReg1.
	LEA(32, tempReg1, MScaled(uReg, SCALE_4, 0));
	// Note: imm8 sign extends negative.
	AND(32, R(tempReg1), Imm8(~127));

	// Add vertical offset inside tile to tempReg1.
	LEA(32, tempReg2, MScaled(vReg, SCALE_4, 0));
	AND(32, R(tempReg2), Imm8(31));
	LEA(32, tempReg1, MComplex(tempReg1, tempReg2, SCALE_4, 0));
	// Add srcReg, since we'll need it at some point.
	ADD(64, R(tempReg1), R(srcReg));

	// Now find the vertical tile pos, and add to tempReg1.
	SHR(32, R(vReg), Imm8(3));
	LEA(32, EAX, MScaled(bufwReg, SCALE_4, 0));
	MUL(32, R(vReg));
	ADD(64, R(tempReg1), R(EAX));

	// Last, and possibly also least, the horizontal offset inside the tile.
	AND(32, R(uReg), Imm8(31));
	SHR(32, R(uReg), Imm8(1));
	MOV(8, R(resultReg), MRegSum(tempReg1, uReg));
	FixupBranch skipNonZero = J_CC(CC_NC);
	// If the horizontal offset was odd, take the upper 4.
	SHR(8, R(resultReg), Imm8(4));
	SetJumpTarget(skipNonZero);
	// Zero out the rest of the bits.
	AND(32, R(resultReg), Imm8(0x0F));

	return true;
}
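What this computes, as a plain-C sketch (editorial; names are illustrative): 4bpp swizzled textures sit in 16-byte-wide, 8-row tiles (128 bytes each), so the address splits into a horizontal tile, a row inside the tile, a vertical tile row, and a byte inside the row, with odd u selecting the upper nibble.

#include <stdint.h>

static uint8_t GetTexelSwizzled4Ref(const uint8_t *src, uint32_t u,
                                    uint32_t v, uint32_t bufw) {
	uint32_t tileX     = (u * 4) & ~127u;       // horizontal tile, in bytes
	uint32_t rowInTile = (v & 7) * 16;          // row offset inside the tile
	uint32_t tileY     = (v >> 3) * (bufw * 4); // vertical tile row, in bytes
	uint32_t byteInRow = (u & 31) >> 1;         // byte inside the tile row
	uint8_t b = src[tileX + rowInTile + tileY + byteInRow];
	return (u & 1) ? (b >> 4) : (b & 0x0F);     // odd u -> upper 4 bits
}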
Example #7
0
static void kf_bfly2(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   kiss_fft_cpx * Fout2;
   const kiss_twiddle_cpx * tw1;
   int i,j;
   kiss_fft_cpx * Fout_beg = Fout;
   for (i=0;i<N;i++)
   {
      Fout = Fout_beg + i*mm;
      Fout2 = Fout + m;
      tw1 = st->twiddles;
      for(j=0;j<m;j++)
      {
         kiss_fft_cpx t;
         Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
         Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
         C_MUL (t,  *Fout2 , *tw1);
         tw1 += fstride;
         C_SUB( *Fout2 ,  *Fout , t );
         C_ADDTO( *Fout ,  t );
         ++Fout2;
         ++Fout;
      }
   }
}
Example #8
0
bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
	if (id.swizzle) {
		return Jit_GetTexDataSwizzled(id, bitsPerTexel);
	}

	// srcReg might be EDX, so let's copy that before we multiply.
	switch (bitsPerTexel) {
	case 32:
	case 16:
	case 8:
		LEA(64, tempReg1, MComplex(srcReg, uReg, bitsPerTexel / 8, 0));
		break;

	case 4: {
		XOR(32, R(tempReg2), R(tempReg2));
		SHR(32, R(uReg), Imm8(1));
		FixupBranch skip = J_CC(CC_NC);
		// Track whether we shifted a 1 off or not.
		MOV(32, R(tempReg2), Imm32(4));
		SetJumpTarget(skip);
		LEA(64, tempReg1, MRegSum(srcReg, uReg));
		break;
	}

	default:
		return false;
	}

	MOV(32, R(EAX), R(vReg));
	MUL(32, R(bufwReg));

	switch (bitsPerTexel) {
	case 32:
	case 16:
	case 8:
		MOVZX(32, bitsPerTexel, resultReg, MComplex(tempReg1, RAX, bitsPerTexel / 8, 0));
		break;

	case 4: {
		SHR(32, R(RAX), Imm8(1));
		MOV(8, R(resultReg), MRegSum(tempReg1, RAX));
		// RCX is now free.
		MOV(8, R(RCX), R(tempReg2));
		SHR(8, R(resultReg), R(RCX));
		// Zero out any bits not shifted off.
		AND(32, R(resultReg), Imm8(0x0F));
		break;
	}

	default:
		return false;
	}

	return true;
}
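The linear (unswizzled) 4bpp path above boils down to this sketch (editorial; it mirrors the emitted code, including the halving of v * bufw before indexing):

#include <stdint.h>

static uint8_t GetTexel4Ref(const uint8_t *src, uint32_t u,
                            uint32_t v, uint32_t bufw) {
	// Two texels per byte: whether u was odd decides the nibble.
	uint32_t shift = (u & 1) ? 4 : 0;
	uint8_t b = src[(u >> 1) + ((v * bufw) >> 1)];
	return (uint8_t)((b >> shift) & 0x0F);
}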
Example #9
0
/* Return 1 if A + B does not overflow: */
static int time_t_int_add_ok(time_t a, int b)
{
  verify(int_no_wider_than_time_t, (INT_MAX <= TIME_T_MAX));
  if (WRAPV) {
      time_t sum = (a + b);
      return ((sum < a) == (b < 0));
  } else {
      int a_odd;
      time_t avg;
      a_odd = (int)(a & 1);
      avg = (SHR(a, 1) + (SHR(b, 1) + (a_odd & b)));
      return (((TIME_T_MIN / 2) <= avg) && (avg <= (TIME_T_MAX / 2)));
  }
}
Example #10
0
/* By segher */
void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N, int M, spx_word32_t *mem, char *stack)
   /* assumptions:
      all odd x[i] are zero -- well, actually they are left out of the array now
      N and M are multiples of 4 */
{
   int i, j;
   spx_word16_t *xx;
   
   xx= PUSH(stack, M+N-1, spx_word16_t);

   for (i = 0; i < N/2; i++)
      xx[2*i] = SHR(x[N/2-1-i],SIG_SHIFT+1);
   for (i = 0; i < M - 1; i += 2)
      xx[N+i] = mem[i+1];

   for (i = 0; i < N; i += 4) {
      spx_sig_t y0, y1, y2, y3;
      spx_word16_t x0;

      y0 = y1 = y2 = y3 = 0;
      x0 = xx[N-4-i];

      for (j = 0; j < M; j += 4) {
         spx_word16_t x1;
         spx_word16_t a0, a1;

         a0 = a[j];
         a1 = a[j+1];
         x1 = xx[N-2+j-i];

         y0 += SHR(MULT16_16(a0, x1),1);
         y1 += SHR(MULT16_16(a1, x1),1);
         y2 += SHR(MULT16_16(a0, x0),1);
         y3 += SHR(MULT16_16(a1, x0),1);

         a0 = a[j+2];
         a1 = a[j+3];
         x0 = xx[N+j-i];

         y0 += SHR(MULT16_16(a0, x0),1);
         y1 += SHR(MULT16_16(a1, x0),1);
         y2 += SHR(MULT16_16(a0, x1),1);
         y3 += SHR(MULT16_16(a1, x1),1);
      }
      y[i] = y0;
      y[i+1] = y1;
      y[i+2] = y2;
      y[i+3] = y3;
   }

   for (i = 0; i < M - 1; i += 2)
      mem[i+1] = xx[i];
}
Example #11
0
// In: RAX: s64 _Value
void DSPEmitter::Update_SR_Register16(X64Reg val)
{
	OpArg sr_reg;
	gpr.GetReg(DSP_REG_SR, sr_reg);
	AND(16, sr_reg, Imm16(~SR_CMP_MASK));

	//	// 0x04
	//	if (_Value == 0) g_dsp.r[DSP_REG_SR] |= SR_ARITH_ZERO;
	TEST(64, R(val), R(val));
	FixupBranch notZero = J_CC(CC_NZ);
	OR(16, sr_reg, Imm16(SR_ARITH_ZERO | SR_TOP2BITS));
	FixupBranch end = J();
	SetJumpTarget(notZero);

	//	// 0x08
	//	if (_Value < 0) g_dsp.r[DSP_REG_SR] |= SR_SIGN;
	FixupBranch greaterThanEqual = J_CC(CC_GE);
	OR(16, sr_reg, Imm16(SR_SIGN));
	SetJumpTarget(greaterThanEqual);

	//	// 0x20 - Checks if top bits of m are equal
	//	if ((((u16)_Value >> 14) == 0) || (((u16)_Value >> 14) == 3))
	SHR(16, R(val), Imm8(14));
	TEST(16, R(val), R(val));
	FixupBranch isZero = J_CC(CC_Z);
	CMP(16, R(val), Imm16(3));
	FixupBranch notThree = J_CC(CC_NE);
	SetJumpTarget(isZero);
	//		g_dsp.r[DSP_REG_SR] |= SR_TOP2BITS;
	OR(16, sr_reg, Imm16(SR_TOP2BITS));
	SetJumpTarget(notThree);
	SetJumpTarget(end);
	gpr.PutReg(DSP_REG_SR);
}
Example #12
0
int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len)
{
   int i;
   spx_sig_t max_val=1;
   int sig_shift;
   
   for (i=0;i<len;i++)
   {
      spx_sig_t tmp = x[i];
      if (tmp<0)
         tmp = -tmp;
      if (tmp >= max_val)
         max_val = tmp;
   }

   sig_shift=0;
   while (max_val>max_scale)
   {
      sig_shift++;
      max_val >>= 1;
   }

   for (i=0;i<len;i++)
      y[i] = SHR(x[i], sig_shift);
   
   return sig_shift;
}
Example #13
uint16_t findOpenLoopPitchDelay(word16_t weightedInputSignal[])
{
	int i;
	/*** scale the signal to avoid overflows ***/
	word16_t scaledWeightedInputSignalBuffer[MAXIMUM_INT_PITCH_DELAY+L_FRAME]; /* this buffer may hold the scaled version of the input signal; if scaling is not needed, it is unused */
	word16_t *scaledWeightedInputSignal; /* points to the beginning of the present frame, either scaled or directly the input signal */
	word64_t autocorrelation = 0;
	uint16_t indexRange1=0, indexRange2=0, indexRange3Even=0, indexRange3;
	word32_t correlationMaxRange1;
	word32_t correlationMaxRange2;
	word32_t correlationMaxRange3;
	word32_t correlationMaxRange3Odd;
	word32_t autoCorrelationRange1;
	word32_t autoCorrelationRange2;
	word32_t autoCorrelationRange3;
	word32_t normalisedCorrelationMaxRange1;
	word32_t normalisedCorrelationMaxRange2;
	word32_t normalisedCorrelationMaxRange3;
	uint16_t indexMultiple;

	/* compute the autocorrelation of the input signal on 64 bits and, if needed, scale it to fit on 32 bits */
	for (i=-MAXIMUM_INT_PITCH_DELAY; i<L_FRAME; i++) {
		autocorrelation = MAC64(autocorrelation, weightedInputSignal[i], weightedInputSignal[i]);
	}
	if (autocorrelation>MAXINT32) {
		int overflowScale;
		scaledWeightedInputSignal = &(scaledWeightedInputSignalBuffer[MAXIMUM_INT_PITCH_DELAY]);
		overflowScale = PSHR(31-countLeadingZeros((word32_t)(autocorrelation>>31)),1); /* count number of bits needed over the 31 bits allowed and divide by 2 to get the right scaling for the signal */
		for (i=-MAXIMUM_INT_PITCH_DELAY; i<L_FRAME; i++) {
			scaledWeightedInputSignal[i] = SHR(weightedInputSignal[i], overflowScale);
		}

	} else { /* scaledWeightedInputSignal points directly to weightedInputSignal */
Example #14
0
spx_word32_t speex_rand(spx_word16_t std, spx_int32_t *seed)
{
   spx_word32_t res;
   *seed = 1664525 * *seed + 1013904223;
   res = MULT16_16(EXTRACT16(SHR32(*seed,16)),std);
   return SUB32(res, SHR(res, 3));
}
Example #15
0
/* Return 1 if A + B does not overflow.  */
static int
time_t_int_add_ok (time_t a, int b)
{
  verify (int_no_wider_than_time_t, INT_MAX <= TIME_T_MAX);
  if (WRAPV)
    {
      time_t sum = a + b;
      return (sum < a) == (b < 0);
    }
  else
    {
      int a_odd = a & 1;
      time_t avg = SHR (a, 1) + (SHR (b, 1) + (a_odd & b));
      return TIME_T_MIN / 2 <= avg && avg <= TIME_T_MAX / 2;
    }
}
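A quick editorial check of the non-wrapping branch: SHR (a, 1) + SHR (b, 1) + (a_odd & b) equals floor((a + b) / 2) without ever forming a + b, since the two discarded half-bits contribute one only when both a and b are odd. For a = 7, b = 5 that is 3 + 2 + 1 = 6 = (7 + 5) / 2, and a + b fits in time_t precisely when this average lies in [TIME_T_MIN / 2, TIME_T_MAX / 2].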
Example #16
0
bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerIndex) {
	GEPaletteFormat fmt = (GEPaletteFormat)id.clutfmt;
	if (!id.hasClutShift && !id.hasClutMask && !id.hasClutOffset) {
		// This is simple - just mask if necessary.
		if (bitsPerIndex > 8) {
			AND(32, R(resultReg), Imm32(0x000000FF));
		}
		return true;
	}

	MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate.clutformat));
	MOV(32, R(tempReg1), MatR(tempReg1));

	// Shift = (clutformat >> 2) & 0x1F
	if (id.hasClutShift) {
		MOV(32, R(RCX), R(tempReg1));
		SHR(32, R(RCX), Imm8(2));
		AND(32, R(RCX), Imm8(0x1F));
		SHR(32, R(resultReg), R(RCX));
	}

	// Mask = (clutformat >> 8) & 0xFF
	if (id.hasClutMask) {
		MOV(32, R(tempReg2), R(tempReg1));
		SHR(32, R(tempReg2), Imm8(8));
		AND(32, R(resultReg), R(tempReg2));
	}

	// We need to wrap any entries beyond the first 1024 bytes.
	u32 offsetMask = fmt == GE_CMODE_32BIT_ABGR8888 ? 0x00FF : 0x01FF;

	// We must mask to 0xFF before ORing 0x100 in 16 bit CMODEs.
	// But skip if we'll mask 0xFF after offset anyway.
	if (bitsPerIndex > 8 && (!id.hasClutOffset || offsetMask != 0x00FF)) {
		AND(32, R(resultReg), Imm32(0x000000FF));
	}

	// Offset = (clutformat >> 12) & 0x01F0
	if (id.hasClutOffset) {
		SHR(32, R(tempReg1), Imm8(16));
		SHL(32, R(tempReg1), Imm8(4));
		OR(32, R(resultReg), R(tempReg1));
		AND(32, R(resultReg), Imm32(offsetMask));
	}
	return true;
}
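Collapsed into scalar C, the transform is roughly this (editorial sketch assuming all three fields are present; the JIT above interleaves the 0xFF masking differently to cope with indexes wider than 8 bits):

#include <stdint.h>

static uint32_t TransformClutIndexRef(uint32_t index, uint32_t clutformat,
                                      int clutIs32Bit) {
	uint32_t shift  = (clutformat >> 2) & 0x1F;
	uint32_t mask   = (clutformat >> 8) & 0xFF;
	uint32_t offset = (clutformat >> 12) & 0x01F0;
	// Entries beyond the first 1024 bytes wrap: 256 x 32-bit or 512 x 16-bit.
	uint32_t offsetMask = clutIs32Bit ? 0x00FF : 0x01FF;
	return (((index >> shift) & mask) | offset) & offsetMask;
}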
Example #17
0
/* FIXME: These functions are ugly and probably introduce too much error */
void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
{
   int i;
   for (i=0;i<len;i++)
   {
      y[i] = SHL(MULT16_32_Q14(SHR(x[i],7),scale),7);
   }
}
Example #18
0
/*Makes sure the LSPs are stable*/
void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin)
{
   int i;
   spx_word16_t m = margin;
   spx_word16_t m2 = 25736-margin;
  
   if (lsp[0]<m)
      lsp[0]=m;
   if (lsp[len-1]>m2)
      lsp[len-1]=m2;
   for (i=1;i<len-1;i++)
   {
      if (lsp[i]<lsp[i-1]+m)
         lsp[i]=lsp[i-1]+m;

      if (lsp[i]>lsp[i+1]-m)
         lsp[i]= SHR(lsp[i],1) + SHR(lsp[i+1]-m,1);
   }
}
Example #19
0
static inline time_t
ydhms_diff (long_int year1, long_int yday1, int hour1, int min1, int sec1,
            int year0, int yday0, int hour0, int min0, int sec0)
{
  verify (C99_integer_division, -1 / 2 == 0);

  /* Compute intervening leap days correctly even if year is negative.
     Take care to avoid integer overflow here.  */
  int a4 = SHR (year1, 2) + SHR (TM_YEAR_BASE, 2) - ! (year1 & 3);
  int b4 = SHR (year0, 2) + SHR (TM_YEAR_BASE, 2) - ! (year0 & 3);
  int a100 = a4 / 25 - (a4 % 25 < 0);
  int b100 = b4 / 25 - (b4 % 25 < 0);
  int a400 = SHR (a100, 2);
  int b400 = SHR (b100, 2);
  int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);

  /* Compute the desired time in time_t precision.  Overflow might
     occur here.  */
  time_t tyear1 = year1;
  time_t years = tyear1 - year0;
  time_t days = 365 * years + yday1 - yday0 + intervening_leap_days;
  time_t hours = 24 * days + hour1 - hour0;
  time_t minutes = 60 * hours + min1 - min0;
  time_t seconds = 60 * minutes + sec1 - sec0;
  return seconds;
}
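A worked check of the leap-day arithmetic (editorial, with TM_YEAR_BASE = 1900, so SHR (TM_YEAR_BASE, 2) = 475): for year1 = 124 (2024) and year0 = 100 (2000), a4 = 31 + 475 - 1 = 505 and b4 = 25 + 475 - 1 = 499; a100 = 20 and b100 = 19; a400 = 5 and b400 = 4; so intervening_leap_days = (505 - 499) - (20 - 19) + (5 - 4) = 6, matching the six leap years 2000, 2004, ..., 2020 in [2000, 2024).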
Example #20
0
static time_t
ydhms_diff(long_int year1, long_int yday1, int hour1, int min1, int sec1,
		   int year0, int yday0, int hour0, int min0, int sec0)
{
  verify(C99_integer_division, ((-1 / 2) == 0));

  /* Compute intervening leap days correctly even if year is negative.
   * Take care to avoid integer overflow here: */
  int a4 = (int)(SHR(year1, 2) + SHR(TM_YEAR_BASE, 2) - !(year1 & 3));
  int b4 = (SHR(year0, 2) + SHR(TM_YEAR_BASE, 2) - !(year0 & 3));
  int a100 = ((a4 / 25) - ((a4 % 25) < 0));
  int b100 = ((b4 / 25) - ((b4 % 25) < 0));
  int a400 = SHR(a100, 2);
  int b400 = SHR(b100, 2);
  int intervening_leap_days = ((a4 - b4) - (a100 - b100) + (a400 - b400));

  /* Compute the desired time in time_t precision. Overflow might occur here: */
  time_t tyear1 = year1;
  time_t years = (tyear1 - year0);
  time_t days = ((365 * years) + yday1 - yday0 + intervening_leap_days);
  time_t hours = ((24 * days) + hour1 - hour0);
  time_t minutes = ((60 * hours) + min1 - min0);
  time_t seconds = ((60 * minutes) + sec1 - sec0);
  return seconds;
}
Example #21
0
static int
tm_diff (const struct tm *a, const struct tm *b)
{
    /* Compute intervening leap days correctly even if year is negative.
       Take care to avoid int overflow in leap day calculations,
       but it's OK to assume that A and B are close to each other.  */
    int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3);
    int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3);
    int a100 = a4 / 25 - (a4 % 25 < 0);
    int b100 = b4 / 25 - (b4 % 25 < 0);
    int a400 = SHR (a100, 2);
    int b400 = SHR (b100, 2);
    int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
    int years = a->tm_year - b->tm_year;
    int days = (365 * years + intervening_leap_days
                + (a->tm_yday - b->tm_yday));
    return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
                  + (a->tm_min - b->tm_min))
            + (a->tm_sec - b->tm_sec));
}
Example #22
/* Yield the difference between *A and *B,
   measured in seconds, ignoring leap seconds.
   The body of this function is taken directly from the GNU C Library;
   see src/strftime.c.  */
static long int
tm_diff (struct tm const *a, struct tm const *b)
{
  /* Compute intervening leap days correctly even if year is negative.
     Take care to avoid int overflow in leap day calculations.  */
  int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3);
  int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3);
  int a100 = a4 / 25 - (a4 % 25 < 0);
  int b100 = b4 / 25 - (b4 % 25 < 0);
  int a400 = SHR (a100, 2);
  int b400 = SHR (b100, 2);
  int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
  long int ayear = a->tm_year;
  long int years = ayear - b->tm_year;
  long int days = (365 * years + intervening_leap_days
                   + (a->tm_yday - b->tm_yday));
  return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
                + (a->tm_min - b->tm_min))
          + (a->tm_sec - b->tm_sec));
}
Example #23
0
spx_word16_t compute_rms(const spx_sig_t *x, int len)
{
   int i;
   spx_word32_t sum=0;
   spx_sig_t max_val=1;
   int sig_shift;

   for (i=0;i<len;i++)
   {
      spx_sig_t tmp = x[i];
      if (tmp<0)
         tmp = -tmp;
      if (tmp > max_val)
         max_val = tmp;
   }

   sig_shift=0;
   while (max_val>16383)
   {
      sig_shift++;
      max_val >>= 1;
   }

   for (i=0;i<len;i+=4)
   {
      spx_word32_t sum2=0;
      spx_word16_t tmp;
      tmp = SHR(x[i],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+1],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+2],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+3],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      sum += SHR(sum2,6);
   }
   
   return SHR(SHL((spx_word32_t)spx_sqrt(1+DIV32(sum,len)),(sig_shift+3)),SIG_SHIFT);
}
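Tracking the shifts (editorial): each block accumulates (x_i >> s)^2 with s = sig_shift, block sums are pre-scaled by >> 6, and the final line undoes both, so up to the +1 guard inside the square root the function returns

$$\mathrm{rms} \;=\; \frac{2^{\,s+3}}{2^{\,\mathrm{SIG\_SHIFT}}}\,\sqrt{1+\frac{1}{len\cdot 2^{\,2s+6}}\sum_i x_i^2}\;\approx\;\frac{1}{2^{\,\mathrm{SIG\_SHIFT}}}\sqrt{\frac{1}{len}\sum_i x_i^2},$$

i.e. the RMS of x brought from signal scale down to 16-bit scale, with the shifts chosen so the squares and the square root both stay in range.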
Example #24
void split_cb_search_shape_sign(
spx_sig_t target[],			/* target vector */
spx_coef_t ak[],			/* LPCs for this subframe */
spx_coef_t awk1[],			/* Weighted LPCs for this subframe */
spx_coef_t awk2[],			/* Weighted LPCs for this subframe */
const void *par,                      /* Codebook/search parameters*/
int   p,                        /* number of LPC coeffs */
int   nsf,                      /* number of samples in subframe */
spx_sig_t *exc,
spx_sig_t *r,
SpeexBits *bits,
char *stack,
int   complexity
)
{
   int i,j,k,m,n,q;
   spx_word16_t *resp;
#ifdef _USE_SSE
   __m128 *resp2;
   __m128 *E;
#else
   spx_word16_t *resp2;
   spx_word32_t *E;
#endif
   spx_word16_t *t;
   spx_sig_t *e, *r2;
   spx_word16_t *tmp;
   spx_word32_t *ndist, *odist;
   int *itmp;
   spx_word16_t **ot, **nt;
   int **nind, **oind;
   int *ind;
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   split_cb_params *params;
   int N=2;
   int *best_index;
   spx_word32_t *best_dist;
   int have_sign;
   N=complexity;
   if (N>10)
      N=10;

   ot=PUSH(stack, N, spx_word16_t*);
   nt=PUSH(stack, N, spx_word16_t*);
   oind=PUSH(stack, N, int*);
   nind=PUSH(stack, N, int*);

   params = (split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;
   resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
   E = PUSH(stack, shape_cb_size>>2, __m128);
#else
   resp2 = resp;
   E = PUSH(stack, shape_cb_size, spx_word32_t);
#endif
   t = PUSH(stack, nsf, spx_word16_t);
   e = PUSH(stack, nsf, spx_sig_t);
   r2 = PUSH(stack, nsf, spx_sig_t);
   ind = PUSH(stack, nb_subvect, int);

   tmp = PUSH(stack, 2*N*nsf, spx_word16_t);
   for (i=0;i<N;i++)
   {
      ot[i]=tmp;
      tmp += nsf;
      nt[i]=tmp;
      tmp += nsf;
   }
   best_index = PUSH(stack, N, int);
   best_dist = PUSH(stack, N, spx_word32_t);
   ndist = PUSH(stack, N, spx_word32_t);
   odist = PUSH(stack, N, spx_word32_t);
   
   itmp = PUSH(stack, 2*N*nb_subvect, int);
   for (i=0;i<N;i++)
   {
      nind[i]=itmp;
      itmp+=nb_subvect;
      oind[i]=itmp;
      itmp+=nb_subvect;
      for (j=0;j<nb_subvect;j++)
         nind[i][j]=oind[i][j]=-1;
   }
   
   /* FIXME: make that adaptive? */
   for (i=0;i<nsf;i++)
      t[i]=SHR(target[i],6);

   for (j=0;j<N;j++)
      for (i=0;i<nsf;i++)
         ot[j][i]=t[i];

   /*for (i=0;i<nsf;i++)
     printf ("%d\n", (int)t[i]);*/

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   for (j=0;j<N;j++)
      odist[j]=0;
   /*For all subvectors*/
   for (i=0;i<nb_subvect;i++)
   {
      /*"erase" nbest list*/
      for (j=0;j<N;j++)
         ndist[j]=-2;

      /*For all n-bests of previous subvector*/
      for (j=0;j<N;j++)
      {
         spx_word16_t *x=ot[j]+subvect_size*i;
         /*Find new n-best based on previous n-best j*/
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k=0;k<N;k++)
         {
            spx_word16_t *ct;
            spx_word32_t err=0;
            ct = ot[j];
            /*update target*/

            /*previous target*/
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               t[m]=ct[m];

            /* New code: update only enough of the target to calculate error*/
            {
               int rind;
               spx_word16_t *res;
               spx_word16_t sign=1;
               rind = best_index[k];
               if (rind>=shape_cb_size)
               {
                  sign=-1;
                  rind-=shape_cb_size;
               }
               res = resp+rind*subvect_size;
               if (sign>0)
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] -= res[m];
               else
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] += res[m];
            }
            
            /*compute error (distance)*/
            err=odist[j];
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               err += t[m]*t[m];
            /*update n-best list*/
            if (err<ndist[N-1] || ndist[N-1]<-1)
            {

               /*previous target (we don't care what happened before)*/
               for (m=(i+1)*subvect_size;m<nsf;m++)
                  t[m]=ct[m];
               /* New code: update the rest of the target only if it's worth it */
               for (m=0;m<subvect_size;m++)
               {
                  spx_word16_t g;
                  int rind;
                  spx_word16_t sign=1;
                  rind = best_index[k];
                  if (rind>=shape_cb_size)
                  {
                     sign=-1;
                     rind-=shape_cb_size;
                  }

                  q=subvect_size-m;
#ifdef FIXED_POINT
                  g=sign*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q]));
#else
                  g=sign*0.03125*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],g*r[q]);
#endif
               }


               for (m=0;m<N;m++)
               {
                  if (err < ndist[m] || ndist[m]<-1)
                  {
                     for (n=N-1;n>m;n--)
                     {
                        for (q=(i+1)*subvect_size;q<nsf;q++)
                           nt[n][q]=nt[n-1][q];
                        for (q=0;q<nb_subvect;q++)
                           nind[n][q]=nind[n-1][q];
                        ndist[n]=ndist[n-1];
                     }
                     for (q=(i+1)*subvect_size;q<nsf;q++)
                        nt[m][q]=t[q];
                     for (q=0;q<nb_subvect;q++)
                        nind[m][q]=oind[j][q];
                     nind[m][i]=best_index[k];
                     ndist[m]=err;
                     break;
                  }
               }
            }
         }
         if (i==0)
           break;
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **tmp2;
         tmp2=ot;
         ot=nt;
         nt=tmp2;
      }
      for (j=0;j<N;j++)
         for (m=0;m<nb_subvect;m++)
            oind[j][m]=nind[j][m];
      for (j=0;j<N;j++)
         odist[j]=ndist[j];
   }

   /*save indices*/
   for (i=0;i<nb_subvect;i++)
   {
      ind[i]=nind[0][i];
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
   }
   
   /* Put everything back together */
   for (i=0;i<nb_subvect;i++)
   {
      int rind;
      spx_word16_t sign=1;
      rind = ind[i];
      if (rind>=shape_cb_size)
      {
         sign=-1;
         rind-=shape_cb_size;
      }
#ifdef FIXED_POINT
      if (sign==1)
      {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
      }
#else
      for (j=0;j<subvect_size;j++)
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
#endif
   }   
   /* Update excitation */
   for (j=0;j<nsf;j++)
      exc[j]+=e[j];
   
   /* Update target */
   syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
   for (j=0;j<nsf;j++)
      target[j]-=r2[j];

}
Example #25
0
/* Convert *TP to a time_t value, inverting
   the monotonic and mostly-unit-linear conversion function CONVERT.
   Use *OFFSET to keep track of a guess at the offset of the result,
   compared to what the result would be for UTC without leap seconds.
   If *OFFSET's guess is correct, only one CONVERT call is needed.
   This function is external because it is used also by timegm.c.  */
time_t
__mktime_internal(struct tm *tp,
		  struct tm *(*convert)(const time_t *, struct tm *),
		  time_t *offset)
{
  time_t t, gt, t0, t1, t2;
  struct tm tm;

  /* The maximum number of probes (calls to CONVERT) should be enough
     to handle any combinations of time zone rule changes, solar time,
     leap seconds, and oscillations around a spring-forward gap.
     POSIX.1 prohibits leap seconds, but some hosts have them anyway.  */
  int remaining_probes = 6;

  /* Time requested.  Copy it in case CONVERT modifies *TP; this can
     occur if TP is localtime's returned value and CONVERT is localtime.  */
  int sec = tp->tm_sec;
  int min = tp->tm_min;
  int hour = tp->tm_hour;
  int mday = tp->tm_mday;
  int mon = tp->tm_mon;
  int year_requested = tp->tm_year;
  int isdst = tp->tm_isdst;

  /* 1 if the previous probe was DST.  */
  int dst2;

  /* Ensure that mon is in range, and set year accordingly.  */
  int mon_remainder = mon % 12;
  int negative_mon_remainder = mon_remainder < 0;
  int mon_years = mon / 12 - negative_mon_remainder;
  long_int lyear_requested = year_requested;
  long_int year = lyear_requested + mon_years;

  /* The other values need not be in range:
     the remaining code handles minor overflows correctly,
     assuming int and time_t arithmetic wraps around.
     Major overflows are caught at the end.  */

  /* Calculate day of year from year, month, and day of month.
     The result need not be in range.  */
  int mon_yday = ((__mon_yday[leapyear (year)]
		   [mon_remainder + 12 * negative_mon_remainder])
		  - 1);
  long_int lmday = mday;
  long_int yday = mon_yday + lmday;

  time_t guessed_offset = *offset;

  int sec_requested = sec;

  if (LEAP_SECONDS_POSSIBLE)
    {
      /* Handle out-of-range seconds specially,
	 since ydhms_tm_diff assumes every minute has 60 seconds.  */
      if (sec < 0)
	sec = 0;
      if (59 < sec)
	sec = 59;
    }

  /* Invert CONVERT by probing. First assume the same offset as last time: */
  t0 = ydhms_diff(year, yday, hour, min, sec, (EPOCH_YEAR - TM_YEAR_BASE),
                  0, 0, 0, (int)-(guessed_offset));

  if ((TIME_T_MAX / INT_MAX / 366 / 24 / 60 / 60) < 3) {
      /* time_t is NOT large enough to rule out overflows, so check
	 for major overflows.  A gross check suffices, since if t0
	 has overflowed, it is off by a multiple of TIME_T_MAX -
	 TIME_T_MIN + 1.  So ignore any component of the difference
	 that is bounded by a small value.  */

      /* Approximate log base 2 of the number of time units per
	 biennium.  A biennium is 2 years; use this unit instead of
	 years to avoid integer overflow.  For example, 2 average
	 Gregorian years are 2 * 365.2425 * 24 * 60 * 60 seconds,
	 which is 63113904 seconds, and rint (log2 (63113904)) is
	 26.  */
      int ALOG2_SECONDS_PER_BIENNIUM = 26;
      int ALOG2_MINUTES_PER_BIENNIUM = 20;
      int ALOG2_HOURS_PER_BIENNIUM = 14;
      int ALOG2_DAYS_PER_BIENNIUM = 10;
      int LOG2_YEARS_PER_BIENNIUM = 1;

      int approx_requested_biennia =
	(SHR (year_requested, LOG2_YEARS_PER_BIENNIUM)
	 - SHR (EPOCH_YEAR - TM_YEAR_BASE, LOG2_YEARS_PER_BIENNIUM)
	 + SHR (mday, ALOG2_DAYS_PER_BIENNIUM)
	 + SHR (hour, ALOG2_HOURS_PER_BIENNIUM)
	 + SHR (min, ALOG2_MINUTES_PER_BIENNIUM)
	 + (LEAP_SECONDS_POSSIBLE
	    ? 0
	    : SHR (sec, ALOG2_SECONDS_PER_BIENNIUM)));

      int approx_biennia = (int)SHR(t0, ALOG2_SECONDS_PER_BIENNIUM);
      int diff = approx_biennia - approx_requested_biennia;
      int approx_abs_diff = diff < 0 ? -1 - diff : diff;

      /* IRIX 4.0.5 cc miscalculates TIME_T_MIN / 3: it erroneously
	 gives a positive value of 715827882.  Setting a variable
	 first then doing math on it seems to work.
	 ([email protected]) */
      time_t time_t_max = TIME_T_MAX;
      time_t time_t_min = TIME_T_MIN;
      time_t overflow_threshold =
	(time_t_max / 3 - time_t_min / 3) >> ALOG2_SECONDS_PER_BIENNIUM;

      if (overflow_threshold < approx_abs_diff)
	{
	  /* Overflow occurred.  Try repairing it; this might work if
	     the time zone offset is enough to undo the overflow.  */
	  time_t repaired_t0 = (-1 - t0);
	  approx_biennia = (int)SHR(repaired_t0, ALOG2_SECONDS_PER_BIENNIUM);
	  diff = (approx_biennia - approx_requested_biennia);
	  approx_abs_diff = diff < 0 ? -1 - diff : diff;
	  if (overflow_threshold < approx_abs_diff)
	    return -1;
	  guessed_offset += repaired_t0 - t0;
	  t0 = repaired_t0;
	}
    }
Example #26
0
void Jit::Comp_mxc1(MIPSOpcode op)
{
	CONDITIONAL_DISABLE;

	int fs = _FS;
	MIPSGPReg rt = _RT;

	switch((op >> 21) & 0x1f) 
	{
	case 0: // R(rt) = FI(fs); break; //mfc1
		if (rt != MIPS_REG_ZERO) {
			fpr.MapReg(fs, true, false);  // TODO: Seems the V register becomes dirty here? It shouldn't.
			gpr.MapReg(rt, false, true);
			MOVD_xmm(gpr.R(rt), fpr.RX(fs));
		}
		break;

	case 2: // R(rt) = currentMIPS->ReadFCR(fs); break; //cfc1
		if (fs == 31) {
			bool wasImm = gpr.IsImm(MIPS_REG_FPCOND);
			if (!wasImm) {
				gpr.Lock(rt, MIPS_REG_FPCOND);
				gpr.MapReg(MIPS_REG_FPCOND, true, false);
			}
			gpr.MapReg(rt, false, true);
			MOV(32, gpr.R(rt), M(&mips_->fcr31));
			if (wasImm) {
				if (gpr.GetImm(MIPS_REG_FPCOND) & 1) {
					OR(32, gpr.R(rt), Imm32(1 << 23));
				} else {
					AND(32, gpr.R(rt), Imm32(~(1 << 23)));
				}
			} else {
				AND(32, gpr.R(rt), Imm32(~(1 << 23)));
				MOV(32, R(EAX), gpr.R(MIPS_REG_FPCOND));
				AND(32, R(EAX), Imm32(1));
				SHL(32, R(EAX), Imm8(23));
				OR(32, gpr.R(rt), R(EAX));
			}
			gpr.UnlockAll();
		} else if (fs == 0) {
			gpr.SetImm(rt, MIPSState::FCR0_VALUE);
		} else {
			Comp_Generic(op);
		}
		return;

	case 4: //FI(fs) = R(rt);	break; //mtc1
		gpr.MapReg(rt, true, false);
		fpr.MapReg(fs, false, true);
		MOVD_xmm(fpr.RX(fs), gpr.R(rt));
		return;

	case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1
		if (fs == 31) {
			if (gpr.IsImm(rt)) {
				gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1);
				MOV(32, M(&mips_->fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF));
			} else {
				gpr.Lock(rt, MIPS_REG_FPCOND);
				gpr.MapReg(rt, true, false);
				gpr.MapReg(MIPS_REG_FPCOND, false, true);
				MOV(32, gpr.R(MIPS_REG_FPCOND), gpr.R(rt));
				SHR(32, gpr.R(MIPS_REG_FPCOND), Imm8(23));
				AND(32, gpr.R(MIPS_REG_FPCOND), Imm32(1));
				MOV(32, M(&mips_->fcr31), gpr.R(rt));
				AND(32, M(&mips_->fcr31), Imm32(0x0181FFFF));
				gpr.UnlockAll();
			}
		} else {
Example #27
0
/* Return the average of A and B, even if A + B would overflow: */
static time_t
time_t_avg(time_t a, time_t b)
{
  return (SHR(a, 1) + SHR(b, 1) + (a & b & 1));
}
Example #28
0
static word_type ROTL(word_type x, unsigned n) {
    return SHL(x, n) | SHR(x, word_bits-n);
}
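One caveat worth noting (editorial): when n == 0 the SHR amount is word_bits, and shifting by the full width is undefined behavior in C, so if SHL/SHR expand to plain << and >> a guarded variant is safer. A minimal sketch using plain shifts, under the same assumption that word_type is an unsigned type of word_bits bits:

static word_type ROTL_safe(word_type x, unsigned n) {
    n %= word_bits;                 // keep the rotate count in range
    if (n == 0)
        return x;                   // avoid shifting by word_bits (UB)
    return (word_type)((x << n) | (x >> (word_bits - n)));
}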
Example #29
0
int crypto_aead_decrypt(
                        unsigned char *m,unsigned long long *mlen,          // message
                        unsigned char *nsec,                                // not relevant to CLOC or SILC
                        const unsigned char *c,unsigned long long clen,     // ciphertext
                        const unsigned char *ad,unsigned long long adlen,   // associated data
                        const unsigned char *npub,                          // nonce
                        const unsigned char *k                              // the master key
                        )
{
    block estate, tstate, tmp;  // encryption state, tag state, and temporary state
    estate = SETZERO();
    unsigned char ltag[16];     // local copy of temporary tag value
    unsigned long long i, lastblocklen,j;
    
    /* set message length */
    *mlen = clen - CRYPTO_ABYTES;
    
    /* generate round keys from master key */
    AES128_KeyExpansion(k);
    
    
    /* process the first (partial) block of ad */
    load_partial_block(&estate, ad, (adlen>STATE_LEN)?STATE_LEN:adlen, ONE_ZERO_PADDING);
    fix0(estate);
    AES128_encrypt(estate, estate);
    if((ad[0] & 0x80) || (adlen == 0)){
        // apply h
        h(estate);
    }
    else{
        // do nothing
    }
    
    if(adlen > STATE_LEN){ // ad is more than one block
        i = STATE_LEN;
        
        /* process the middle ad blocks, excluding the first and last (partial) block */
        while((i+STATE_LEN) < adlen)
        {
            tmp = LOAD(ad+i);
            estate = XOR(estate, tmp);
            AES128_encrypt(estate, estate);
            i += STATE_LEN;
        }
        
        /* process the last (partial) ad block */
        load_partial_block(&tmp, ad+i, adlen - i, ONE_ZERO_PADDING);
        estate = XOR(estate, tmp);
        AES128_encrypt(estate, estate);
    }
    
    
    /* process the nonce */
    load_partial_block(&tmp, npub, CRYPTO_NPUBBYTES, PARAM_OZP);
    estate = XOR(estate, tmp);
    
    if((adlen % STATE_LEN) || (adlen == 0)){
        /* apply f2 */
        f2(estate);
    }
    else{
        /* apply f1 */
        f1(estate);
    }
    
    /* process ciphertext */
    
    tstate = estate;
    AES128_encrypt(estate, estate);
    
    if(*mlen){
        /* apply g2 to tag state */
        g2(tstate);
    }
    else{
        /* apply g1 to tag state */
        g1(tstate);
    }
    AES128_encrypt(tstate, tstate);
    
    
    
    i = 0;
    /* process all the message except for the last message/ciphertext block */
    while((i + STATE_LEN) < (*mlen)){
        tmp = LOAD(c+i);
        estate = XOR(estate, tmp);
        STORE(m+i, estate);
        tstate = XOR(tmp, tstate);
        AES128_encrypt(tstate, tstate);
        fix1(tmp);
        print_state("after applying fix1\n", estate);
        AES128_encrypt(tmp, estate);
        i += STATE_LEN;
    }
    
    /* process the last block of the message/ciphertext */
    lastblocklen = (*mlen) - i;
    
    if(lastblocklen > 0){
        load_partial_block(&tmp, c+i, lastblocklen, ZERO_APPEND);
        estate = XOR(estate, tmp);
        print_state("after xoring last partial message block\n", estate);
        store_partial_block(m+i, estate, lastblocklen);
        unsigned char shift_bytes = (STATE_LEN - (unsigned char)lastblocklen);
        tmp = AND(SHR(_mm_set1_epi8(0xff), shift_bytes), tmp);
        tstate = XOR(tstate, tmp);
        /* add the one zero padding */
        tstate = XOR(tstate, SHL(_mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80), lastblocklen));
        
        if((*mlen) % STATE_LEN){
            /* apply f2 */
            f2(tstate);
        }
        else{
            /* apply f1 */
            f1(tstate);
        }
        AES128_encrypt(tstate, tstate);
    }
    
    /* compare tag and output message */
    STORE(ltag, tstate);
    for(j = 0; j < CRYPTO_ABYTES; j++){
        if(ltag[j] != c[clen - CRYPTO_ABYTES + j])
            return RETURN_TAG_NO_MATCH;
    }
    return RETURN_SUCCESS;

}
Example #30
void gainQuantization(bcg729EncoderChannelContextStruct *encoderChannelContext, word16_t targetSignal[], word16_t filteredAdaptativeCodebookVector[], word16_t convolvedFixedCodebookVector[], word16_t fixedCodebookVector[], word64_t xy64, word64_t yy64,
                      word16_t *quantizedAdaptativeCodebookGain, word16_t *quantizedFixedCodebookGain, uint16_t *gainCodebookStage1, uint16_t *gainCodebookStage2)
{
    int i,j;
    word64_t xz64=0, yz64=0, zz64=0;
    word32_t xy;
    word32_t yy;
    word32_t xz;
    word32_t yz;
    word32_t zz;
    uint16_t minNormalization = 31;
    uint16_t currentNormalization;
    word32_t bestAdaptativeCodebookGain, bestFixedCodebookGain;
    word64_t denominator;
    word16_t predictedFixedCodebookGain;
    uint16_t indexBaseGa=0;
    uint16_t indexBaseGb=0;
    uint16_t indexGa=0, indexGb=0;
    word64_t distanceMin = MAXINT64;

    /*** compute spec 3.9 eq63 terms first on 64 bits and then scale them if needed to fit on 32 ***/
    /* Xy64 and Yy64 already computed during adaptativeCodebookGain computation */
    for (i=0; i<L_SUBFRAME; i++) {
        xz64 = MAC64(xz64, targetSignal[i], convolvedFixedCodebookVector[i]); /* in Q12 */
        yz64 = MAC64(yz64, filteredAdaptativeCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q12 */
        zz64 = MAC64(zz64, convolvedFixedCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q24 */
    }

    /* now scale these terms to fit on 32 bits - the xy, xz and yz terms must fit on 31 bits because eq63 uses them with a factor of 2 */
    xy = SHR64(((xy64<0)?-xy64:xy64),30);
    yy = SHR64(yy64,31);
    xz = SHR64(((xz64<0)?-xz64:xz64),30);
    yz = SHR64(((yz64<0)?-yz64:yz64),30);
    zz = SHR64(zz64,31);

    currentNormalization = countLeadingZeros(xy);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(xz);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(yz);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(yy);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(zz);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }

    if (minNormalization<31) { /* we shall normalise, values are over 32 bits */
        minNormalization = 31 - minNormalization;
        xy = (word32_t)SHR64(xy64, minNormalization);
        yy = (word32_t)SHR64(yy64, minNormalization);
        xz = (word32_t)SHR64(xz64, minNormalization);
        yz = (word32_t)SHR64(yz64, minNormalization);
        zz = (word32_t)SHR64(zz64, minNormalization);

    } else { /* no need to normalise, values already fit on 32 bits, just cast them */
        xy = (word32_t)xy64; /* in Q0 */
        yy = (word32_t)yy64; /* in Q0 */
        xz = (word32_t)xz64; /* in Q12 */
        yz = (word32_t)yz64; /* in Q12 */
        zz = (word32_t)zz64; /* in Q24 */
    }

    /*** compute the best gains minimizing eq63 ***/
    /* Note: this best-gain computation is not described in the spec at all; it comes from the ITU code */
    /* bestAdaptativeCodebookGain = (zz*xy - xz*yz) / ((yy*zz) - yz^2) */
    /* bestFixedCodebookGain = (yy*xz - xy*yz) / ((yy*zz) - yz^2) */
    /* best gains are computed in Q9 and Q2 and fit on 16 bits */
    denominator = MAC64(MULT32_32(yy, zz), -yz, yz); /* ((yy*zz) - yz^2) in Q24 (always >= 0) */
    /* avoid division by zero */
    if (denominator==0) { /* consider it to be one */
        bestAdaptativeCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(zz, xy), -xz, yz), 15)); /* MAC in Q24 -> Q9 */
        bestFixedCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(yy, xz), -xy, yz), 10)); /* MAC in Q12 -> Q2 */
    } else {
        /* bestAdaptativeCodebookGain in Q9 */
        uint16_t numeratorNorm;
        word64_t numerator = MAC64(MULT32_32(zz, xy), -xz, yz); /* in Q24 */
        /* check if we can shift it left by 9 without overflow, as bestAdaptativeCodebookGain is computed in Q9 */
        word32_t numeratorH = (word32_t)(SHR64(numerator,32));
        numeratorH = (numeratorH>0)?numeratorH:-numeratorH;
        numeratorNorm = countLeadingZeros(numeratorH);
        if (numeratorNorm >= 9) {
            bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator,9), denominator)); /* bestAdaptativeCodebookGain in Q9 */
        } else {
            word64_t shiftedDenominator = SHR64(denominator, 9-numeratorNorm);
            if (shiftedDenominator>0) { /* we can't shift the numerator left by 9; can we shift the denominator right by 9-numeratorNorm without hitting 0? */
                bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestAdaptativeCodebookGain in Q9 */
            } else {
                bestAdaptativeCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 9-numeratorNorm); /* shift left the division result to reach Q9 */
            }
        }

        numerator = MAC64(MULT32_32(yy, xz), -xy, yz); /* in Q12 */
        /* check if we can shift it left by 14 (it's in Q12 and the denominator in Q24) without overflow, as bestFixedCodebookGain is computed in Q2 */
        numeratorH = (word32_t)(SHR64(numerator,32));
        numeratorH = (numeratorH>0)?numeratorH:-numeratorH;
        numeratorNorm = countLeadingZeros(numeratorH);

        if (numeratorNorm >= 14) {
            bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator,14), denominator)); /* bestFixedCodebookGain in Q2 */
        } else {
            word64_t shiftedDenominator = SHR64(denominator, 14-numeratorNorm);
            if (shiftedDenominator>0) { /* we can't shift the numerator left by 14; can we shift the denominator right by 14-numeratorNorm without hitting 0? */
                bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestFixedCodebookGain in Q2 */
            } else {
                bestFixedCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 14-numeratorNorm); /* shift left the division result to reach Q2 */
            }
        }
    }

    /*** Compute the predicted gain as in spec 3.9.1 eq71 in Q6 ***/
    predictedFixedCodebookGain = (word16_t)(SHR32(MACodeGainPrediction(encoderChannelContext->previousGainPredictionError, fixedCodebookVector), 12)); /* in Q16 -> Q4 range [3,1830] */

    /***  preselection spec 3.9.2 ***/
    /* Note: the spec just says to select the best 50% of each vector; the ITU code goes through magic-constant computation to select the beginning of a continuous range */
    /* much simpler here: the vectors are ordered in increasing order, so just select 2 indexes (4 for Gb) before the first value that exceeds the best gain previously computed */
    while (indexBaseGa<6 && bestFixedCodebookGain>(MULT16_16_Q14(GACodebook[indexBaseGa][1],predictedFixedCodebookGain))) { /* bestFixedCodebookGain in Q2; GACodebook in Q12 * predictedFixedCodebookGain in Q4 -> Q16, >>14 -> Q2 */
        indexBaseGa++;
    }
    if (indexBaseGa>0) indexBaseGa--;
    if (indexBaseGa>0) indexBaseGa--;
    while (indexBaseGb<12 && bestAdaptativeCodebookGain>(SHR(GBCodebook[indexBaseGb][0],5))) {
        indexBaseGb++;
    }
    if (indexBaseGb>0) indexBaseGb--;
    if (indexBaseGb>0) indexBaseGb--;
    if (indexBaseGb>0) indexBaseGb--;
    if (indexBaseGb>0) indexBaseGb--;

    /*** test all possibilities of Ga and Gb indexes and select the best one ***/
    xy = -SHL(xy,1); /* xy term is always used with a -2 factor */
    xz = -SHL(xz,1); /* xz term is always used with a -2 factor */
    yz = SHL(yz,1); /* yz term is always used with a 2 factor */

    for (i=0; i<4; i++) {
        for (j=0; j<8; j++) {
            /* compute gamma->gc and gp */
            word16_t gp =  ADD16(GACodebook[i+indexBaseGa][0], GBCodebook[j+indexBaseGb][0]); /* result in Q14 */
            word16_t gamma =  ADD16(GACodebook[i+indexBaseGa][1], GBCodebook[j+indexBaseGb][1]); /* result in Q3.12 (range [0.185, 5.05])*/
            word32_t gc = MULT16_16_Q14(gamma, predictedFixedCodebookGain); /* gamma in Q12, predictedFixedCodebookGain in Q4 -> Q16 -14 -> Q2 */

            /* compute E as in eq63 (first term excluded) */
            word64_t acc = MULT32_32(MULT16_16(gp, gp), yy); /* acc = gp^2*yy  gp in Q14, yy in Q0 -> acc in Q28 */
            acc = MAC64(acc, MULT16_16(gc, gc), zz); /* gc in Q2, zz in Q24 -> acc in Q28; note gc is on 32 bits but in a range that makes gc^2 fit on 32 bits */
            acc = MAC64(acc, SHL32((word32_t)gp, 14), xy); /* gp in Q14 shifted to Q28, xy in Q0 -> acc in Q28 */
            acc = MAC64(acc, SHL32(gc, 14), xz); /* gc in Q2 shifted to Q16, xz in Q12 -> acc in Q28 */
            acc = MAC64(acc, MULT16_16(gp,gc), yz); /* gp in Q14, gc in Q2 yz in Q12 -> acc in Q28 */

            if (acc<distanceMin) {
                distanceMin = acc;
                indexGa = i+indexBaseGa;
                indexGb = j+indexBaseGb;
                *quantizedAdaptativeCodebookGain = gp;
                *quantizedFixedCodebookGain = (word16_t)SHR(gc, 1);
            }
        }
    }

    /* update the previous gain prediction error */
    computeGainPredictionError(ADD16(GACodebook[indexGa][1], GBCodebook[indexGb][1]), encoderChannelContext->previousGainPredictionError);

    /* mapping of indexes */
    *gainCodebookStage1 = indexMappingGA[indexGa];
    *gainCodebookStage2 = indexMappingGB[indexGb];

    return;
}
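For reference, the closed form the comments describe (editorial: it is the solution of the 2x2 normal equations minimizing eq63's error E = ||x - g_p y - g_c z||^2, with xy = <x,y>, xz = <x,z>, yz = <y,z>, yy = <y,y>, zz = <z,z> for target x, filtered adaptive-codebook vector y, and convolved fixed-codebook vector z):

$$\hat g_p = \frac{zz\,xy - xz\,yz}{yy\,zz - yz^2},\qquad \hat g_c = \frac{yy\,xz - xy\,yz}{yy\,zz - yz^2}.$$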