bool SamplerJitCache::Jit_Decode5551() { MOV(32, R(tempReg2), R(resultReg)); MOV(32, R(tempReg1), R(resultReg)); AND(32, R(tempReg2), Imm32(0x0000001F)); AND(32, R(tempReg1), Imm32(0x000003E0)); SHL(32, R(tempReg1), Imm8(3)); OR(32, R(tempReg2), R(tempReg1)); MOV(32, R(tempReg1), R(resultReg)); AND(32, R(tempReg1), Imm32(0x00007C00)); SHL(32, R(tempReg1), Imm8(6)); OR(32, R(tempReg2), R(tempReg1)); // Expand 5 -> 8. After this is just A. MOV(32, R(tempReg1), R(tempReg2)); SHL(32, R(tempReg2), Imm8(3)); SHR(32, R(tempReg1), Imm8(2)); // Chop off the bits that were shifted out. AND(32, R(tempReg1), Imm32(0x00070707)); OR(32, R(tempReg2), R(tempReg1)); // For A, we shift it to a single bit, and then subtract and XOR. // That's probably the simplest way to expand it... SHR(32, R(resultReg), Imm8(15)); // If it was 0, it's now -1, otherwise it's 0. Easy. SUB(32, R(resultReg), Imm8(1)); XOR(32, R(resultReg), Imm32(0xFF000000)); AND(32, R(resultReg), Imm32(0xFF000000)); OR(32, R(resultReg), R(tempReg2)); return true; }
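For reference, a plain-C sketch of the transform the emitted code above performs on one 5551 texel (function name hypothetical): each 5-bit channel becomes (c << 3) | (c >> 2) and the single alpha bit is replicated across the top byte.

#include <stdint.h>

static uint32_t Decode5551Ref(uint16_t px) {
    uint32_t rgb = (px & 0x1F) | ((uint32_t)(px & 0x3E0) << 3) | ((uint32_t)(px & 0x7C00) << 6);
    rgb = (rgb << 3) | ((rgb >> 2) & 0x00070707);           /* expand each 5-bit channel to 8 bits */
    uint32_t a = (px & 0x8000) ? 0xFF000000u : 0x00000000u; /* replicate the alpha bit into the top byte */
    return a | rgb;
}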
bool SamplerJitCache::Jit_Decode5650() { MOV(32, R(tempReg2), R(resultReg)); AND(32, R(tempReg2), Imm32(0x0000001F)); // B (we do R and B at the same time, they're both 5.) MOV(32, R(tempReg1), R(resultReg)); AND(32, R(tempReg1), Imm32(0x0000F800)); SHL(32, R(tempReg1), Imm8(5)); OR(32, R(tempReg2), R(tempReg1)); // Expand 5 -> 8. At this point we have 00BB00RR. MOV(32, R(tempReg1), R(tempReg2)); SHL(32, R(tempReg2), Imm8(3)); SHR(32, R(tempReg1), Imm8(2)); OR(32, R(tempReg2), R(tempReg1)); AND(32, R(tempReg2), Imm32(0x00FF00FF)); // Now's as good a time to put in A as any. OR(32, R(tempReg2), Imm32(0xFF000000)); // Last, we need to align, extract, and expand G. // 3 to align to G, and then 2 to expand to 8. SHL(32, R(resultReg), Imm8(3 + 2)); AND(32, R(resultReg), Imm32(0x0000FC00)); MOV(32, R(tempReg1), R(resultReg)); // 2 to account for resultReg being preshifted, 4 for expansion. SHR(32, R(tempReg1), Imm8(2 + 4)); OR(32, R(resultReg), R(tempReg1)); AND(32, R(resultReg), Imm32(0x0000FF00)); OR(32, R(resultReg), R(tempReg2)); return true; }
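The equivalent sketch for the 565 path (again a hypothetical reference function, not part of the JIT): R and B expand from 5 to 8 bits, G from 6 to 8, and alpha is forced opaque.

#include <stdint.h>

static uint32_t Decode5650Ref(uint16_t px) {
    uint32_t rb = (px & 0x1F) | ((uint32_t)(px & 0xF800) << 5); /* R in byte 0, B in byte 2 */
    rb = ((rb << 3) | (rb >> 2)) & 0x00FF00FF;                  /* expand 5 -> 8 */
    uint32_t g = (px >> 5) & 0x3F;
    uint32_t g8 = (g << 2) | (g >> 4);                          /* expand 6 -> 8 */
    return 0xFF000000u | (g8 << 8) | rb;                        /* alpha always opaque */
}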
word32_t ChebyshevPolynomial(word16_t x, word32_t f[]) { /* bk in Q15*/ word32_t bk; word32_t bk1 = ADD32(SHL(x,1), f[1]); /* init: b4=2x+f1 */ word32_t bk2 = ONE_IN_Q15; /* init: b5=1 */ uint8_t k; for (k=3; k>0; k--) { /* at the end of loop execution we have b1 in bk1 and b2 in bk2 */ bk = SUB32(ADD32(SHL(MULT16_32_Q15(x,bk1), 1), f[5-k]), bk2); /* bk = 2*x*bk1 - bk2 + f(5-k) all in Q15 */ bk2 = bk1; bk1 = bk; } return SUB32(ADD32(MULT16_32_Q15(x,bk1), SHR(f[5],1)), bk2); /* C(x) = x*b1 - b2 + f(5)/2 */ }
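The loop above is a Clenshaw-style recurrence for a Chebyshev series; a floating-point sketch of the same evaluation (assuming f[] holds the coefficients in f[1]..f[5], as above) makes the structure easier to follow.

static double ChebyshevPolynomialFloat(double x, const double f[]) {
    double bk2 = 1.0;            /* b(k+2) */
    double bk1 = 2.0 * x + f[1]; /* b(k+1) */
    int k;
    for (k = 3; k > 0; k--) {
        double bk = 2.0 * x * bk1 - bk2 + f[5 - k];
        bk2 = bk1;
        bk1 = bk;
    }
    return x * bk1 - bk2 + f[5] / 2.0; /* C(x) = x*b1 - b2 + f(5)/2 */
}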
static void kf_bfly2( kiss_fft_cpx * Fout, const size_t fstride, const kiss_fft_cfg st, int m ) { kiss_fft_cpx * Fout2; kiss_fft_cpx * tw1 = st->twiddles; kiss_fft_cpx t; Fout2 = Fout + m; if (!st->inverse) { int i; kiss_fft_cpx *x=Fout; for (i=0; i<2*m; i++) { x[i].r = SHR(x[i].r,1); x[i].i = SHR(x[i].i,1); } } do { C_MUL (t, *Fout2 , *tw1); tw1 += fstride; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); ++Fout2; ++Fout; } while (--m); }
void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_sig_t *y1, spx_sig_t *y2, int N, int M, spx_word16_t *mem, char *stack) { int i,j,k,M2; spx_word16_t *a; spx_word16_t *x; spx_word16_t *x2; a = PUSH(stack, M, spx_word16_t); x = PUSH(stack, N+M-1, spx_word16_t); x2=x+M-1; M2=M>>1; for (i=0;i<M;i++) a[M-i-1]= aa[i]; for (i=0;i<M-1;i++) x[i]=mem[M-i-2]; for (i=0;i<N;i++) x[i+M-1]=SATURATE(PSHR(xx[i],1),16383); for (i=0,k=0;i<N;i+=2,k++) { y1[k]=0; y2[k]=0; for (j=0;j<M2;j++) { y1[k]+=SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1); y2[k]-=SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1); j++; y1[k]+=SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1); y2[k]+=SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1); } } for (i=0;i<M-1;i++) mem[i]=SATURATE(PSHR(xx[N-i-1],1),16383); }
bool SamplerJitCache::Jit_GetTexDataSwizzled4() { // Get the horizontal tile pos into tempReg1. LEA(32, tempReg1, MScaled(uReg, SCALE_4, 0)); // Note: imm8 sign extends negative. AND(32, R(tempReg1), Imm8(~127)); // Add vertical offset inside tile to tempReg1. LEA(32, tempReg2, MScaled(vReg, SCALE_4, 0)); AND(32, R(tempReg2), Imm8(31)); LEA(32, tempReg1, MComplex(tempReg1, tempReg2, SCALE_4, 0)); // Add srcReg, since we'll need it at some point. ADD(64, R(tempReg1), R(srcReg)); // Now find the vertical tile pos, and add to tempReg1. SHR(32, R(vReg), Imm8(3)); LEA(32, EAX, MScaled(bufwReg, SCALE_4, 0)); MUL(32, R(vReg)); ADD(64, R(tempReg1), R(EAX)); // Last and possibly also least, the horizontal offset inside the tile. AND(32, R(uReg), Imm8(31)); SHR(32, R(uReg), Imm8(1)); MOV(8, R(resultReg), MRegSum(tempReg1, uReg)); FixupBranch skipNonZero = J_CC(CC_NC); // If the horizontal offset was odd, take the upper 4. SHR(8, R(resultReg), Imm8(4)); SetJumpTarget(skipNonZero); // Zero out the rest of the bits. AND(32, R(resultReg), Imm8(0x0F)); return true; }
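For orientation, a scalar sketch of the swizzled 4bpp fetch above (hypothetical helper mirroring the emitted address math: 16-byte-wide, 8-row tiles, bufw in pixels).

#include <stdint.h>

static uint32_t GetTexel4Swizzled(const uint8_t *src, uint32_t u, uint32_t v, uint32_t bufw) {
    const uint8_t *tile = src + ((u * 4) & ~127u)      /* 128-byte tile column */
                              + (v & 7) * 16           /* row inside the tile */
                              + (v >> 3) * (bufw * 4); /* whole tile rows above (bufw/2 bytes * 8 rows) */
    uint8_t b = tile[(u & 31) >> 1];
    return (u & 1) ? (uint32_t)(b >> 4) : (uint32_t)(b & 0x0F); /* odd u takes the high nibble */
}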
static void kf_bfly2( kiss_fft_cpx * Fout, const size_t fstride, const kiss_fft_state *st, int m, int N, int mm ) { kiss_fft_cpx * Fout2; const kiss_twiddle_cpx * tw1; int i,j; kiss_fft_cpx * Fout_beg = Fout; for (i=0;i<N;i++) { Fout = Fout_beg + i*mm; Fout2 = Fout + m; tw1 = st->twiddles; for(j=0;j<m;j++) { kiss_fft_cpx t; Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1); Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1); C_MUL (t, *Fout2 , *tw1); tw1 += fstride; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); ++Fout2; ++Fout; } } }
bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) { if (id.swizzle) { return Jit_GetTexDataSwizzled(id, bitsPerTexel); } // srcReg might be EDX, so let's copy that before we multiply. switch (bitsPerTexel) { case 32: case 16: case 8: LEA(64, tempReg1, MComplex(srcReg, uReg, bitsPerTexel / 8, 0)); break; case 4: { XOR(32, R(tempReg2), R(tempReg2)); SHR(32, R(uReg), Imm8(1)); FixupBranch skip = J_CC(CC_NC); // Track whether we shifted a 1 off or not. MOV(32, R(tempReg2), Imm32(4)); SetJumpTarget(skip); LEA(64, tempReg1, MRegSum(srcReg, uReg)); break; } default: return false; } MOV(32, R(EAX), R(vReg)); MUL(32, R(bufwReg)); switch (bitsPerTexel) { case 32: case 16: case 8: MOVZX(32, bitsPerTexel, resultReg, MComplex(tempReg1, RAX, bitsPerTexel / 8, 0)); break; case 4: { SHR(32, R(RAX), Imm8(1)); MOV(8, R(resultReg), MRegSum(tempReg1, RAX)); // RCX is now free. MOV(8, R(RCX), R(tempReg2)); SHR(8, R(resultReg), R(RCX)); // Zero out any bits not shifted off. AND(32, R(resultReg), Imm8(0x0F)); break; } default: return false; } return true; }
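For the 4-bit case, the address math above corresponds to roughly this scalar fetch (hypothetical names; like the JIT it halves u and v*bufw separately, so it effectively assumes v*bufw is even).

#include <stdint.h>

static uint32_t GetTexel4bpp(const uint8_t *src, uint32_t u, uint32_t v, uint32_t bufw) {
    uint8_t b = src[(u >> 1) + ((v * bufw) >> 1)];
    return (u & 1) ? (uint32_t)(b >> 4) : (uint32_t)(b & 0x0F); /* odd u takes the high nibble */
}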
/* Return 1 if A + B does not overflow: */ static int time_t_int_add_ok(time_t a, int b) { verify(int_no_wider_than_time_t, (INT_MAX <= TIME_T_MAX)); if (WRAPV) { time_t sum = (a + b); return ((sum < a) == (b < 0)); } else { int a_odd; time_t avg; a_odd = (int)(a & 1); avg = (SHR(a, 1) + (SHR(b, 1) + (a_odd & b))); return (((TIME_T_MIN / 2) <= avg) && (avg <= (TIME_T_MAX / 2))); } }
/* By segher */ void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N, int M, spx_word32_t *mem, char *stack) /* assumptions: all odd x[i] are zero -- well, actually they are left out of the array now N and M are multiples of 4 */ { int i, j; spx_word16_t *xx; xx= PUSH(stack, M+N-1, spx_word16_t); for (i = 0; i < N/2; i++) xx[2*i] = SHR(x[N/2-1-i],SIG_SHIFT+1); for (i = 0; i < M - 1; i += 2) xx[N+i] = mem[i+1]; for (i = 0; i < N; i += 4) { spx_sig_t y0, y1, y2, y3; spx_word16_t x0; y0 = y1 = y2 = y3 = 0; x0 = xx[N-4-i]; for (j = 0; j < M; j += 4) { spx_word16_t x1; spx_word16_t a0, a1; a0 = a[j]; a1 = a[j+1]; x1 = xx[N-2+j-i]; y0 += SHR(MULT16_16(a0, x1),1); y1 += SHR(MULT16_16(a1, x1),1); y2 += SHR(MULT16_16(a0, x0),1); y3 += SHR(MULT16_16(a1, x0),1); a0 = a[j+2]; a1 = a[j+3]; x0 = xx[N+j-i]; y0 += SHR(MULT16_16(a0, x0),1); y1 += SHR(MULT16_16(a1, x0),1); y2 += SHR(MULT16_16(a0, x1),1); y3 += SHR(MULT16_16(a1, x1),1); } y[i] = y0; y[i+1] = y1; y[i+2] = y2; y[i+3] = y3; } for (i = 0; i < M - 1; i += 2) mem[i+1] = xx[i]; }
// In: RAX: s64 _Value void DSPEmitter::Update_SR_Register16(X64Reg val) { OpArg sr_reg; gpr.GetReg(DSP_REG_SR, sr_reg); AND(16, sr_reg, Imm16(~SR_CMP_MASK)); // // 0x04 // if (_Value == 0) g_dsp.r[DSP_REG_SR] |= SR_ARITH_ZERO; TEST(64, R(val), R(val)); FixupBranch notZero = J_CC(CC_NZ); OR(16, sr_reg, Imm16(SR_ARITH_ZERO | SR_TOP2BITS)); FixupBranch end = J(); SetJumpTarget(notZero); // // 0x08 // if (_Value < 0) g_dsp.r[DSP_REG_SR] |= SR_SIGN; FixupBranch greaterThanEqual = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_SIGN)); SetJumpTarget(greaterThanEqual); // // 0x20 - Checks if top bits of m are equal // if ((((u16)_Value >> 14) == 0) || (((u16)_Value >> 14) == 3)) SHR(16, R(val), Imm8(14)); TEST(16, R(val), R(val)); FixupBranch isZero = J_CC(CC_Z); CMP(16, R(val), Imm16(3)); FixupBranch notThree = J_CC(CC_NE); SetJumpTarget(isZero); // g_dsp.r[DSP_REG_SR] |= SR_TOP2BITS; OR(16, sr_reg, Imm16(SR_TOP2BITS)); SetJumpTarget(notThree); SetJumpTarget(end); gpr.PutReg(DSP_REG_SR); }
int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len) { int i; spx_sig_t max_val=1; int sig_shift; for (i=0;i<len;i++) { spx_sig_t tmp = x[i]; if (tmp<0) tmp = -tmp; if (tmp >= max_val) max_val = tmp; } sig_shift=0; while (max_val>max_scale) { sig_shift++; max_val >>= 1; } for (i=0;i<len;i++) y[i] = SHR(x[i], sig_shift); return sig_shift; }
uint16_t findOpenLoopPitchDelay(word16_t weightedInputSignal[]) { int i; /*** scale the signal to avoid overflows ***/ word16_t scaledWeightedInputSignalBuffer[MAXIMUM_INT_PITCH_DELAY+L_FRAME]; /* this buffer may store the scaled version of the input signal; if scaling is not needed, it is not used */ word16_t *scaledWeightedInputSignal; /* points to the beginning of the present frame, either scaled or directly the input signal */ word64_t autocorrelation = 0; uint16_t indexRange1=0, indexRange2=0, indexRange3Even=0, indexRange3; word32_t correlationMaxRange1; word32_t correlationMaxRange2; word32_t correlationMaxRange3; word32_t correlationMaxRange3Odd; word32_t autoCorrelationRange1; word32_t autoCorrelationRange2; word32_t autoCorrelationRange3; word32_t normalisedCorrelationMaxRange1; word32_t normalisedCorrelationMaxRange2; word32_t normalisedCorrelationMaxRange3; uint16_t indexMultiple; /* compute the autocorrelation of the input signal on 64 bits and, if needed, scale the signal to have it fit on 32 bits */ for (i=-MAXIMUM_INT_PITCH_DELAY; i<L_FRAME; i++) { autocorrelation = MAC64(autocorrelation, weightedInputSignal[i], weightedInputSignal[i]); } if (autocorrelation>MAXINT32) { int overflowScale; scaledWeightedInputSignal = &(scaledWeightedInputSignalBuffer[MAXIMUM_INT_PITCH_DELAY]); overflowScale = PSHR(31-countLeadingZeros((word32_t)(autocorrelation>>31)),1); /* count the number of bits needed over the 31 bits allowed and divide by 2 to get the right scaling for the signal */ for (i=-MAXIMUM_INT_PITCH_DELAY; i<L_FRAME; i++) { scaledWeightedInputSignal[i] = SHR(weightedInputSignal[i], overflowScale); } } else { /* scaledWeightedInputSignal points directly to weightedInputSignal */
spx_word32_t speex_rand(spx_word16_t std, spx_int32_t *seed) { spx_word32_t res; *seed = 1664525 * *seed + 1013904223; res = MULT16_16(EXTRACT16(SHR32(*seed,16)),std); return SUB32(res, SHR(res, 3)); }
/* Return 1 if A + B does not overflow. */ static int time_t_int_add_ok (time_t a, int b) { verify (int_no_wider_than_time_t, INT_MAX <= TIME_T_MAX); if (WRAPV) { time_t sum = a + b; return (sum < a) == (b < 0); } else { int a_odd = a & 1; time_t avg = SHR (a, 1) + (SHR (b, 1) + (a_odd & b)); return TIME_T_MIN / 2 <= avg && avg <= TIME_T_MAX / 2; } }
bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerIndex) { GEPaletteFormat fmt = (GEPaletteFormat)id.clutfmt; if (!id.hasClutShift && !id.hasClutMask && !id.hasClutOffset) { // This is simple - just mask if necessary. if (bitsPerIndex > 8) { AND(32, R(resultReg), Imm32(0x000000FF)); } return true; } MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate.clutformat)); MOV(32, R(tempReg1), MatR(tempReg1)); // Shift = (clutformat >> 2) & 0x1F if (id.hasClutShift) { MOV(32, R(RCX), R(tempReg1)); SHR(32, R(RCX), Imm8(2)); AND(32, R(RCX), Imm8(0x1F)); SHR(32, R(resultReg), R(RCX)); } // Mask = (clutformat >> 8) & 0xFF if (id.hasClutMask) { MOV(32, R(tempReg2), R(tempReg1)); SHR(32, R(tempReg2), Imm8(8)); AND(32, R(resultReg), R(tempReg2)); } // We need to wrap any entries beyond the first 1024 bytes. u32 offsetMask = fmt == GE_CMODE_32BIT_ABGR8888 ? 0x00FF : 0x01FF; // We must mask to 0xFF before ORing 0x100 in 16 bit CMODEs. // But skip if we'll mask 0xFF after offset anyway. if (bitsPerIndex > 8 && (!id.hasClutOffset || offsetMask != 0x00FF)) { AND(32, R(resultReg), Imm32(0x000000FF)); } // Offset = (clutformat >> 12) & 0x01F0 if (id.hasClutOffset) { SHR(32, R(tempReg1), Imm8(16)); SHL(32, R(tempReg1), Imm8(4)); OR(32, R(resultReg), R(tempReg1)); AND(32, R(resultReg), Imm32(offsetMask)); } return true; }
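A plain-C sketch of the transform this emits when shift, mask, and offset are all in use (helper name hypothetical; offsetMask is 0x00FF for the 32-bit CLUT format and 0x01FF otherwise, as in the code above).

#include <stdint.h>

static uint32_t TransformClutIndexRef(uint32_t index, uint32_t clutformat, uint32_t offsetMask) {
    uint32_t shift = (clutformat >> 2) & 0x1F;
    uint32_t mask = (clutformat >> 8) & 0xFF;
    uint32_t offset = (clutformat >> 16) << 4;
    return (((index >> shift) & mask) | offset) & offsetMask;
}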
/* FIXME: These functions are ugly and probably introduce too much error */ void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) { int i; for (i=0;i<len;i++) { y[i] = SHL(MULT16_32_Q14(SHR(x[i],7),scale),7); } }
/*Makes sure the LSPs are stable*/ void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin) { int i; spx_word16_t m = margin; spx_word16_t m2 = 25736-margin; if (lsp[0]<m) lsp[0]=m; if (lsp[len-1]>m2) lsp[len-1]=m2; for (i=1;i<len-1;i++) { if (lsp[i]<lsp[i-1]+m) lsp[i]=lsp[i-1]+m; if (lsp[i]>lsp[i+1]-m) lsp[i]= SHR(lsp[i],1) + SHR(lsp[i+1]-m,1); } }
static inline time_t ydhms_diff (long_int year1, long_int yday1, int hour1, int min1, int sec1, int year0, int yday0, int hour0, int min0, int sec0) { verify (C99_integer_division, -1 / 2 == 0); /* Compute intervening leap days correctly even if year is negative. Take care to avoid integer overflow here. */ int a4 = SHR (year1, 2) + SHR (TM_YEAR_BASE, 2) - ! (year1 & 3); int b4 = SHR (year0, 2) + SHR (TM_YEAR_BASE, 2) - ! (year0 & 3); int a100 = a4 / 25 - (a4 % 25 < 0); int b100 = b4 / 25 - (b4 % 25 < 0); int a400 = SHR (a100, 2); int b400 = SHR (b100, 2); int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400); /* Compute the desired time in time_t precision. Overflow might occur here. */ time_t tyear1 = year1; time_t years = tyear1 - year0; time_t days = 365 * years + yday1 - yday0 + intervening_leap_days; time_t hours = 24 * days + hour1 - hour0; time_t minutes = 60 * hours + min1 - min0; time_t seconds = 60 * minutes + sec1 - sec0; return seconds; }
static time_t ydhms_diff(long_int year1, long_int yday1, int hour1, int min1, int sec1, int year0, int yday0, int hour0, int min0, int sec0) { verify(C99_integer_division, ((-1 / 2) == 0)); /* Compute intervening leap days correctly even if year is negative. * Take care to avoid integer overflow here: */ int a4 = (int)(SHR(year1, 2) + SHR(TM_YEAR_BASE, 2) - !(year1 & 3)); int b4 = (SHR(year0, 2) + SHR(TM_YEAR_BASE, 2) - !(year0 & 3)); int a100 = ((a4 / 25) - ((a4 % 25) < 0)); int b100 = ((b4 / 25) - ((b4 % 25) < 0)); int a400 = SHR(a100, 2); int b400 = SHR(b100, 2); int intervening_leap_days = ((a4 - b4) - (a100 - b100) + (a400 - b400)); /* Compute the desired time in time_t precision. Overflow might occur here: */ time_t tyear1 = year1; time_t years = (tyear1 - year0); time_t days = ((365 * years) + yday1 - yday0 + intervening_leap_days); time_t hours = ((24 * days) + hour1 - hour0); time_t minutes = ((60 * hours) + min1 - min0); time_t seconds = ((60 * minutes) + sec1 - sec0); return seconds; }
static int tm_diff (const struct tm *a, const struct tm *b) { /* Compute intervening leap days correctly even if year is negative. Take care to avoid int overflow in leap day calculations, but it's OK to assume that A and B are close to each other. */ int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3); int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3); int a100 = a4 / 25 - (a4 % 25 < 0); int b100 = b4 / 25 - (b4 % 25 < 0); int a400 = SHR (a100, 2); int b400 = SHR (b100, 2); int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400); int years = a->tm_year - b->tm_year; int days = (365 * years + intervening_leap_days + (a->tm_yday - b->tm_yday)); return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour)) + (a->tm_min - b->tm_min)) + (a->tm_sec - b->tm_sec)); }
/* Yield the difference between *A and *B, measured in seconds, ignoring leap seconds. The body of this function is taken directly from the GNU C Library; see src/strftime.c. */ static long int tm_diff (struct tm const *a, struct tm const *b) { /* Compute intervening leap days correctly even if year is negative. Take care to avoid int overflow in leap day calculations. */ int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3); int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3); int a100 = a4 / 25 - (a4 % 25 < 0); int b100 = b4 / 25 - (b4 % 25 < 0); int a400 = SHR (a100, 2); int b400 = SHR (b100, 2); int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400); long int ayear = a->tm_year; long int years = ayear - b->tm_year; long int days = (365 * years + intervening_leap_days + (a->tm_yday - b->tm_yday)); return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour)) + (a->tm_min - b->tm_min)) + (a->tm_sec - b->tm_sec)); }
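A quick standalone check of the leap-day arithmetic shared by the ydhms_diff and tm_diff variants above (hypothetical test; TM_YEAR_BASE is 1900 and SHR is taken as a plain arithmetic right shift, which the real macro handles more carefully).

#include <assert.h>

#define TM_YEAR_BASE 1900
#define SHR(a, b) ((a) >> (b))

static int intervening_leap_days(int year1, int year0) {
    int a4 = SHR(year1, 2) + SHR(TM_YEAR_BASE, 2) - !(year1 & 3);
    int b4 = SHR(year0, 2) + SHR(TM_YEAR_BASE, 2) - !(year0 & 3);
    int a100 = a4 / 25 - (a4 % 25 < 0);
    int b100 = b4 / 25 - (b4 % 25 < 0);
    int a400 = SHR(a100, 2);
    int b400 = SHR(b100, 2);
    return (a4 - b4) - (a100 - b100) + (a400 - b400);
}

int main(void) {
    assert(intervening_leap_days(101, 100) == 1); /* Jan 2000 -> Jan 2001 spans Feb 29, 2000 */
    assert(intervening_leap_days(100, 99) == 0);  /* Jan 1999 -> Jan 2000 crosses no leap day */
    assert(intervening_leap_days(201, 197) == 0); /* Jan 2097 -> Jan 2101: 2100 is skipped by the century rule */
    return 0;
}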
spx_word16_t compute_rms(const spx_sig_t *x, int len) { int i; spx_word32_t sum=0; spx_sig_t max_val=1; int sig_shift; for (i=0;i<len;i++) { spx_sig_t tmp = x[i]; if (tmp<0) tmp = -tmp; if (tmp > max_val) max_val = tmp; } sig_shift=0; while (max_val>16383) { sig_shift++; max_val >>= 1; } for (i=0;i<len;i+=4) { spx_word32_t sum2=0; spx_word16_t tmp; tmp = SHR(x[i],sig_shift); sum2 += MULT16_16(tmp,tmp); tmp = SHR(x[i+1],sig_shift); sum2 += MULT16_16(tmp,tmp); tmp = SHR(x[i+2],sig_shift); sum2 += MULT16_16(tmp,tmp); tmp = SHR(x[i+3],sig_shift); sum2 += MULT16_16(tmp,tmp); sum += SHR(sum2,6); } return SHR(SHL((spx_word32_t)spx_sqrt(1+DIV32(sum,len)),(sig_shift+3)),SIG_SHIFT); }
void split_cb_search_shape_sign( spx_sig_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_sig_t *r, SpeexBits *bits, char *stack, int complexity ) { int i,j,k,m,n,q; spx_word16_t *resp; #ifdef _USE_SSE __m128 *resp2; __m128 *E; #else spx_word16_t *resp2; spx_word32_t *E; #endif spx_word16_t *t; spx_sig_t *e, *r2; spx_word16_t *tmp; spx_word32_t *ndist, *odist; int *itmp; spx_word16_t **ot, **nt; int **nind, **oind; int *ind; const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; split_cb_params *params; int N=2; int *best_index; spx_word32_t *best_dist; int have_sign; N=complexity; if (N>10) N=10; ot=PUSH(stack, N, spx_word16_t*); nt=PUSH(stack, N, spx_word16_t*); oind=PUSH(stack, N, int*); nind=PUSH(stack, N, int*); params = (split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128); E = PUSH(stack, shape_cb_size>>2, __m128); #else resp2 = resp; E = PUSH(stack, shape_cb_size, spx_word32_t); #endif t = PUSH(stack, nsf, spx_word16_t); e = PUSH(stack, nsf, spx_sig_t); r2 = PUSH(stack, nsf, spx_sig_t); ind = PUSH(stack, nb_subvect, int); tmp = PUSH(stack, 2*N*nsf, spx_word16_t); for (i=0;i<N;i++) { ot[i]=tmp; tmp += nsf; nt[i]=tmp; tmp += nsf; } best_index = PUSH(stack, N, int); best_dist = PUSH(stack, N, spx_word32_t); ndist = PUSH(stack, N, spx_word32_t); odist = PUSH(stack, N, spx_word32_t); itmp = PUSH(stack, 2*N*nb_subvect, int); for (i=0;i<N;i++) { nind[i]=itmp; itmp+=nb_subvect; oind[i]=itmp; itmp+=nb_subvect; for (j=0;j<nb_subvect;j++) nind[i][j]=oind[i][j]=-1; } /* FIXME: make that adaptive? 
*/ for (i=0;i<nsf;i++) t[i]=SHR(target[i],6); for (j=0;j<N;j++) for (i=0;i<nsf;i++) ot[j][i]=t[i]; /*for (i=0;i<nsf;i++) printf ("%d\n", (int)t[i]);*/ /* Pre-compute codewords response and energy */ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (j=0;j<N;j++) odist[j]=0; /*For all subvectors*/ for (i=0;i<nb_subvect;i++) { /*"erase" nbest list*/ for (j=0;j<N;j++) ndist[j]=-2; /*For all n-bests of previous subvector*/ for (j=0;j<N;j++) { spx_word16_t *x=ot[j]+subvect_size*i; /*Find new n-best based on previous n-best j*/ if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); else vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); /*For all new n-bests*/ for (k=0;k<N;k++) { spx_word16_t *ct; spx_word32_t err=0; ct = ot[j]; /*update target*/ /*previous target*/ for (m=i*subvect_size;m<(i+1)*subvect_size;m++) t[m]=ct[m]; /* New code: update only enough of the target to calculate error*/ { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index[k]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] -= res[m]; else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] += res[m]; } /*compute error (distance)*/ err=odist[j]; for (m=i*subvect_size;m<(i+1)*subvect_size;m++) err += t[m]*t[m]; /*update n-best list*/ if (err<ndist[N-1] || ndist[N-1]<-1) { /*previous target (we don't care what happened before*/ for (m=(i+1)*subvect_size;m<nsf;m++) t[m]=ct[m]; /* New code: update the rest of the target only if it's worth it */ for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index[k]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; for (n=subvect_size*(i+1);n<nsf;n++,q++) t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q])); #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; for (n=subvect_size*(i+1);n<nsf;n++,q++) t[n] = SUB32(t[n],g*r[q]); #endif } for (m=0;m<N;m++) { if (err < ndist[m] || ndist[m]<-1) { for (n=N-1;n>m;n--) { for (q=(i+1)*subvect_size;q<nsf;q++) nt[n][q]=nt[n-1][q]; for (q=0;q<nb_subvect;q++) nind[n][q]=nind[n-1][q]; ndist[n]=ndist[n-1]; } for (q=(i+1)*subvect_size;q<nsf;q++) nt[m][q]=t[q]; for (q=0;q<nb_subvect;q++) nind[m][q]=oind[j][q]; nind[m][i]=best_index[k]; ndist[m]=err; break; } } } } if (i==0) break; } /*update old-new data*/ /* just swap pointers instead of a long copy */ { spx_word16_t **tmp2; tmp2=ot; ot=nt; nt=tmp2; } for (j=0;j<N;j++) for (m=0;m<nb_subvect;m++) oind[j][m]=nind[j][m]; for (j=0;j<N;j++) odist[j]=ndist[j]; } /*save indices*/ for (i=0;i<nb_subvect;i++) { ind[i]=nind[0][i]; speex_bits_pack(bits,ind[i],params->shape_bits+have_sign); } /* Put everything back together */ for (i=0;i<nb_subvect;i++) { int rind; spx_word16_t sign=1; rind = ind[i]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } /* Update excitation */ for (j=0;j<nsf;j++) exc[j]+=e[j]; /* Update target */ syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack); for 
(j=0;j<nsf;j++) target[j]-=r2[j]; }
/* Convert *TP to a time_t value, inverting the monotonic and mostly-unit-linear conversion function CONVERT. Use *OFFSET to keep track of a guess at the offset of the result, compared to what the result would be for UTC without leap seconds. If *OFFSET's guess is correct, only one CONVERT call is needed. This function is external because it is used also by timegm.c. */ time_t __mktime_internal(struct tm *tp, struct tm *(*convert)(const time_t *, struct tm *), time_t *offset) { time_t t, gt, t0, t1, t2; struct tm tm; /* The maximum number of probes (calls to CONVERT) should be enough to handle any combinations of time zone rule changes, solar time, leap seconds, and oscillations around a spring-forward gap. POSIX.1 prohibits leap seconds, but some hosts have them anyway. */ int remaining_probes = 6; /* Time requested. Copy it in case CONVERT modifies *TP; this can occur if TP is localtime's returned value and CONVERT is localtime. */ int sec = tp->tm_sec; int min = tp->tm_min; int hour = tp->tm_hour; int mday = tp->tm_mday; int mon = tp->tm_mon; int year_requested = tp->tm_year; int isdst = tp->tm_isdst; /* 1 if the previous probe was DST. */ int dst2; /* Ensure that mon is in range, and set year accordingly. */ int mon_remainder = mon % 12; int negative_mon_remainder = mon_remainder < 0; int mon_years = mon / 12 - negative_mon_remainder; long_int lyear_requested = year_requested; long_int year = lyear_requested + mon_years; /* The other values need not be in range: the remaining code handles minor overflows correctly, assuming int and time_t arithmetic wraps around. Major overflows are caught at the end. */ /* Calculate day of year from year, month, and day of month. The result need not be in range. */ int mon_yday = ((__mon_yday[leapyear (year)] [mon_remainder + 12 * negative_mon_remainder]) - 1); long_int lmday = mday; long_int yday = mon_yday + lmday; time_t guessed_offset = *offset; int sec_requested = sec; if (LEAP_SECONDS_POSSIBLE) { /* Handle out-of-range seconds specially, since ydhms_tm_diff assumes every minute has 60 seconds. */ if (sec < 0) sec = 0; if (59 < sec) sec = 59; } /* Invert CONVERT by probing. First assume the same offset as last time: */ t0 = ydhms_diff(year, yday, hour, min, sec, (EPOCH_YEAR - TM_YEAR_BASE), 0, 0, 0, (int)-(guessed_offset)); if ((TIME_T_MAX / INT_MAX / 366 / 24 / 60 / 60) < 3) { /* time_t is NOT large enough to rule out overflows, so check for major overflows. A gross check suffices, since if t0 has overflowed, it is off by a multiple of TIME_T_MAX - TIME_T_MIN + 1. So ignore any component of the difference that is bounded by a small value. */ /* Approximate log base 2 of the number of time units per biennium. A biennium is 2 years; use this unit instead of years to avoid integer overflow. For example, 2 average Gregorian years are 2 * 365.2425 * 24 * 60 * 60 seconds, which is 63113904 seconds, and rint (log2 (63113904)) is 26. */ int ALOG2_SECONDS_PER_BIENNIUM = 26; int ALOG2_MINUTES_PER_BIENNIUM = 20; int ALOG2_HOURS_PER_BIENNIUM = 14; int ALOG2_DAYS_PER_BIENNIUM = 10; int LOG2_YEARS_PER_BIENNIUM = 1; int approx_requested_biennia = (SHR (year_requested, LOG2_YEARS_PER_BIENNIUM) - SHR (EPOCH_YEAR - TM_YEAR_BASE, LOG2_YEARS_PER_BIENNIUM) + SHR (mday, ALOG2_DAYS_PER_BIENNIUM) + SHR (hour, ALOG2_HOURS_PER_BIENNIUM) + SHR (min, ALOG2_MINUTES_PER_BIENNIUM) + (LEAP_SECONDS_POSSIBLE ? 
0 : SHR (sec, ALOG2_SECONDS_PER_BIENNIUM))); int approx_biennia = (int)SHR(t0, ALOG2_SECONDS_PER_BIENNIUM); int diff = approx_biennia - approx_requested_biennia; int approx_abs_diff = diff < 0 ? -1 - diff : diff; /* IRIX 4.0.5 cc miscalculates TIME_T_MIN / 3: it erroneously gives a positive value of 715827882. Setting a variable first then doing math on it seems to work. ([email protected]) */ time_t time_t_max = TIME_T_MAX; time_t time_t_min = TIME_T_MIN; time_t overflow_threshold = (time_t_max / 3 - time_t_min / 3) >> ALOG2_SECONDS_PER_BIENNIUM; if (overflow_threshold < approx_abs_diff) { /* Overflow occurred. Try repairing it; this might work if the time zone offset is enough to undo the overflow. */ time_t repaired_t0 = (-1 - t0); approx_biennia = (int)SHR(repaired_t0, ALOG2_SECONDS_PER_BIENNIUM); diff = (approx_biennia - approx_requested_biennia); approx_abs_diff = diff < 0 ? -1 - diff : diff; if (overflow_threshold < approx_abs_diff) return -1; guessed_offset += repaired_t0 - t0; t0 = repaired_t0; } }
void Jit::Comp_mxc1(MIPSOpcode op) { CONDITIONAL_DISABLE; int fs = _FS; MIPSGPReg rt = _RT; switch((op >> 21) & 0x1f) { case 0: // R(rt) = FI(fs); break; //mfc1 if (rt != MIPS_REG_ZERO) { fpr.MapReg(fs, true, false); // TODO: Seems the V register becomes dirty here? It shouldn't. gpr.MapReg(rt, false, true); MOVD_xmm(gpr.R(rt), fpr.RX(fs)); } break; case 2: // R(rt) = currentMIPS->ReadFCR(fs); break; //cfc1 if (fs == 31) { bool wasImm = gpr.IsImm(MIPS_REG_FPCOND); if (!wasImm) { gpr.Lock(rt, MIPS_REG_FPCOND); gpr.MapReg(MIPS_REG_FPCOND, true, false); } gpr.MapReg(rt, false, true); MOV(32, gpr.R(rt), M(&mips_->fcr31)); if (wasImm) { if (gpr.GetImm(MIPS_REG_FPCOND) & 1) { OR(32, gpr.R(rt), Imm32(1 << 23)); } else { AND(32, gpr.R(rt), Imm32(~(1 << 23))); } } else { AND(32, gpr.R(rt), Imm32(~(1 << 23))); MOV(32, R(EAX), gpr.R(MIPS_REG_FPCOND)); AND(32, R(EAX), Imm32(1)); SHL(32, R(EAX), Imm8(23)); OR(32, gpr.R(rt), R(EAX)); } gpr.UnlockAll(); } else if (fs == 0) { gpr.SetImm(rt, MIPSState::FCR0_VALUE); } else { Comp_Generic(op); } return; case 4: //FI(fs) = R(rt); break; //mtc1 gpr.MapReg(rt, true, false); fpr.MapReg(fs, false, true); MOVD_xmm(fpr.RX(fs), gpr.R(rt)); return; case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1 if (fs == 31) { if (gpr.IsImm(rt)) { gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1); MOV(32, M(&mips_->fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF)); } else { gpr.Lock(rt, MIPS_REG_FPCOND); gpr.MapReg(rt, true, false); gpr.MapReg(MIPS_REG_FPCOND, false, true); MOV(32, gpr.R(MIPS_REG_FPCOND), gpr.R(rt)); SHR(32, gpr.R(MIPS_REG_FPCOND), Imm8(23)); AND(32, gpr.R(MIPS_REG_FPCOND), Imm32(1)); MOV(32, M(&mips_->fcr31), gpr.R(rt)); AND(32, M(&mips_->fcr31), Imm32(0x0181FFFF)); gpr.UnlockAll(); } } else {
/* Return the average of A and B, even if A + B would overflow: */ static time_t time_t_avg(time_t a, time_t b) { return (SHR(a, 1) + SHR(b, 1) + (a & b & 1)); }
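The halving trick above keeps every intermediate value in range, and the (a & b & 1) term restores the unit lost when both operands are odd; a small check (hypothetical test, again assuming SHR behaves as an arithmetic shift).

#include <assert.h>
#include <time.h>

#define SHR(a, b) ((a) >> (b)) /* assumption: arithmetic shift for negative values */

static time_t time_t_avg_ref(time_t a, time_t b) {
    return SHR(a, 1) + SHR(b, 1) + (a & b & 1);
}

int main(void) {
    assert(time_t_avg_ref(3, 5) == 4);
    assert(time_t_avg_ref(3, 4) == 3);   /* rounds toward negative infinity */
    assert(time_t_avg_ref(-3, -4) == -4);
    return 0;
}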
static word_type ROTL(word_type x, unsigned n) { return SHL(x, n) | SHR(x, word_bits-n); }
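One caveat with the rotate above: when n == 0 the right-shift count becomes word_bits, which is undefined behaviour for a plain C shift, so the SHL/SHR macros presumably guard against that. A self-contained variant that sidesteps the issue by masking the count (sketch for 32-bit words):

#include <stdint.h>

static uint32_t rotl32(uint32_t x, unsigned n) {
    n &= 31;                                  /* keep the shift count in range */
    return (x << n) | (x >> ((32 - n) & 31)); /* n == 0 maps to two zero-bit shifts */
}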
int crypto_aead_decrypt( unsigned char *m,unsigned long long *mlen, // message unsigned char *nsec, // not relevant to CLOC or SILC const unsigned char *c,unsigned long long clen, // ciphertext const unsigned char *ad,unsigned long long adlen, // associated data const unsigned char *npub, // nonce const unsigned char *k // the master key ) { block estate, tstate, tmp; // encryption state, tag state, and temporary state estate = SETZERO(); unsigned char ltag[16]; // local copy of temporary tag value unsigned long long i, lastblocklen,j; /* set the message (plaintext) length */ *mlen = clen - CRYPTO_ABYTES; /* generate round keys from master key */ AES128_KeyExpansion(k); /* process the first (partial) block of ad */ load_partial_block(&estate, ad, (adlen>STATE_LEN)?STATE_LEN:adlen, ONE_ZERO_PADDING); fix0(estate); AES128_encrypt(estate, estate); if((ad[0] & 0x80) || (adlen == 0)){ // apply h h(estate); } else{ // do nothing } if(adlen > STATE_LEN){ // ad is more than one block i = STATE_LEN; /* process the middle ad blocks, excluding the first and last (partial) block */ while((i+STATE_LEN) < adlen) { tmp = LOAD(ad+i); estate = XOR(estate, tmp); AES128_encrypt(estate, estate); i += STATE_LEN; } /* process the last (partial) ad block */ load_partial_block(&tmp, ad+i, adlen - i, ONE_ZERO_PADDING); estate = XOR(estate, tmp); AES128_encrypt(estate, estate); } /* process the nonce */ load_partial_block(&tmp, npub, CRYPTO_NPUBBYTES, PARAM_OZP); estate = XOR(estate, tmp); if((adlen % STATE_LEN) || (adlen == 0)){ /* apply f2 */ f2(estate); } else{ /* apply f1 */ f1(estate); } /* process ciphertext */ tstate = estate; AES128_encrypt(estate, estate); if(*mlen){ /* apply g2 to tag state */ g2(tstate); } else{ /* apply g1 to tag state */ g1(tstate); } AES128_encrypt(tstate, tstate); i = 0; /* process all the message except for the last message/ciphertext block */ while((i + STATE_LEN) < (*mlen)){ tmp = LOAD(c+i); estate = XOR(estate, tmp); STORE(m+i, estate); tstate = XOR(tmp, tstate); AES128_encrypt(tstate, tstate); fix1(tmp); print_state("after applying fix1\n", estate); AES128_encrypt(tmp, estate); i += STATE_LEN; } /* process the last block of the message/ciphertext */ lastblocklen = (*mlen) - i; if(lastblocklen > 0){ load_partial_block(&tmp, c+i, lastblocklen, ZERO_APPEND); estate = XOR(estate, tmp); print_state("after xoring last partial message block\n", estate); store_partial_block(m+i, estate, lastblocklen); unsigned char shift_bytes = (STATE_LEN - (unsigned char)lastblocklen); tmp = AND(SHR(_mm_set1_epi8(0xff), shift_bytes), tmp); tstate = XOR(tstate, tmp); /* add the one zero padding */ tstate = XOR(tstate, SHL(_mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80), lastblocklen)); if((*mlen) % STATE_LEN){ /* apply f2 */ f2(tstate); } else{ /* apply f1 */ f1(tstate); } AES128_encrypt(tstate, tstate); } /* compare tag and output message */ STORE(ltag, tstate); for(j = 0; j < CRYPTO_ABYTES; j++){ if(ltag[j] != c[clen - CRYPTO_ABYTES + j]) return RETURN_TAG_NO_MATCH; } return RETURN_SUCCESS; }
void gainQuantization(bcg729EncoderChannelContextStruct *encoderChannelContext, word16_t targetSignal[], word16_t filteredAdaptativeCodebookVector[], word16_t convolvedFixedCodebookVector[], word16_t fixedCodebookVector[], word64_t xy64, word64_t yy64, word16_t *quantizedAdaptativeCodebookGain, word16_t *quantizedFixedCodebookGain, uint16_t *gainCodebookStage1, uint16_t *gainCodebookStage2) { int i,j; word64_t xz64=0, yz64=0, zz64=0; word32_t xy; word32_t yy; word32_t xz; word32_t yz; word32_t zz; uint16_t minNormalization = 31; uint16_t currentNormalization; word32_t bestAdaptativeCodebookGain, bestFixedCodebookGain; word64_t denominator; word16_t predictedFixedCodebookGain; uint16_t indexBaseGa=0; uint16_t indexBaseGb=0; uint16_t indexGa=0, indexGb=0; word64_t distanceMin = MAXINT64; /*** compute spec 3.9 eq63 terms first on 64 bits and then scale them if needed to fit on 32 ***/ /* Xy64 and Yy64 already computed during adaptativeCodebookGain computation */ for (i=0; i<L_SUBFRAME; i++) { xz64 = MAC64(xz64, targetSignal[i], convolvedFixedCodebookVector[i]); /* in Q12 */ yz64 = MAC64(yz64, filteredAdaptativeCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q12 */ zz64 = MAC64(zz64, convolvedFixedCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q24 */ } /* now scale these terms to have them fit on 32 bits - terms Xy, Xz and Yz shall fit on 31 bits because they are used in eq63 with a factor 2 */ xy = SHR64(((xy64<0)?-xy64:xy64),30); yy = SHR64(yy64,31); xz = SHR64(((xz64<0)?-xz64:xz64),30); yz = SHR64(((yz64<0)?-yz64:yz64),30); zz = SHR64(zz64,31); currentNormalization = countLeadingZeros(xy); if (currentNormalization<minNormalization) { minNormalization = currentNormalization; } currentNormalization = countLeadingZeros(xz); if (currentNormalization<minNormalization) { minNormalization = currentNormalization; } currentNormalization = countLeadingZeros(yz); if (currentNormalization<minNormalization) { minNormalization = currentNormalization; } currentNormalization = countLeadingZeros(yy); if (currentNormalization<minNormalization) { minNormalization = currentNormalization; } currentNormalization = countLeadingZeros(zz); if (currentNormalization<minNormalization) { minNormalization = currentNormalization; } if (minNormalization<31) { /* we shall normalise, values are over 32 bits */ minNormalization = 31 - minNormalization; xy = (word32_t)SHR64(xy64, minNormalization); yy = (word32_t)SHR64(yy64, minNormalization); xz = (word32_t)SHR64(xz64, minNormalization); yz = (word32_t)SHR64(yz64, minNormalization); zz = (word32_t)SHR64(zz64, minNormalization); } else { /* no need to normalise, values already fit on 32 bits, just cast them */ xy = (word32_t)xy64; /* in Q0 */ yy = (word32_t)yy64; /* in Q0 */ xz = (word32_t)xz64; /* in Q12 */ yz = (word32_t)yz64; /* in Q12 */ zz = (word32_t)zz64; /* in Q24 */ } /*** compute the best gains minimizing eq63 ***/ /* Note this best gain computation is not at all described in the spec, got it from the ITU code */ /* bestAdaptativeCodebookGain = (zz.xy - xz.yz) / ((yy*zz) - yz^2) */ /* bestFixedCodebookGain = (yy*xz - xy*yz) / ((yy*zz) - yz^2) */ /* best gains are computed in Q9 and Q2 and fit on 16 bits */ denominator = MAC64(MULT32_32(yy, zz), -yz, yz); /* ((yy*zz) - yz^2) in Q24 (always >= 0) */ /* avoid division by zero */ if (denominator==0) { /* consider it to be one */ bestAdaptativeCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(zz, xy), -xz, yz), 15)); /* MAC in Q24 -> Q9 */ bestFixedCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(yy, xz), -xy, yz), 
10)); /* MAC in Q12 -> Q2 */ } else { /* bestAdaptativeCodebookGain in Q9 */ uint16_t numeratorNorm; word64_t numerator = MAC64(MULT32_32(zz, xy), -xz, yz); /* in Q24 */ /* check if we can shift it by 9 without overflow as the bestAdaptativeCodebookGain is computed in Q9 */ word32_t numeratorH = (word32_t)(SHR64(numerator,32)); numeratorH = (numeratorH>0)?numeratorH:-numeratorH; numeratorNorm = countLeadingZeros(numeratorH); if (numeratorNorm >= 9) { bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator,9), denominator)); /* bestAdaptativeCodebookGain in Q9 */ } else { word64_t shiftedDenominator = SHR64(denominator, 9-numeratorNorm); if (shiftedDenominator>0) { /* we can't shift the numerator left by 9, so can we shift the denominator right by 9-numeratorNorm without hitting 0? */ bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestAdaptativeCodebookGain in Q9 */ } else { bestAdaptativeCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 9-numeratorNorm); /* shift left the division result to reach Q9 */ } } numerator = MAC64(MULT32_32(yy, xz), -xy, yz); /* in Q12 */ /* check if we can shift it by 14 (it's in Q12 and denominator in Q24) without overflow as the bestFixedCodebookGain is computed in Q2 */ numeratorH = (word32_t)(SHR64(numerator,32)); numeratorH = (numeratorH>0)?numeratorH:-numeratorH; numeratorNorm = countLeadingZeros(numeratorH); if (numeratorNorm >= 14) { bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator,14), denominator)); } else { word64_t shiftedDenominator = SHR64(denominator, 14-numeratorNorm); /* bestFixedCodebookGain in Q14 */ if (shiftedDenominator>0) { /* we can't shift the numerator left by 14, so can we shift the denominator right by 14-numeratorNorm without hitting 0? */ bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestFixedCodebookGain in Q14 */ } else { bestFixedCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 14-numeratorNorm); /* shift left the division result to reach Q14 */ } } } /*** Compute the predicted gain as in spec 3.9.1 eq71 in Q6 ***/ predictedFixedCodebookGain = (word16_t)(SHR32(MACodeGainPrediction(encoderChannelContext->previousGainPredictionError, fixedCodebookVector), 12)); /* in Q16 -> Q4 range [3,1830] */ /*** preselection spec 3.9.2 ***/ /* Note: the spec just says to select the best 50% of each vector; the ITU code goes through magical constant computation to select the beginning of a continuous range */ /* much simpler here: vectors are ordered in increasing order, so just select 2 (4 for Gb) indexes before the first value to exceed the best gain previously computed */ while (indexBaseGa<6 && bestFixedCodebookGain>(MULT16_16_Q14(GACodebook[indexBaseGa][1],predictedFixedCodebookGain))) { /* bestFixedCodebookGain> in Q2, GACodebook in Q12 *predictedFixedCodebookGain in Q4 -> Q16-14 */ indexBaseGa++; } if (indexBaseGa>0) indexBaseGa--; if (indexBaseGa>0) indexBaseGa--; while (indexBaseGb<12 && bestAdaptativeCodebookGain>(SHR(GBCodebook[indexBaseGb][0],5))) { indexBaseGb++; } if (indexBaseGb>0) indexBaseGb--; if (indexBaseGb>0) indexBaseGb--; if (indexBaseGb>0) indexBaseGb--; if (indexBaseGb>0) indexBaseGb--; /*** test all possibilities of Ga and Gb indexes and select the best one ***/ xy = -SHL(xy,1); /* xy term is always used with a -2 factor */ xz = -SHL(xz,1); /* xz term is always used with a -2 factor */ yz = SHL(yz,1); /* yz term is always used with a 2 factor */ for 
(i=0; i<4; i++) { for (j=0; j<8; j++) { /* compute gamma->gc and gp */ word16_t gp = ADD16(GACodebook[i+indexBaseGa][0], GBCodebook[j+indexBaseGb][0]); /* result in Q14 */ word16_t gamma = ADD16(GACodebook[i+indexBaseGa][1], GBCodebook[j+indexBaseGb][1]); /* result in Q3.12 (range [0.185, 5.05])*/ word32_t gc = MULT16_16_Q14(gamma, predictedFixedCodebookGain); /* gamma in Q12, predictedFixedCodebookGain in Q4 -> Q16 -14 -> Q2 */ /* compute E as in eq63 (first term excluded) */ word64_t acc = MULT32_32(MULT16_16(gp, gp), yy); /* acc = gp^2*yy gp in Q14, yy in Q0 -> acc in Q28 */ acc = MAC64(acc, MULT16_16(gc, gc), zz); /* gc in Q2, zz in Q24 -> acc in Q28, note gc is on 32 bits but in a range making gc^2 fitting on 32 bits */ acc = MAC64(acc, SHL32((word32_t)gp, 14), xy); /* gp in Q14 shifted to Q28, xy in Q0 -> acc in Q28 */ acc = MAC64(acc, SHL32(gc, 14), xz); /* gc in Q2 shifted to Q16, xz in Q12 -> acc in Q28 */ acc = MAC64(acc, MULT16_16(gp,gc), yz); /* gp in Q14, gc in Q2 yz in Q12 -> acc in Q28 */ if (acc<distanceMin) { distanceMin = acc; indexGa = i+indexBaseGa; indexGb = j+indexBaseGb; *quantizedAdaptativeCodebookGain = gp; *quantizedFixedCodebookGain = (word16_t)SHR(gc, 1); } } } /* update the previous gain prediction error */ computeGainPredictionError(ADD16(GACodebook[indexGa][1], GBCodebook[indexGb][1]), encoderChannelContext->previousGainPredictionError); /* mapping of indexes */ *gainCodebookStage1 = indexMappingGA[indexGa]; *gainCodebookStage2 = indexMappingGB[indexGb]; return; }