/**
 * Choose a scale factor index for one subband and derive the matching
 * quantizer.
 *
 * Starting from index 127, the search tries to lower the index by 64, 32,
 * ..., 1. A step is accepted only if the combined exponent of the candidate
 * stays above 17 and the quantized peak still fits into
 * (quant_levels[abits] - 1) / 2. Indices of 125 and above are finally
 * clamped to 124.
 *
 * @param peak_cb  peak level, asserted to lie in [-2047, 0]; its negation
 *                 indexes cb_to_level[]
 * @param abits    index into stepsize_inv[] and quant_levels[]
 * @param quant    receives the mantissa/exponent of the chosen quantizer
 * @return the chosen scale factor index
 */
static int calc_one_scale(int32_t peak_cb, int abits, softfloat *quant)
{
    int32_t peak;
    int nscale = 127;
    int step;

    av_assert0(peak_cb <= 0);
    av_assert0(peak_cb >= -2047);

    peak = cb_to_level[-peak_cb];

    for (step = 64; step > 0; step >>= 1) {
        const int cand = nscale - step;
        const int exp_sum = scalefactor_inv[cand].e + stepsize_inv[abits].e;

        /* Only candidates whose combined exponent exceeds 17 are tried. */
        if (exp_sum > 17) {
            softfloat trial;

            trial.m = mul32(scalefactor_inv[cand].m, stepsize_inv[abits].m);
            trial.e = exp_sum - 17;

            /* Accept the step while the peak still fits the level range. */
            if ((quant_levels[abits] - 1) / 2 >= quantize_value(peak, trial))
                nscale = cand;
        }
    }

    if (nscale >= 125)
        nscale = 124;

    quant->m = mul32(scalefactor_inv[nscale].m, stepsize_inv[abits].m);
    quant->e = scalefactor_inv[nscale].e + stepsize_inv[abits].e - 17;
    av_assert0((quant_levels[abits] - 1) / 2 >= quantize_value(peak, *quant));

    return nscale;
}
/**
 * Produce DCA_LFE_SAMPLES downsampled LFE values for one frame.
 *
 * A private 512-entry ring buffer is seeded from the last channel of
 * c->history. For every output sample the ring is convolved with
 * lfe_fir_64i[] (oldest entry first), the result is stored in
 * c->downsampled_lfe[], and 64 fresh samples of the last interleaved input
 * channel overwrite the oldest ring entries.
 *
 * @param c      encoder context (reads history/channels, writes
 *               downsampled_lfe)
 * @param input  interleaved samples, c->channels values per time step
 */
static void lfe_downsample(DCAContext *c, const int32_t *input)
{
    /* FIXME: make 128x LFE downsampling possible */
    const int stride = c->channels;
    const int lfe_ch = stride - 1;
    int32_t ring[512];
    int head = 0;
    int out, tap, n;

    for (n = 0; n < 512; n++)
        ring[n] = c->history[n][lfe_ch];

    for (out = 0; out < DCA_LFE_SAMPLES; out++) {
        int32_t acc = 0;

        /* Walk the ring once, starting at the oldest sample. */
        for (tap = 0; tap < 512; tap++)
            acc += mul32(ring[(head + tap) & 511], lfe_fir_64i[tap]);
        c->downsampled_lfe[out] = acc;

        /* Replace the 64 oldest entries with new input samples. */
        for (n = 0; n < 64; n++)
            ring[head + n] = input[(out * 64 + n) * stride + lfe_ch];
        head = (head + 64) & 511;
    }
}
/**
 * Split 32 new samples of one channel into 32 subband values.
 *
 * After add_new_samples() has pushed the input into the channel history, the
 * 512 history entries (starting at c->start[channel] and wrapping) are
 * multiplied by UnQMF[] and folded into 32 accumulators. The accumulators
 * are then combined with band_delta_factor() for each band.
 *
 * Note that the running response is zeroed once, before the band loop, and
 * is carried from band to band; each band adds its own contribution on top
 * of the previous total (presumably why the helper is a "delta" factor --
 * confirm against band_delta_factor()).
 *
 * @param c        encoder context
 * @param in       32 new input samples
 * @param out      receives the 32 subband values
 * @param channel  channel index into c->history / c->start
 */
static void qmf_decompose(DCAContext *c, int32_t in[32], int32_t out[32], int channel)
{
    int32_t folded[DCA_SUBBANDS_32] = {0};
    int32_t running;
    int first, pos, tap, band, n;

    add_new_samples(c, in, DCA_SUBBANDS_32, channel);
    first = c->start[channel];

    /* Calculate the dot product of the signal with the (possibly inverted)
       reference decoder's response to this vector:
       (0.0, 0.0, ..., 0.0, -1.0, 1.0, 0.0, ..., 0.0)
       so that -1.0 cancels 1.0 from the previous step */
    tap = 0;
    for (pos = first; pos < 512; pos++, tap++) {
        const int k = 48 + tap;
        const int lane = k & 31;

        /* Odd groups of 32 taps are mirrored onto the accumulators. */
        folded[(k & 32) ? (31 - lane) : lane] += mul32(c->history[channel][pos], UnQMF[tap]);
    }
    for (pos = 0; pos < first; pos++, tap++) {
        const int k = 48 + tap;
        const int lane = k & 31;

        folded[(k & 32) ? (31 - lane) : lane] += mul32(c->history[channel][pos], UnQMF[tap]);
    }

    /* TODO: implement FFT instead of this naive calculation */
    running = 0;
    for (band = 0; band < DCA_SUBBANDS_32; band++) {
        for (n = 0; n < 32; n++)
            running += mul32(folded[n], band_delta_factor(band, n));

        if (band & 2)
            out[band] = -running;
        else
            out[band] = running;
    }
}
/* Lane-wise multiply: for each of the four w32 lanes (0..3), stores
   mul32(argL lane, argR lane) into the same lane of *res. argL and argR are
   only read through; res is only written.
   NOTE(review): the return type of this definition is not visible in this
   excerpt -- confirm against the full file. */
h_generic_calc_Mul32x4 ( /*OUT*/V128* res, V128* argL, V128* argR ) { res->w32[0] = mul32(argL->w32[0], argR->w32[0]); res->w32[1] = mul32(argL->w32[1], argR->w32[1]); res->w32[2] = mul32(argL->w32[2], argR->w32[2]); res->w32[3] = mul32(argL->w32[3], argR->w32[3]); }
/**
 * Compute one downsampled LFE value.
 *
 * The new samples are pushed into the history of channel index
 * c->prim_channels via add_new_samples(); the 512 history entries, read
 * from c->start[channel] onwards with wrap-around, are then convolved with
 * lfe_fir_64i[].
 *
 * @param c   encoder context
 * @param in  LFE_INTERPOLATION new input samples
 * @return the accumulated filter output
 */
static int lfe_downsample(DCAContext *c, int32_t in[LFE_INTERPOLATION])
{
    const int lfe = c->prim_channels;
    int32_t sum = 0;
    int first, pos, tap;

    add_new_samples(c, in, LFE_INTERPOLATION, lfe);
    first = c->start[lfe];

    /* Oldest part of the ring: from the start marker to the end... */
    tap = 0;
    for (pos = first; pos < 512; pos++, tap++)
        sum += mul32(c->history[lfe][pos], lfe_fir_64i[tap]);

    /* ...then the wrapped part from the beginning up to the marker. */
    for (pos = 0; pos < first; pos++, tap++)
        sum += mul32(c->history[lfe][pos], lfe_fir_64i[tap]);

    return sum;
}
/**
 * Transform one frame of interleaved input into 32 subbands per channel.
 *
 * For each full-band channel a private 512-entry ring buffer is seeded from
 * c->history. For each of SUBBAND_SAMPLES steps the ring is multiplied by
 * c->band_interpolation[] into 64 accumulators, the accumulators are folded,
 * and 32 band responses are formed with cos_t() weights and stored in
 * c->subband[step][band][ch]. Then 32 new input samples replace the oldest
 * ring entries.
 *
 * @param c      encoder context
 * @param input  interleaved samples, c->channels values per time step
 */
static void subband_transform(DCAEncContext *c, const int32_t *input)
{
    int ch;

    for (ch = 0; ch < c->fullband_channels; ch++) {
        /* A copy is used because the context's history is also needed for PSY */
        int32_t ring[512];
        int head = 0;
        const int src_ch = c->channel_order_tab[ch];
        int n, step;

        for (n = 0; n < 512; n++)
            ring[n] = c->history[n][ch];

        for (step = 0; step < SUBBAND_SAMPLES; step++) {
            int32_t acc[64] = {0};
            int tap, k, band;

            /* All 64 convolutions in one pass, oldest sample first. */
            for (tap = 0; tap < 512; tap++)
                acc[tap & 63] += mul32(ring[(head + tap) & 511], c->band_interpolation[tap]);

            /* Fold the outer accumulators into entries 16..47. */
            for (k = 16; k < 32; k++)
                acc[k] -= acc[31 - k];
            for (k = 32; k < 48; k++)
                acc[k] += acc[95 - k];

            for (band = 0; band < 32; band++) {
                int32_t resp = 0;

                for (k = 16; k < 48; k++) {
                    const int s = (2 * band + 1) * (2 * (k + 16) + 1);

                    resp += mul32(acc[k], cos_t(s << 3)) >> 3;
                }

                /* Sign is flipped whenever bit 1 of (band + 1) is set. */
                c->subband[step][band][ch] = ((band + 1) & 2) ? -resp : resp;
            }

            /* Copy in 32 new samples from input */
            for (n = 0; n < 32; n++)
                ring[head + n] = input[(step * 32 + n) * c->channels + src_ch];
            head = (head + 32) & 511;
        }
    }
}
// Emits the fast path for a varargs call.
//
// Operands 1..3 of the instruction give the callee register, the register
// holding the argument count, and the register offset. The emitted code
// takes the slow case unless the callee is a cell whose first word equals
// jsFunctionVPtr, then speculatively builds the new call frame and calls the
// virtual-call stub.
void JIT::compileOpCallVarargs(Instruction* instruction)
{
    const int calleeOperand = instruction[1].u.operand;
    const int argCountOperand = instruction[2].u.operand;
    const int frameRegisterOffset = instruction[3].u.operand;

    // Byte offset of the CallerFrame slot within a call frame.
    const intptr_t callerFrameByteOffset =
        static_cast<intptr_t>(sizeof(Register)) * static_cast<intptr_t>(RegisterFile::CallerFrame);

    // regT1 <- argument count, regT0 <- callee, regT2 <- argument count + register offset.
    emitGetVirtualRegister(argCountOperand, regT1);
    emitFastArithImmToInt(regT1);
    emitGetVirtualRegister(calleeOperand, regT0);
    addPtr(Imm32(frameRegisterOffset), regT1, regT2);

    // Only JSFunctions stay on the fast path.
    emitJumpSlowCaseIfNotJSCell(regT0);
    addSlowCase(branchPtr(NotEqual, Address(regT0), TrustedImmPtr(m_globalData->jsFunctionVPtr)));

    // Speculatively roll the callframe, assuming argCount will match the arity.
    // regT2 becomes a byte offset; regT3 addresses the new frame's CallerFrame
    // slot, which receives the current call frame pointer.
    mul32(TrustedImm32(sizeof(Register)), regT2, regT2);
    addPtr(Imm32(static_cast<int32_t>(callerFrameByteOffset)), regT2, regT3);
    addPtr(callFrameRegister, regT3);
    storePtr(callFrameRegister, regT3);
    addPtr(regT2, callFrameRegister);
    emitNakedCall(m_globalData->jitStubs->ctiVirtualCall());

    sampleCodeBlock(m_codeBlock);
}
// Emits the fast path for a varargs call (separate tag/payload form).
//
// Operands 1..3 of the instruction give the callee register, the register
// holding the argument count, and the register offset. The emitted code
// takes the slow case unless the callee is a cell whose first word equals
// jsFunctionVPtr, then speculatively builds the new call frame and calls the
// virtual-call stub with the argument count in regT1.
void JIT::compileOpCallVarargs(Instruction* instruction)
{
    const int calleeOperand = instruction[1].u.operand;
    const int argCountOperand = instruction[2].u.operand;
    const int frameRegisterOffset = instruction[3].u.operand;

    // regT1:regT0 <- callee, regT2 <- argument count,
    // regT3 <- argument count + register offset.
    emitLoad(calleeOperand, regT1, regT0);
    emitLoadPayload(argCountOperand, regT2);
    addPtr(Imm32(frameRegisterOffset), regT2, regT3);

    // Only JSFunctions stay on the fast path.
    emitJumpSlowCaseIfNotJSCell(calleeOperand, regT1);
    addSlowCase(branchPtr(NotEqual, Address(regT0), TrustedImmPtr(m_globalData->jsFunctionVPtr)));

    // Speculatively roll the callframe, assuming argCount will match the arity.
    // regT3 becomes the address of the new frame; its CallerFrame slot gets a
    // cell tag and the current call frame pointer as payload.
    mul32(TrustedImm32(sizeof(Register)), regT3, regT3);
    addPtr(callFrameRegister, regT3);
    store32(TrustedImm32(JSValue::CellTag), tagFor(RegisterFile::CallerFrame, regT3));
    storePtr(callFrameRegister, payloadFor(RegisterFile::CallerFrame, regT3));
    move(regT3, callFrameRegister);

    // Hand the argument count over in regT1.
    move(regT2, regT1);
    emitNakedCall(m_globalData->jitStubs->ctiVirtualCall());

    sampleCodeBlock(m_codeBlock);
}
/**
 * Quantize a value with a mantissa/exponent quantizer.
 *
 * Computes (mul32(value, quant.m) + 2^(quant.e - 1)) >> quant.e, i.e. the
 * product is shifted right by quant.e after adding half of the shift step.
 * quant.e must be at least 1 for the half-step shift to be well defined.
 *
 * @param value  value to quantize
 * @param quant  quantizer mantissa (m) and right-shift amount (e)
 * @return the quantized value
 */
static int32_t quantize_value(int32_t value, softfloat quant)
{
    const int32_t half_step = 1 << (quant.e - 1);
    int32_t scaled = mul32(value, quant.m) + half_step;

    return scaled >> quant.e;
}
/**
 * Fill the first 128 entries of cos_table[].
 *
 * Seventeen sine/cosine values are built by repeated rotation from
 * (sin, cos) = (0, 0x7fffffff); the two multipliers are approximately
 * sin(PI / 64) and cos(PI / 64) scaled by 2^31. The table is then laid out
 * in eight runs of 16 entries taken from those values with the appropriate
 * sign and direction, each shifted right by 3.
 */
static void qmf_init(void)
{
    const int32_t rot_sin = 105372028;
    const int32_t rot_cos = 2144896908;
    int32_t sn[17];   /* sin(index * PI / 64) * 0x7fffffff */
    int32_t cs[17];   /* cos(index * PI / 64) * 0x7fffffff */
    int n;

    sn[0] = 0;
    cs[0] = 0x7fffffff;

    /* Angle-addition recurrence: advance the previous pair by one step. */
    for (n = 1; n <= 16; n++) {
        const int32_t prev_s = sn[n - 1];
        const int32_t prev_c = cs[n - 1];

        sn[n] = 2 * (mul32(prev_c, rot_sin) + mul32(prev_s, rot_cos));
        cs[n] = 2 * (mul32(prev_c, rot_cos) - mul32(prev_s, rot_sin));
    }

    /* The >> 3 keeps headroom to avoid output overflow. */
    for (n = 0; n < 16; n++) {
        const int mirror = 16 - n;

        cos_table[n]       =  cs[n]      >> 3;
        cos_table[n + 16]  =  sn[mirror] >> 3;
        cos_table[n + 32]  = -sn[n]      >> 3;
        cos_table[n + 48]  = -cs[mirror] >> 3;
        cos_table[n + 64]  = -cs[n]      >> 3;
        cos_table[n + 80]  = -sn[mirror] >> 3;
        cos_table[n + 96]  =  sn[n]      >> 3;
        cos_table[n + 112] =  cs[mirror] >> 3;
    }
}
/*
 * Interactive exerciser for the long32 helpers.
 *
 * Loops forever: reads two numbers as text, echoes them, then prints their
 * sum, difference, product, quotient and modulus. The first operand is
 * copied into a scratch value before each in-place operation so that the
 * original stays intact.
 */
void main()
{
   struct long32 first, second, working;
   char s[20];

   for (;;) {
      /* Read both operands. */
      printf ("\r\n\r\nEnter the first number: ");
      get_string (s,20);
      atol32 (s,&first);

      printf ("\r\nEnter the second number: ");
      get_string (s,20);
      atol32 (s,&second);

      /* Echo them back. */
      printf ("\r\n\r\nA: ");
      print_long32 (&first);
      printf ("\r\nB: ");
      print_long32 (&second);

      /* Addition */
      working.hi = first.hi;
      working.lo = first.lo;
      add32 (&working, &second);
      printf ("\r\na + b = ");
      print_long32 (&working);

      /* Subtraction */
      working.hi = first.hi;
      working.lo = first.lo;
      sub32 (&working, &second);
      printf ("\r\na - b = ");
      print_long32 (&working);

      /* Multiplication */
      working.hi = first.hi;
      working.lo = first.lo;
      mul32 (&working, &second);
      printf ("\r\na * b = ");
      print_long32 (&working);

      /* Division */
      working.hi = first.hi;
      working.lo = first.lo;
      div32 (&working, &second);
      printf ("\r\na / b = ");
      print_long32 (&working);

      /* Remainder is written straight into the scratch value. */
      rem32 (&first, &second, &working);
      printf ("\r\na modulus b = ");
      print_long32 (&working);
   }
}
/**
 * Assign a bit allocation to every band/channel for a given noise level and
 * total up the bits this consumes.
 *
 * For each band of each full-band channel the value
 * peak_cb - band_masking_cb - noise selects the allocation: 1 below zero,
 * 2 + mul32(snr, 106000000) below 222, 8 + mul32(snr - 222, 69000000)
 * below 1312, and 26 otherwise. c->consumed_bits starts from a fixed
 * per-frame/per-channel amount (plus 72 when an LFE channel is present) and
 * is increased by bit_consumption[] of every allocation.
 *
 * @param c      encoder context (writes abits and consumed_bits)
 * @param noise  noise level subtracted from every band
 * @return OR of the USED_* flags for the allocation classes that occurred
 */
static int init_quantization_noise(DCAContext *c, int noise)
{
    int used = 0;
    int ch, band;

    c->consumed_bits = 132 + 493 * c->fullband_channels;
    if (c->lfe_channel)
        c->consumed_bits += 72;

    /* attempt to guess the bit distribution based on the previous frame */
    for (ch = 0; ch < c->fullband_channels; ch++) {
        for (band = 0; band < 32; band++) {
            const int snr_cb = c->peak_cb[band][ch] - c->band_masking_cb[band] - noise;

            if (snr_cb < 0) {
                c->abits[band][ch] = 1;
                used |= USED_1ABITS;
            } else if (snr_cb < 222) {
                c->abits[band][ch] = 2 + mul32(snr_cb, 106000000);
                used |= USED_NABITS;
            } else if (snr_cb < 1312) {
                c->abits[band][ch] = 8 + mul32(snr_cb - 222, 69000000);
                used |= USED_NABITS;
            } else {
                c->abits[band][ch] = 26;
                used |= USED_26ABITS;
            }
        }
    }

    /* Add up what the chosen allocations cost. */
    for (band = 0; band < 32; band++) {
        for (ch = 0; ch < c->fullband_channels; ch++)
            c->consumed_bits += bit_consumption[c->abits[band][ch]];
    }

    return used;
}
void print_long32 (struct long32 *input) { byte i; struct long32 divisor, digit, temp, value; divisor.hi = 0x3B9A; divisor.lo = 0xCA00; value.hi = input->hi; value.lo = input->lo; for(i=0;i<10;++i) { digit = value; div32 (&digit,&divisor); temp = digit; mul32 (&temp,&divisor); sub32 (&value, &temp); putc(digit.lo+'0'); temp.hi = 0; temp.lo = 0x000A; div32 (&divisor, &temp); } }
/*
 * Advance the timestamp in ReadDataBuf by the time elapsed since the
 * previous timer reading.
 *
 * The elapsed tick count is Last8253 - new8253 when the new reading is not
 * larger than the last one; otherwise the counter is treated as wrapped and
 * 0xFFFF - new8253 + Last8253 is used. The ticks are converted with
 * mul32( delta, NANOS_IN_TIC ), added to the stored nanoseconds, and every
 * full AMILL is carried over into the millisecond field.
 */
static VOID UpdateTimeStamp( USHORT new8253 )
{
    USHORT  ticks;
    USHORT  carry;
    ULONG   total;

    if( Last8253 < new8253 ) {
        /* wrapped */
        ticks = 0xFFFF - new8253 + Last8253;
    } else {
        ticks = Last8253 - new8253;
    }
    total = mul32( ticks, NANOS_IN_TIC );
    Last8253 = new8253;
    total += ReadDataBuf.nanosecs;

    /*
     * Carry whole milliseconds by repeated subtraction to avoid a runtime
     * divide; only a handful of iterations are ever expected.
     */
    carry = 0;
    while( total >= AMILL ) {
        ++carry;
        total -= AMILL;
    }
    if( carry != 0 ) {
        ReadDataBuf.millisecs += carry;
    }
    ReadDataBuf.nanosecs = total;
}
/**
 * Fixed-point transform of 512 real samples into 256 complex outputs.
 *
 * Even and odd input samples are windowed and packed as the real and
 * imaginary parts of 256 complex values ("two transforms in parallel"),
 * pre-rotated, run through eight butterfly passes (the sum branch is halved
 * with half32() in every pass), reordered through ff_reverse[] while being
 * post-rotated, and finally separated and recombined into the output.
 *
 * @param in   512 input samples
 * @param out  receives 256 complex values
 */
static void fft(const int32_t in[2 * 256], cplx32 out[256])
{
    cplx32 work[256], windowed[256], rotated[256];
    int n, span, stride, base;

    /* do two transforms in parallel */
    for (n = 0; n < 256; n++) {
        /* Apply the Hann window */
        windowed[n].re = mul32(in[2 * n],     0x3fffffff - (cos_t(8 * n + 2) >> 1));
        windowed[n].im = mul32(in[2 * n + 1], 0x3fffffff - (cos_t(8 * n + 6) >> 1));
    }

    /* pre-rotation */
    for (n = 0; n < 256; n++) {
        const int32_t cs = cos_t(4 * n + 2);
        const int32_t sn = sin_t(4 * n + 2);

        work[n].re = mul32(cs, windowed[n].re) - mul32(sn, windowed[n].im);
        work[n].im = mul32(cs, windowed[n].im) + mul32(sn, windowed[n].re);
    }

    /* Butterfly passes: the span halves while the twiddle stride doubles. */
    for (span = 256, stride = 1; span != 1; span >>= 1, stride <<= 1) {
        const int half = span / 2;

        for (base = 0; base < 256; base += span) {
            for (n = base; n < base + half; n++) {
                const int angle = 8 * stride * n;
                const int32_t cs = cos_t(angle);
                const int32_t sn = sin_t(angle);
                cplx32 *lo = &work[n];
                cplx32 *hi = &work[n + half];
                cplx32 sum, diff;

                sum.re  = lo->re + hi->re;
                sum.im  = lo->im + hi->im;
                diff.re = lo->re - hi->re;
                diff.im = lo->im - hi->im;

                lo->re = half32(sum.re);
                lo->im = half32(sum.im);
                hi->re = mul32(diff.re, cs) - mul32(diff.im, sn);
                hi->im = mul32(diff.im, cs) + mul32(diff.re, sn);
            }
        }
    }

    /* post-rotation */
    for (n = 0; n < 256; n++) {
        const int src = ff_reverse[n];
        const int32_t cs = cos_t(4 * n);
        const int32_t sn = sin_t(4 * n);

        rotated[n].re = mul32(work[src].re, cs) - mul32(work[src].im, sn);
        rotated[n].im = mul32(work[src].im, cs) + mul32(work[src].re, sn);
    }

    for (n = 0; n < 256; n++) {
        const cplx32 *a = &rotated[n];
        const cplx32 *b = &rotated[255 - n];
        const int32_t cs = cos_t(2 * n + 1);
        const int32_t sn = sin_t(2 * n + 1);
        cplx32 o1, o2;

        /* separate the results of the two transforms */
        o1.re =  a->re - b->re;
        o1.im =  a->im + b->im;
        o2.re =  a->im - b->im;
        o2.im = -a->re - b->re;

        /* combine them into one long transform */
        out[n].re = mul32( o1.re + o2.re, cs) + mul32( o1.im - o2.im, sn);
        out[n].im = mul32( o1.im + o2.im, cs) + mul32(-o1.re + o2.re, sn);
    }
}
/**
 * Assign a bit allocation to every band/channel for a given noise level,
 * quantize with it, and total up the bits this consumes.
 *
 * For each band of each full-band channel the value
 * peak_cb - band_masking_cb - noise selects the allocation: 1 below zero,
 * 2 + mul32(snr, 106000000) below 222, 8 + mul32(snr - 222, 69000000)
 * below 1312, and 26 otherwise. Scale factors are then recomputed with
 * calc_one_scale(), quantize_all() is run, and the sample bit cost is
 * gathered per channel: allocations in 1..DCA_CODE_BOOKS feed the
 * Huffman/fixed-length accumulators resolved by set_best_code(); all other
 * allocations are charged bit_consumption[] directly.
 *
 * @param c      encoder context (writes abits, scale_factor, quant,
 *               bit_allocation_sel, quant_index_sel and consumed_bits)
 * @param noise  noise level subtracted from every band
 * @return OR of the USED_* flags for the allocation classes that occurred
 */
static int init_quantization_noise(DCAEncContext *c, int noise)
{
    uint32_t huff_bits[MAX_CHANNELS][DCA_CODE_BOOKS][7];
    uint32_t clc_bits[MAX_CHANNELS][DCA_CODE_BOOKS];
    uint32_t sample_bits = 0;
    int used = 0;
    int ch, band;

    c->consumed_bits = 132 + 333 * c->fullband_channels;
    if (c->lfe_channel)
        c->consumed_bits += 72;

    /* attempt to guess the bit distribution based on the previous frame */
    for (ch = 0; ch < c->fullband_channels; ch++) {
        for (band = 0; band < 32; band++) {
            const int snr_cb = c->peak_cb[ch][band] - c->band_masking_cb[band] - noise;

            if (snr_cb < 0) {
                c->abits[ch][band] = 1;
                used |= USED_1ABITS;
            } else if (snr_cb < 222) {
                c->abits[ch][band] = 2 + mul32(snr_cb, 106000000);
                used |= USED_NABITS;
            } else if (snr_cb < 1312) {
                c->abits[ch][band] = 8 + mul32(snr_cb - 222, 69000000);
                used |= USED_NABITS;
            } else {
                c->abits[ch][band] = 26;
                used |= USED_26ABITS;
            }
        }
        c->consumed_bits += set_best_abits_code(c->abits[ch], 32, &c->bit_allocation_sel[ch]);
    }

    /* The scale factors are recalculated on every call so that the bit
       consumption is known when Huffman coding is used. This is a
       suboptimal solution. */
    /* TODO: consider caching the scaled values */
    for (ch = 0; ch < c->fullband_channels; ch++) {
        for (band = 0; band < 32; band++)
            c->scale_factor[ch][band] = calc_one_scale(c->peak_cb[ch][band],
                                                       c->abits[ch][band],
                                                       &c->quant[ch][band]);
    }
    quantize_all(c);

    memset(huff_bits, 0, sizeof(huff_bits));
    memset(clc_bits, 0, sizeof(clc_bits));

    for (ch = 0; ch < c->fullband_channels; ch++) {
        for (band = 0; band < 32; band++) {
            const int abits = c->abits[ch][band];

            if (abits && abits <= DCA_CODE_BOOKS) {
                /* Code-book candidates: track both coding costs. */
                accumulate_huff_bit_consumption(abits, c->quantized[ch][band],
                                                huff_bits[ch][abits - 1]);
                clc_bits[ch][abits - 1] += bit_consumption[abits];
            } else {
                sample_bits += bit_consumption[abits];
            }
        }
    }

    for (ch = 0; ch < c->fullband_channels; ch++)
        sample_bits += set_best_code(huff_bits[ch], clc_bits[ch], c->quant_index_sel[ch]);

    c->consumed_bits += sample_bits;

    return used;
}