/***************************************************************************
 Function:     vector_huffman

 Syntax:       Word16 vector_huffman(Word16 category,
                                     Word16 power_index,
                                     Word16 *raw_mlt_ptr,
                                     UWord32 *word_ptr)

               inputs:   Word16  category      index used for every per-category
                                               table read below (vector_dimension,
                                               number_of_vectors, max_bin,
                                               step_size_inverse_table,
                                               int_dead_zone, int_dead_zone_low_bits,
                                               table_of_bitcount_tables,
                                               table_of_code_tables)
                         Word16  power_index   index into
                                               standard_deviation_inverse_table
                         Word16 *raw_mlt_ptr   coefficients to encode; read
                                               sequentially, num_vecs * vec_dim
                                               values in total

               outputs:  return value          number_of_region_bits: the sum over
                                               all vectors of (table bit count +
                                               one sign bit per non-zero
                                               coefficient)
                         UWord32 *word_ptr     receives the packed code words,
                                               most significant bits first; the
                                               last (possibly partially filled)
                                               word is always written

 Description:  Huffman encoding for each region based on category and power_index

 WMOPS:     7kHz |    24kbit    |     32kbit
          -------|--------------|----------------
            AVG  |    0.03      |     0.03
          -------|--------------|----------------
            MAX  |    0.04      |     0.04
          -------|--------------|----------------

           14kHz |    24kbit    |     32kbit     |     48kbit
          -------|--------------|----------------|----------------
            AVG  |    0.03      |     0.03       |     0.03
          -------|--------------|----------------|----------------
            MAX  |    0.04      |     0.04       |     0.04
          -------|--------------|----------------|----------------

 NOTE(review): the move16()/move32()/test() calls take no arguments and their
 results are unused here; presumably they are complexity (WMOPS) counters of
 the basic-operator library - confirm before removing or reordering them.
***************************************************************************/
Word16 vector_huffman(Word16 category,
                      Word16 power_index,
                      Word16 *raw_mlt_ptr,
                      UWord32 *word_ptr)
{
    Word16  inv_of_step_size_times_std_dev;
    Word16  j,n;
    Word16  k;
    Word16  number_of_region_bits;
    Word16  number_of_non_zero;
    Word16  vec_dim;
    Word16  num_vecs;
    Word16  kmax, kmax_plus_one;
    Word16  index,signs_index;
    Word16  *bitcount_table_ptr;
    UWord16 *code_table_ptr;
    Word32  code_bits;
    Word16  number_of_code_bits;
    UWord32 current_word;
    Word16  current_word_bits_free;

    Word32 acca;
    Word32 accb;
    Word16 temp;

    Word16 mytemp;   /* new variable in Release 1.2 */
    Word16 myacca;   /* new variable in Release 1.2 */

    /* initialize variables */
    vec_dim = vector_dimension[category];
    move16();

    num_vecs = number_of_vectors[category];
    move16();

    kmax = max_bin[category];
    move16();

    kmax_plus_one = add(kmax,1);
    move16();

    /* output accumulator: one 32-bit word, filled from the top down */
    current_word = 0L;
    move16();

    current_word_bits_free = 32;
    move16();

    number_of_region_bits = 0;
    move16();

    /* set up table pointers */
    bitcount_table_ptr = (Word16 *)table_of_bitcount_tables[category];
    code_table_ptr = (UWord16 *) table_of_code_tables[category];

    /* compute inverse of step size * standard deviation */
    acca = L_mult(step_size_inverse_table[category],standard_deviation_inverse_table[power_index]);
    acca = L_shr(acca,1);
    acca = L_add(acca,4096);
    acca = L_shr(acca,13);

    /*
     *  The next two lines are new to Release 1.2
     *  (the two lowest bits are split off into mytemp and applied
     *  separately inside the coefficient loop)
     */
    mytemp = acca & 0x3;
    acca = L_shr(acca,2);

    inv_of_step_size_times_std_dev = extract_l(acca);

    /* one Huffman code per vector of vec_dim coefficients */
    for (n=0; n<num_vecs; n++)
    {
        index = 0;
        move16();

        signs_index = 0;
        move16();

        number_of_non_zero = 0;
        move16();

        for (j=0; j<vec_dim; j++)
        {
            /* quantize the coefficient magnitude */
            k = abs_s(*raw_mlt_ptr);

            acca = L_mult(k,inv_of_step_size_times_std_dev);
            acca = L_shr(acca,1);

            /*
             *  The next four lines are new to Release 1.2
             *  (contribution of the low bits held in mytemp; every
             *  intermediate result is truncated to Word16 by the casts)
             */
            myacca = (Word16)L_mult(k,mytemp);
            myacca = (Word16)L_shr(myacca,1);
            myacca = (Word16)L_add(myacca,int_dead_zone_low_bits[category]);
            myacca = (Word16)L_shr(myacca,2);

            acca = L_add(acca,int_dead_zone[category]);

            /*
             *  The next two lines are new to Release 1.2
             */
            acca = L_add(acca,myacca);
            acca = L_shr(acca,13);

            k = extract_l(acca);

            test();
            if (k != 0)
            {
                /* every non-zero coefficient contributes one sign bit
                   (1 when the raw coefficient is positive) */
                number_of_non_zero = add(number_of_non_zero,1);
                signs_index = shl(signs_index,1);

                test();
                if (*raw_mlt_ptr > 0)
                {
                    signs_index = add(signs_index,1);
                }

                /* clamp the quantized magnitude to kmax */
                temp = sub(k,kmax);
                test();
                if (temp > 0)
                {
                    k = kmax;
                    move16();
                }
            }

            /* index = index * (kmax+1) + k : the vector is treated as a
               base-(kmax+1) number */
            acca = L_shr(L_mult(index,(kmax_plus_one)),1);
            index = extract_l(acca);
            index = add(index,k);
            raw_mlt_ptr++;
        }

        /* table lookup, then append the sign bits below the code bits */
        code_bits = *(code_table_ptr+index);
        number_of_code_bits = add((*(bitcount_table_ptr+index)),number_of_non_zero);
        number_of_region_bits = add(number_of_region_bits,number_of_code_bits);

        acca = code_bits << number_of_non_zero;
        accb = L_deposit_l(signs_index);
        acca = L_add(acca,accb);
        code_bits = acca;
        move32();

        /* msb of codebits is transmitted first. */
        j = sub(current_word_bits_free,number_of_code_bits);
        test();
        if (j >= 0)
        {
            /* the whole code fits into the free bits of the current word */
            test();
            acca = code_bits << j;
            current_word = L_add(current_word,acca);
            current_word_bits_free = j;
            move16();
        }
        else
        {
            /* the code straddles a word boundary: the upper part completes
               the current word, the remaining j low bits start the next one */
            j = negate(j);
            acca = L_shr(code_bits,j);
            current_word = L_add(current_word,acca);

            *word_ptr++ = current_word;
            move16();

            current_word_bits_free = sub(32,j);
            test();
            current_word = code_bits << current_word_bits_free;
        }
    }

    /* flush the last word, even if it is only partially filled */
    *word_ptr++ = current_word;
    move16();

    return (number_of_region_bits);
}
// Unary minus for mat_GF2E: returns the matrix that negate() produces from a.
mat_GF2E operator-(const mat_GF2E& a)
{
   mat_GF2E negated;

   negate(negated, a);

   NTL_OPT_RETURN(mat_GF2E, negated);
}
static
long swap(long k, mat_ZZ& B, vec_long& P, vec_ZZ& D,
          vec_vec_ZZ& lam, mat_ZZ* U, long m, long verbose)

// swaps vectors k-1 and k;  assumes P(k-1) != 0
// returns 1 if vector k-1 need to be reduced after the swap...
//    this only occurs in 'case 2' when there are linear dependencies
//
// B      basis rows; rows k-1 and k are exchanged or row-transformed
// P, D   bookkeeping updated together with lam (P is read as an index
//        into lam's rows and into D)
// lam    coefficient table updated in place for rows k-1 .. m
// U      optional matrix (may be null) that receives the same row
//        operations as B
// m      last row index visited by the update loops
// verbose  not referenced anywhere in this body
//
// NOTE(review): B, P, lam and U are indexed with (), D with []; presumably
// 1-based versus 0-based access - confirm against the container classes.

{
   long i, j;

   // scratch integers; static, so they persist between calls
   // (and are shared by all callers)
   static ZZ t1, t2, t3, e, x, y;


   if (P(k) != 0) {
      // case 1: P(k) != 0 -- plain exchange of rows k-1 and k

      swap(B(k-1), B(k));
      if (U) swap((*U)(k-1), (*U)(k));

      // exchange the lam entries of rows k-1 and k in every column P(j),
      // j <= k-2, with P(j) != 0
      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            swap(lam(k-1)(P(j)), lam(k)(P(j)));

      // recompute columns P(k)-1 and P(k) of all later rows; both new
      // values are computed from the old ones before either is stored
      for (i = k+1; i <= m; i++) {
         MulAddDiv(t1, lam(i)(P(k)-1), lam(i)(P(k)),
                   lam(k)(P(k)-1), D[P(k)-2], D[P(k)-1]);
         MulSubDiv(t2, lam(i)(P(k)-1), lam(i)(P(k)),
                   D[P(k)], lam(k)(P(k)-1), D[P(k)-1]);
         lam(i)(P(k)-1) = t1;
         lam(i)(P(k)) = t2;
      }

      // finally update D[P(k)-1] itself (it was an input to the loop above)
      MulAddDiv(D[P(k)-1], D[P(k)], lam(k)(P(k)-1),
                D[P(k)-2], lam(k)(P(k)-1), D[P(k)-1]);

      return 0;
   }
   else if (!IsZero(lam(k)(P(k-1)))) {
      // case 2: P(k) == 0 but lam(k)(P(k-1)) != 0 -- rows k-1 and k are
      // combined with a transform built from an extended GCD

      XGCD(e, x, y, lam(k)(P(k-1)), D[P(k-1)]);

      ExactDiv(t1, lam(k)(P(k-1)), e);
      ExactDiv(t2, D[P(k-1)], e);

      // t3 keeps D[P(k-1)]/e; t2 becomes its negation for the transform
      t3 = t2;
      negate(t2, t2);
      RowTransform(B(k-1), B(k), t1, t2, y, x);
      if (U) RowTransform((*U)(k-1), (*U)(k), t1, t2, y, x);
      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            RowTransform(lam(k-1)(P(j)), lam(k)(P(j)), t1, t2, y, x);

      // from here on t2 = (D[P(k-1)]/e)^2
      sqr(t2, t2);
      ExactDiv(D[P(k-1)], D[P(k-1)], t2);

      // divide the D and lam entries belonging to later rows by t2 ...
      for (i = k+1; i <= m; i++)
         if (P(i) != 0) {
            ExactDiv(D[P(i)], D[P(i)], t2);
            for (j = i+1; j <= m; j++) {
               ExactDiv(lam(j)(P(i)), lam(j)(P(i)), t2);
            }
         }

      // ... and column P(k-1) of the later rows by t3
      for (i = k+1; i <= m; i++) {
         ExactDiv(lam(i)(P(k-1)), lam(i)(P(k-1)), t3);
      }

      swap(P(k-1), P(k));

      return 1;
   }
   else {
      // case 3: P(k) == 0 and lam(k)(P(k-1)) == 0 -- exchange rows and
      // their P entries; D and the later rows of lam are left untouched

      swap(B(k-1), B(k));
      if (U) swap((*U)(k-1), (*U)(k));

      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            swap(lam(k-1)(P(j)), lam(k)(P(j)));

      swap(P(k-1), P(k));

      return 0;
   }
}
// Computes res = cos(x) at the current RR precision.
//
// The argument is rewritten as x/pi = (n + 1/2) + f with n = floor(x/pi),
// and the result is obtained from the Taylor series of sin applied to
// +/- pi*f.  The working precision is raised temporarily and restored to
// its value on entry before res is written.
void cos(RR& res, const RR& x)
{
   if (x == 0) {
      res = 1;
      return;
   }

   if (Lg2(x) > 1000)
      Error("cos: sorry...argument too large in absolute value");

   const long target_prec = RR::precision();

   RR pi, quotient, frac;
   RR n;

   // Argument reduction.  frac must be known to more than target_prec
   // bits, so the reduction is repeated with a growing working precision
   // until enough bits of frac have survived the cancellation.
   long work_prec = target_prec + Lg2(x) + 20;
   bool have_enough_bits = false;

   while (!have_enough_bits) {
      RR::SetPrecision(work_prec);
      ComputePi(pi);
      xcopy(quotient, x/pi);
      xcopy(n, floor(quotient));
      xcopy(frac, quotient - (n + 0.5));

      if (frac == 0 || work_prec < target_prec - Lg2(frac) + Lg2(n) + 10) {
         // not enough bits of frac yet: enlarge the precision and retry
         work_prec = work_prec + max(20, work_prec/10);
      }
      else {
         have_enough_bits = true;
      }
   }

   // Precision used for the series evaluation itself.
   RR::SetPrecision(target_prec + NumBits(target_prec) + 10);
   ComputePi(pi);

   xcopy(frac, pi * frac);

   // n is even exactly when it is zero or its exponent is non-zero;
   // in that case the sign of frac (and hence of sin(frac)) is flipped.
   const bool n_is_even = (n == 0 || n.exponent() != 0);
   if (n_is_even) {
      xcopy(frac, -frac);
   }

   // Taylor series of sin(frac): keep adding terms until the partial sum
   // no longer changes at the working precision.
   RR sum, next_sum, term;

   sum = 0;
   xcopy(term, frac);

   long i = 3;
   while (true) {
      add(next_sum, sum, term);
      if (sum == next_sum) break;
      xcopy(sum, next_sum);

      // term <- -term * frac^2 / ((i-1) * i)
      mul(term, term, frac);
      mul(term, term, frac);
      div(term, term, i-1);
      div(term, term, i);
      negate(term, term);

      i = i + 2;
   }

   RR::SetPrecision(target_prec);
   xcopy(res, sum);
}
// Parses a decimal floating-point literal from the C string s into x.
//
// Accepted shape: leading white space, an optional '-', integer digits,
// an optional '.' followed by fraction digits, and an optional exponent
// ('e' or 'E', optional sign, at least one digit).  At least one digit
// group or an exponent must be present; a lone "." is rejected.
// Malformed input (including a null s) is reported via Error("bad RR input"),
// an exponent of NTL_OVFBND or more via Error("RR input overflow").
// Parsing stops at the first character that does not fit the grammar;
// trailing characters are not examined.
void conv(RR& x, const char *s)
{
   if (!s) Error("bad RR input");

   long pos = 0;
   long ch = s[pos];

   // skip leading white space
   while (IsWhiteSpace(ch)) {
      pos++;
      ch = s[pos];
   }

   // optional leading minus sign
   long sign = 1;
   if (ch == '-') {
      sign = -1;
      pos++;
      ch = s[pos];
   }

   // The value is accumulated as mantissa / scale, with scale a power of 10.
   ZZ mantissa, scale;
   mantissa = 0;
   scale = 1;

   long have_int_digits = 0;
   long have_dot = 0;
   long have_frac_digits = 0;

   long digit = CharToIntVal(ch);

   // integer part
   while (digit >= 0 && digit <= 9) {
      have_int_digits = 1;
      mul(mantissa, mantissa, 10);
      add(mantissa, mantissa, digit);
      pos++;
      ch = s[pos];
      digit = CharToIntVal(ch);
   }

   // fractional part
   if (ch == '.') {
      have_dot = 1;
      pos++;
      ch = s[pos];
      digit = CharToIntVal(ch);

      while (digit >= 0 && digit <= 9) {
         have_frac_digits = 1;
         mul(mantissa, mantissa, 10);
         add(mantissa, mantissa, digit);
         mul(scale, scale, 10);
         pos++;
         ch = s[pos];
         digit = CharToIntVal(ch);
      }
   }

   if (have_dot && !have_int_digits && !have_frac_digits)
      Error("bad RR input");

   // exponent part
   ZZ expo;
   long have_exp = 0;
   long exp_sign = 1;

   if (ch == 'e' || ch == 'E') {
      have_exp = 1;
      pos++;
      ch = s[pos];

      if (ch == '-' || ch == '+') {
         if (ch == '-') exp_sign = -1;
         pos++;
         ch = s[pos];
      }

      digit = CharToIntVal(ch);
      if (digit < 0 || digit > 9) Error("bad RR input");

      expo = 0;
      while (digit >= 0 && digit <= 9) {
         mul(expo, expo, 10);
         add(expo, expo, digit);
         pos++;
         ch = s[pos];
         digit = CharToIntVal(ch);
      }
   }

   if (!have_int_digits && !have_frac_digits && !have_exp)
      Error("bad RR input");

   RR numer, denom, value;

   const long saved_prec = RR::precision();

   if (have_int_digits || have_frac_digits) {
      // convert numerator and denominator exactly, then divide; with an
      // exponent pending, the division already runs with 10 guard bits
      ConvPrec(numer, mantissa, max(NumBits(mantissa), 1));
      ConvPrec(denom, scale, NumBits(scale));
      if (have_exp) RR::SetPrecision(saved_prec + 10);
      div(value, numer, denom);
   }
   else {
      // exponent only: the mantissa defaults to one
      set(value);
   }

   if (sign < 0) negate(value, value);

   if (have_exp) {
      if (expo >= NTL_OVFBND) Error("RR input overflow");

      long E;
      conv(E, expo);
      if (exp_sign < 0) E = -E;

      // scale by 10^E with guard bits, then restore the caller's precision
      RR ten_to_E;
      RR::SetPrecision(saved_prec + 10);
      power(ten_to_E, to_RR(10), E);
      mul(value, value, ten_to_E);
      RR::prec = saved_prec;
   }

   // final rounding to the caller's precision
   xcopy(x, value);
}
/*
 * Counterpart of assertEqualCmp: passes both operands and the comparison
 * callback straight through to assertEqualCmp and returns negate() of
 * whatever that call yields.
 */
_BOOL_RETURN_TYPE assertNotEqualCmp(const void *arg1, const void *arg2, cmp_fn_t cmp)
{
    return negate(assertEqualCmp(arg1, arg2, cmp));
}
// Unary minus for mat_ZZ_p: returns the matrix that negate() produces from a.
mat_ZZ_p operator-(const mat_ZZ_p& a)
{
   mat_ZZ_p negated;

   negate(negated, a);

   NTL_OPT_RETURN(mat_ZZ_p, negated);
}
/*
 * Coder_ld8a - encode one speech frame into analysis parameters.
 *
 * state : encoder state; the signal buffers (speech, wsp, exc and their
 *         old_* histories), the LSP memories, the filter memories and the
 *         pitch sharpening value are read and updated here.
 * ana   : output parameter array, written sequentially: the values produced
 *         by Qua_lsp (the pointer is then advanced by 2), followed per
 *         subframe by the pitch index, the pitch parity (first subframe
 *         only), the codebook position index, the codebook sign index and
 *         the gain index.
 */
void Coder_ld8a(
     g729a_encoder_state *state,
     Word16 ana[]       /* output  : Analysis parameters */
)
{

  /* LPC analysis */

  Word16 Aq_t[(MP1)*2];         /* A(z)   quantized for the 2 subframes */
  Word16 Ap_t[(MP1)*2];         /* A(z/gamma)       for the 2 subframes */
  Word16 *Aq, *Ap;              /* Pointer on Aq_t and Ap_t             */

  /* Other vectors */

  Word16 h1[L_SUBFR];            /* Impulse response h1[]              */
  Word16 xn[L_SUBFR];            /* Target vector for pitch search     */
  Word16 xn2[L_SUBFR];           /* Target vector for codebook search  */
  Word16 code[L_SUBFR];          /* Fixed codebook excitation          */
  Word16 y1[L_SUBFR];            /* Filtered adaptive excitation       */
  Word16 y2[L_SUBFR];            /* Filtered fixed codebook excitation */
  Word16 g_coeff[4];             /* Correlations between xn & y1       */

  Word16 g_coeff_cs[5];
  Word16 exp_g_coeff_cs[5];      /* Correlations between xn, y1, & y2
                                     <y1,y1>, -2<xn,y1>,
                                          <y2,y2>, -2<xn,y2>, 2<y1,y2> */

  /* Scalars */

  Word16 i, j, k, i_subfr;
  Word16 T_op, T0, T0_min, T0_max, T0_frac;
  Word16 gain_pit, gain_code, index;
  Word16 temp, taming;
  Word32 L_temp;

/*------------------------------------------------------------------------*
 *  - Perform LPC analysis:                                               *
 *       * autocorrelation + lag windowing                                *
 *       * Levinson-durbin algorithm to find a[]                          *
 *       * convert a[] to lsp[]                                           *
 *       * quantize and code the LSPs                                     *
 *       * find the interpolated LSPs and convert to a[] for the 2        *
 *         subframes (both quantized and unquantized)                     *
 *------------------------------------------------------------------------*/
  {
     /* Temporary vectors */
    Word16 r_l[MP1], r_h[MP1];       /* Autocorrelations low and hi          */
    Word16 rc[M];                    /* Reflection coefficients.             */
    Word16 lsp_new[M], lsp_new_q[M]; /* LSPs at 2th subframe                 */

    /* LP analysis */

    Autocorr(state->p_window, M, r_h, r_l);       /* Autocorrelations */
    Lag_window(M, r_h, r_l);                      /* Lag windowing    */
    Levinson(r_h, r_l, Ap_t, rc);                 /* Levinson Durbin  */
    Az_lsp(Ap_t, lsp_new, state->lsp_old);        /* From A(z) to lsp */

    /* LSP quantization */

    Qua_lsp(state, lsp_new, lsp_new_q, ana);
    ana += 2;                         /* Advance analysis parameters pointer */

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes                  *
     * The interpolated parameters are in array Aq_t[].                   *
     *--------------------------------------------------------------------*/

    Int_qlpc(state->lsp_old_q, lsp_new_q, Aq_t);

    /* Compute A(z/gamma) */
    /* (Ap_t, used above as Levinson output, is overwritten here) */

    Weight_Az(&Aq_t[0],   GAMMA1, M, &Ap_t[0]);
    Weight_Az(&Aq_t[MP1], GAMMA1, M, &Ap_t[MP1]);

    /* update the LSPs for the next frame */

    Copy(lsp_new,   state->lsp_old,   M);
    Copy(lsp_new_q, state->lsp_old_q, M);
  }

 /*----------------------------------------------------------------------*
  * - Find the weighted input speech w_sp[] for the whole speech frame   *
  * - Find the open-loop pitch delay                                     *
  *----------------------------------------------------------------------*/

  /* the residual of each subframe is written into exc[] */
  Residu(&Aq_t[0], &(state->speech[0]), &(state->exc[0]), L_SUBFR);
  Residu(&Aq_t[MP1], &(state->speech[L_SUBFR]), &(state->exc[L_SUBFR]), L_SUBFR);

  {
    Word16 Ap1[MP1];

    /* first subframe: filter exc[] through Ap1 into wsp[] */
    Ap = Ap_t;
    Ap1[0] = 4096;
    for(i=1; i<=M; i++)    /* Ap1[i] = Ap[i] - 0.7 * Ap[i-1]; */
       Ap1[i] = sub(Ap[i], mult(Ap[i-1], 22938));
    Syn_filt(Ap1, &(state->exc[0]), &(state->wsp[0]), L_SUBFR, state->mem_w, 1);

    /* second subframe: Ap1[0] keeps its value, only Ap1[1..M] is rebuilt */
    Ap += MP1;
    for(i=1; i<=M; i++)    /* Ap1[i] = Ap[i] - 0.7 * Ap[i-1]; */
       Ap1[i] = sub(Ap[i], mult(Ap[i-1], 22938));
    Syn_filt(Ap1, &(state->exc[L_SUBFR]), &(state->wsp[L_SUBFR]), L_SUBFR, state->mem_w, 1);
  }

  /* Find open loop pitch lag */

  T_op = Pitch_ol_fast(state->wsp, PIT_MAX, L_FRAME);

  /* Range for closed loop pitch search in 1st subframe */
  /* [T_op-3, T_op+3], shifted as a whole so it stays in [PIT_MIN, PIT_MAX] */

  T0_min = T_op - 3;
  T0_max = T0_min + 6;
  if (T0_min < PIT_MIN)
  {
    T0_min = PIT_MIN;
    T0_max = PIT_MIN + 6;
  }
  else if (T0_max > PIT_MAX)
  {
     T0_max = PIT_MAX;
     T0_min = PIT_MAX - 6;
  }

 /*------------------------------------------------------------------------*
  *          Loop for every subframe in the analysis frame                 *
  *------------------------------------------------------------------------*
  *  To find the pitch and innovation parameters. The subframe size is     *
  *  L_SUBFR and the loop is repeated 2 times.                             *
  *     - find the weighted LPC coefficients                               *
  *     - find the LPC residual signal res[]                               *
  *     - compute the target signal for pitch search                       *
  *     - compute impulse response of weighted synthesis filter (h1[])     *
  *     - find the closed-loop pitch parameters                            *
  *     - encode the pitch delay                                           *
  *     - find target vector for codebook search                           *
  *     - codebook search                                                  *
  *     - VQ of pitch and codebook gains                                   *
  *     - update states of weighting filter                                *
  *------------------------------------------------------------------------*/

  Aq = Aq_t;    /* pointer to interpolated quantized LPC parameters */
  Ap = Ap_t;    /* pointer to weighted LPC coefficients             */

  for (i_subfr = 0;  i_subfr < L_FRAME; i_subfr += L_SUBFR)
  {

    /*---------------------------------------------------------------*
     * Compute impulse response, h1[], of weighted synthesis filter  *
     *---------------------------------------------------------------*/

    /* filtered in place; &h1[1] (all zeros) is passed as filter memory */
    h1[0] = 4096;
    Set_zero(&h1[1], L_SUBFR-1);
    Syn_filt(Ap, h1, h1, L_SUBFR, &h1[1], 0);

   /*----------------------------------------------------------------------*
    *  Find the target vector for pitch search:                            *
    *----------------------------------------------------------------------*/

    Syn_filt(Ap, &(state->exc[i_subfr]), xn, L_SUBFR, state->mem_w0, 0);

    /*---------------------------------------------------------------------*
     *                 Closed-loop fractional pitch search                 *
     *---------------------------------------------------------------------*/

    T0 = Pitch_fr3_fast(&(state->exc[i_subfr]), xn, h1, L_SUBFR, T0_min, T0_max,
                    i_subfr, &T0_frac);

    /* Enc_lag3 receives T0_min/T0_max by address and may update them */
    index = Enc_lag3(T0, T0_frac, &T0_min, &T0_max,PIT_MIN,PIT_MAX,i_subfr);

    *ana++ = index;

    if (i_subfr == 0) {
      *ana++ = Parity_Pitch(index);
    }

   /*-----------------------------------------------------------------*
    *   - find filtered pitch exc                                     *
    *   - compute pitch gain and limit between 0 and 1.2              *
    *   - update target vector for codebook search                    *
    *-----------------------------------------------------------------*/

    Syn_filt(Ap, &(state->exc[i_subfr]), y1, L_SUBFR, state->mem_zero, 0);

    gain_pit = G_pitch(xn, y1, g_coeff, L_SUBFR);

    /* clip pitch gain if taming is necessary */

    taming = test_err(state, T0, T0_frac);

    if( taming == 1){
      if (gain_pit > GPCLIP) {
        gain_pit = GPCLIP;
      }
    }

    /* xn2[i]   = xn[i] - y1[i] * gain_pit  */

    for (i = 0; i < L_SUBFR; i++)
    {
      /* basic-operator form kept for reference (gain_pit in Q14):
       *   L_temp = L_mult(y1[i], gain_pit);
       *   L_temp = L_shl(L_temp, 1);
       * the plain-arithmetic form below is what actually runs */
      L_temp = ((Word32)y1[i] * gain_pit) << 2;
      xn2[i] = sub(xn[i], extract_h(L_temp));
    }


   /*-----------------------------------------------------*
    * - Innovative codebook search.                       *
    *-----------------------------------------------------*/

    /* the loop counter i is reused to receive the sign index */
    index = ACELP_Code_A(xn2, h1, T0, state->sharp, code, y2, &i);

    *ana++ = index;        /* Positions index */
    *ana++ = i;            /* Signs index     */


   /*-----------------------------------------------------*
    * - Quantization of gains.                            *
    *-----------------------------------------------------*/

    g_coeff_cs[0]     = g_coeff[0];            /* <y1,y1> */
    exp_g_coeff_cs[0] = negate(g_coeff[1]);    /* Q-Format:XXX -> JPN */
    g_coeff_cs[1]     = negate(g_coeff[2]);    /* (xn,y1) -> -2<xn,y1> */
    exp_g_coeff_cs[1] = negate(add(g_coeff[3], 1)); /* Q-Format:XXX -> JPN */

    Corr_xy2( xn, y1, y2, g_coeff_cs, exp_g_coeff_cs );  /* Q0 Q0 Q12 ^Qx ^Q0 */
                         /* g_coeff_cs[3]:exp_g_coeff_cs[3] = <y2,y2>   */
                         /* g_coeff_cs[4]:exp_g_coeff_cs[4] = -2<xn,y2> */
                         /* g_coeff_cs[5]:exp_g_coeff_cs[5] = 2<y1,y2>  */
                         /* NOTE(review): both arrays have 5 elements, so
                            index 5 does not exist; the three lines above
                            presumably mean entries [2], [3] and [4] -
                            confirm against Corr_xy2 */

    /* gain_pit and gain_code are passed by address and come back holding
       the values used for the excitation update below */
    *ana++ = Qua_gain(code, g_coeff_cs, exp_g_coeff_cs,
                         L_SUBFR, &gain_pit, &gain_code, taming);


   /*------------------------------------------------------------*
    * - Update pitch sharpening "sharp" with quantized gain_pit  *
    *------------------------------------------------------------*/

    state->sharp = gain_pit;
    if (state->sharp > SHARPMAX)      { state->sharp = SHARPMAX; }
    else if (state->sharp < SHARPMIN) { state->sharp = SHARPMIN; }

   /*------------------------------------------------------*
    * - Find the total excitation                          *
    * - update filters memories for finding the target     *
    *   vector in the next subframe                        *
    *------------------------------------------------------*/

    for (i = 0; i < L_SUBFR;  i++)
    {
      /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */
      /* exc[i]  in Q0   gain_pit in Q14               */
      /* code[i] in Q13  gain_cod in Q1                */

      /* basic-operator form kept for reference:
       *   L_temp = L_mult(exc[i+i_subfr], gain_pit);
       *   L_temp = L_mac(L_temp, code[i], gain_code);
       *   L_temp = L_shl(L_temp, 1);
       */
      L_temp = (Word32)(state->exc[i+i_subfr]) * (Word32)gain_pit +
               (Word32)code[i] * (Word32)gain_code;
      L_temp <<= 2;
      state->exc[i+i_subfr] = g_round(L_temp);
    }

    update_exc_err(state, gain_pit, T0);

    /* mem_w0[] receives the last M samples of xn - gain_pit*y1 - gain_code*y2 */
    for (i = L_SUBFR-M, j = 0; i < L_SUBFR; i++, j++)
    {
      temp       = ((Word32)y1[i] * (Word32)gain_pit) >> 14;
      k          = ((Word32)y2[i] * (Word32)gain_code) >> 13;
      state->mem_w0[j]  = sub(xn[i], add(temp, k));
    }

    Aq += MP1;           /* interpolated LPC parameters for next subframe */
    Ap += MP1;

  }

 /*--------------------------------------------------*
  * Update signal for next frame.                    *
  * -> shift to the left by L_FRAME:                 *
  *     speech[], wsp[] and exc[]                    *
  *--------------------------------------------------*/

  Copy(&(state->old_speech[L_FRAME]), &(state->old_speech[0]), L_TOTAL-L_FRAME);
  Copy(&(state->old_wsp[L_FRAME]), &(state->old_wsp[0]), PIT_MAX);
  Copy(&(state->old_exc[L_FRAME]), &(state->old_exc[0]), PIT_MAX+L_INTERPOL);
}
NTL_START_IMPL

// Computes the characteristic polynomial f of the square matrix M.
//
// A 0 x 0 matrix yields the constant polynomial 1 and a 1 x 1 matrix
// yields X - M(1,1).  Otherwise a working copy of M is brought to upper
// Hessenberg form by similarity transformations (a row operation followed
// by the matching column operation), and f is then built up from the
// leading principal blocks of that Hessenberg matrix.
// A non-square M is reported through LogicError.
void CharPoly(zz_pX& f, const mat_zz_p& M)
{
   const long n = M.NumRows();

   if (M.NumCols() != n)
      LogicError("CharPoly: nonsquare matrix");

   if (n == 0) {
      set(f);
      return;
   }

   if (n == 1) {
      zz_p neg_entry;

      SetX(f);
      negate(neg_entry, M(1, 1));
      SetCoeff(f, 0, neg_entry);
      return;
   }

   mat_zz_p H;
   H = M;

   zz_p pivot, factor, prod;

   // --- reduction to Hessenberg form ---------------------------------
   for (long step = 2; step <= n-1; step++) {
      // look for a non-zero entry in column step-1, rows step..n
      long pivot_row = step;
      while (pivot_row <= n && IsZero(H(pivot_row, step-1)))
         pivot_row++;

      // column already zero below the subdiagonal: nothing to do
      if (pivot_row > n) continue;

      pivot = H(pivot_row, step-1);

      if (pivot_row > step) {
         // exchange the two rows, then the two matching columns
         swap(H(pivot_row), H(step));

         for (long j = 1; j <= n; j++)
            swap(H(j, pivot_row), H(j, step));
      }

      for (long row = step+1; row <= n; row++) {
         div(factor, H(row, step-1), pivot);

         // row(row) -= factor * row(step)
         for (long j = step; j <= n; j++) {
            mul(prod, factor, H(step, j));
            sub(H(row, j), H(row, j), prod);
         }

         // col(step) += factor * col(row)
         for (long j = 1; j <= n; j++) {
            mul(prod, factor, H(j, row));
            add(H(j, step), H(j, step), prod);
         }
      }
   }

   // --- recurrence over the leading blocks of H ----------------------
   vec_zz_pX F;
   F.SetLength(n+1);

   zz_pX T;
   T.SetMaxLength(n);

   zz_p chain;   // running product of subdiagonal entries

   set(F[0]);

   for (long k = 1; k <= n; k++) {
      // F[k] = (X - H(k,k)) * F[k-1] ...
      LeftShift(F[k], F[k-1], 1);
      mul(T, F[k-1], H(k, k));
      sub(F[k], F[k], T);

      // ... minus the contributions of column k above the diagonal
      set(chain);
      for (long d = 1; d <= k-1; d++) {
         mul(chain, chain, H(k-d+1, k-d));
         mul(prod, chain, H(k-d, k));
         mul(T, F[k-d-1], prod);
         sub(F[k], F[k], T);
      }
   }

   f = F[n];
}
// Unary minus for vec_RR: returns the vector that negate() produces from a.
vec_RR operator-(const vec_RR& a)
{
   vec_RR negated;

   negate(negated, a);

   NTL_OPT_RETURN(vec_RR, negated);
}
/// Unary minus: yields whatever negate() returns for this value.
constexpr Rational operator -() const
{
    return this->negate();
}
void inv(GF2E& d, Mat<GF2E>& X, const Mat<GF2E>& A) { long n = A.NumRows(); if (A.NumCols() != n) LogicError("inv: nonsquare matrix"); if (n == 0) { set(d); X.SetDims(0, 0); return; } const GF2XModulus& G = GF2E::modulus(); GF2X t1, t2; GF2X pivot; GF2X pivot_inv; Vec< GF2XVec > M; // scratch space M.SetLength(n); for (long i = 0; i < n; i++) { M[i].SetSize(n, 2*GF2E::WordLength()); for (long j = 0; j < n; j++) { M[i][j] = rep(A[i][j]); } } GF2X det; det = 1; Vec<long> P; P.SetLength(n); for (long k = 0; k < n; k++) P[k] = k; // records swap operations GF2Context GF2_context; GF2_context.save(); double sz = GF2E_SizeInWords(); bool seq = double(n)*double(n)*sz*sz < PAR_THRESH; bool pivoting = false; for (long k = 0; k < n; k++) { long pos = -1; for (long i = k; i < n; i++) { rem(pivot, M[i][k], G); if (pivot != 0) { InvMod(pivot_inv, pivot, G); pos = i; break; } } if (pos != -1) { if (k != pos) { swap(M[pos], M[k]); negate(det, det); P[k] = pos; pivoting = true; } MulMod(det, det, pivot, G); { // multiply row k by pivot_inv GF2X *y = &M[k][0]; for (long j = 0; j < n; j++) { rem(t2, y[j], G); MulMod(y[j], t2, pivot_inv, G); } y[k] = pivot_inv; } NTL_GEXEC_RANGE(seq, n, first, last) NTL_IMPORT(n) NTL_IMPORT(k) GF2_context.restore(); GF2X *y = &M[k][0]; GF2X t1, t2; for (long i = first; i < last; i++) { if (i == k) continue; // skip row k GF2X *x = &M[i][0]; rem(t1, x[k], G); negate(t1, t1); x[k] = 0; if (t1 == 0) continue; // add t1 * row k to row i for (long j = 0; j < n; j++) { mul(t2, y[j], t1); add(x[j], x[j], t2); } } NTL_GEXEC_RANGE_END } else { clear(d); return; } }
/*
 * D2i40_11 - search for the best pair of pulses and build the codeword.
 *
 * The function picks two pulse positions (ip0 from trackTable0, ip1 from
 * trackTable1), returns their Gray-coded indices packed as
 * posIndex[0] + (posIndex[1] << 4), stores the two sign bits in *sign,
 * writes the two-pulse codeword to code[] and its filtered version to y[].
 *
 * Side effects on the inputs: Dn[] is replaced by its absolute values, and
 * the cross-correlation entries of rr[] touched below are multiplied by
 * the product of the corresponding pulse signs.
 */
static Word16 D2i40_11( /* (o)    : Index of pulses positions.               */
    Word16 Dn[],          /* (i)    : Correlations between h[] and Xn[].       */
    Word16 rr[],          /* (i)    : Correlations of impulse response h[].    */
    Word16 h[],           /* (i)    : Impulse response of filters.             */
    Word16 code[],        /* (o)    : Selected algebraic codeword.             */
    Word16 y[],           /* (o)    : Filtered algebraic codeword.             */
    Word16 *sign          /* (o)    : Signs of the 2 pulses.                   */
)
{
    Word16 i0, i1, ip0, ip1, p0, p1;
    Word16 i, j, index, tmp, swap;
    Word16 ps0, ps1, alp, alp0;
    Word32 alp1;
    Word16 ps1c, psc, alpha;
    Word32 L_temp;
    Word16 posIndex[2], signIndex[2];
    Word16 m0_bestPos, m1_bestPos;
    Word16 p_sign[L_SUBFR];

    Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
    Word16 *rri0i1, *RRi1i1, *rri0i3, *RRi3i4;
    Word16 *rri1i2, *rri1i3, *rri1i4;
    Word16 *rri2i3;

    Word16 *ptr_ri0i0, *ptr_ri1i1;
    Word16 *ptr_ri0i1, *ptr_Ri0i2, *ptr_ri0i3, *ptr_Ri3i4;
    Word16 *ptr_ri1i2, *ptr_ri1i3, *ptr_ri1i4;
    Word16 *ptr_ri2i3;

    Word16 *outPtr_ri1i1;   /* Outside loop pointer */

    /* Init pointers */
    /* rr[] is carved into five blocks of NB_POS entries followed by
       eight consecutive blocks of MSIZE entries */
    rri0i0 = rr;
    rri1i1 = rri0i0 + NB_POS;
    rri2i2 = rri1i1 + NB_POS;
    rri3i3 = rri2i2 + NB_POS;
    rri4i4 = rri3i3 + NB_POS;

    rri0i1 = rri4i4 + NB_POS;
    RRi1i1 = rri0i1 + MSIZE;   /* Special for 6.4 kbps */
    rri0i3 = RRi1i1 + MSIZE;
    RRi3i4 = rri0i3 + MSIZE;   /* Special for 6.4 kbps */
    rri1i2 = RRi3i4 + MSIZE;
    rri1i3 = rri1i2 + MSIZE;
    rri1i4 = rri1i3 + MSIZE;
    rri2i3 = rri1i4 + MSIZE;

    /*-----------------------------------------------------------------------*
     * Choose the sign of the impulse.                                       *
     *-----------------------------------------------------------------------*/
    /* p_sign[i] is 0x7fff for Dn[i] >= 0 and 0x8000 otherwise;
       Dn[i] is replaced by its magnitude */
    for (i=0; i<L_SUBFR; i++) {
        if( Dn[i] >= 0) {
            p_sign[i] = 0x7fff;
        }
        else {
            p_sign[i] = (Word16)0x8000;
            Dn[i] = negate(Dn[i]);
        }
    }

    /*-------------------------------------------------------------------*
     * Modification of rrixiy[] to take signs into account.              *
     *-------------------------------------------------------------------*/
    /* every cross term is multiplied by the product of the two signs of
       the positions it relates; the blocks are walked in the same
       (i0 outer, i1 inner) order as the search below */
    ptr_ri0i1 = rri0i1;
    ptr_ri0i3 = rri0i3;
    for(i0=0; i0<L_SUBFR; i0+=STEP) {
        for(i1=1; i1<L_SUBFR; i1+=STEP) {
            *ptr_ri0i1 = mult(*ptr_ri0i1, mult(p_sign[i0], p_sign[i1]));
            ptr_ri0i1++;
            *ptr_ri0i3 = mult(*ptr_ri0i3, mult(p_sign[i0], p_sign[i1+2]));
            ptr_ri0i3++;
        }
    }

    ptr_ri1i2 = rri1i2;
    ptr_ri1i3 = rri1i3;
    ptr_ri1i4 = rri1i4;
    for(i0=1; i0<L_SUBFR; i0+=STEP) {
        for(i1=2; i1<L_SUBFR; i1+=STEP) {
            *ptr_ri1i2 = mult(*ptr_ri1i2, mult(p_sign[i0], p_sign[i1]));
            ptr_ri1i2++;
            *ptr_ri1i3 = mult(*ptr_ri1i3, mult(p_sign[i0], p_sign[i1+1]));
            ptr_ri1i3++;
            *ptr_ri1i4 = mult(*ptr_ri1i4, mult(p_sign[i0], p_sign[i1+2]));
            ptr_ri1i4++;
        }
    }

    ptr_ri2i3 = rri2i3;
    ptr_Ri3i4 = RRi3i4;
    for(i0=2; i0<L_SUBFR; i0+=STEP) {
        for(i1=3; i1<L_SUBFR; i1+=STEP) {
            *ptr_ri2i3 = mult(*ptr_ri2i3, mult(p_sign[i0], p_sign[i1]));
            ptr_ri2i3++;
            /* positions i0+1 / i1+1: the (3,4) pair rides along in this loop */
            *ptr_Ri3i4 = mult(*ptr_Ri3i4, mult(p_sign[i0+1], p_sign[i1+1]));
            ptr_Ri3i4++;
        }
    }

    /* the RRi1i1 block relates positions of track 1 with each other */
    ptr_Ri0i2 = RRi1i1;
    for(i0=1; i0<L_SUBFR; i0+=STEP) {
        for(i1=1; i1<L_SUBFR; i1+=STEP) {
            *ptr_Ri0i2 = mult(*ptr_Ri0i2, mult(p_sign[i0], p_sign[i1]));
            ptr_Ri0i2++;
        }
    }

    /*-------------------------------------------------------------------*
     * The actual search.                                                *
     *-------------------------------------------------------------------*/
    /* Eight passes (i = 0..8 with 4 skipped), each scanning all pairs
       (i0, i1) with i0 starting at p0 and i1 starting at p1.
       ptr_ri0i1 is set once and advances by one element per inner
       iteration across all passes; ptr_ri0i0 is only reset in some
       passes and otherwise continues from the previous pass.
       NOTE(review): this presumably relies on NB_POS and MSIZE matching
       the iteration counts, so that the running pointers land on the
       next block - confirm against the definitions of NB_POS, MSIZE
       and STEP. */
    ip0 = 1;                     /* Set to any valid pulse position */
    ip1 = 0;                     /* Set to any valid pulse position */
    psc = 0;
    alpha = MAX_16;
    ptr_ri0i1 = rri0i1;

    outPtr_ri1i1 = rri1i1;       /* Initial values for triple loop below */
    p0=0;                        /* Search i0,sub0 vs. i1,sub0 */
    p1=1;
    ptr_ri0i0 = rri0i0;

    for (i = 0; i<9; i++) {
        if (i == 4) i++;        /* To get right exchange sequence */
        swap = i & 1;           /* odd passes store (i0,i1) un-exchanged */

        if (i == 1) p0=1;        /* Search i0,sub1 vs. i1,sub0 */
        else if (i == 2) {           /* Search i0,sub0 vs. i1,sub1 */
            outPtr_ri1i1 = rri3i3;
            p0=0;
            p1=3;
            ptr_ri0i0 = rri0i0;
        }
        else if (i == 3) {           /* Search i0,sub3 vs. i1,sub1 */
            outPtr_ri1i1 = rri4i4;
            p0=3;
            p1=4;
            ptr_ri0i0 = rri3i3;
        }
        else if (i == 5) {           /* Search i0,sub2 vs. i1,sub0 */
            outPtr_ri1i1 = rri2i2;
            p0=1;
            p1=2;
            ptr_ri0i0 = rri1i1;
        }
        else if (i == 6) {           /* Search i0,sub1 vs. i1,sub1 */
            outPtr_ri1i1 = rri3i3;
            p1=3;
            ptr_ri0i0 = rri1i1;
        }
        else if (i == 7) {           /* Search i0,sub3 vs. i1,sub0 */
            outPtr_ri1i1 = rri4i4;
            p1=4;
            ptr_ri0i0 = rri1i1;
        }
        else if (i == 8) {           /* Search i0,sub2 vs. i1,sub1 */
            outPtr_ri1i1 = rri3i3;
            p0=2;
            p1=3;
        }

        for (i0 = p0; i0<40; i0+=STEP) {
            ptr_ri1i1 = outPtr_ri1i1;
            ps0 = Dn[i0];
            alp0 = *ptr_ri0i0++;
            for (i1 = p1; i1<40; i1+=STEP) {
                /* ps1: summed correlation of the pair;
                   alp: built from the two diagonal terms plus the
                   cross term, which enters with weight 2 */
                ps1 = add(ps0, Dn[i1]);
                alp1 = L_mult(alp0, 1);
                alp1 = L_mac(alp1, *ptr_ri1i1++, 1);
                alp1 = L_mac(alp1, *ptr_ri0i1++, 2);
                alp = extract_l(L_shr(alp1, 5));
                ps1c = mult(ps1, ps1);
                /* keep the pair when ps1c*alpha - psc*alp > 0, i.e. when
                   ps1c/alp beats the best psc/alpha found so far */
                L_temp = L_mult(ps1c, alpha);
                L_temp = L_msu(L_temp, psc, alp);
                if (L_temp > 0L) {
                    psc = ps1c;
                    alpha = alp;
                    ip0 = i1;
                    ip1 = i0;
                    if ( swap ) {
                        ip0 = i0;
                        ip1 = i1;
                    }
                }
            }
        }
    }

    /* convert from position to table entry index */
    /* NOTE(review): if a position is not found, the loop index ends at 16
       (resp. 32) and is used as a table index below; presumably the search
       can only produce positions present in the tables - confirm. */
    for (i0=0; i0<16; i0++)
        if (ip0 == trackTable0[i0]) break;
    ip0=i0;

    for (i1=0; i1<32; i1++)
        if (ip1 == trackTable1[i1]) break;
    ip1=i1;

    m0_bestPos = trackTable0[ip0];
    m1_bestPos = trackTable1[ip1];

    posIndex[0] = grayEncode[ip0];
    posIndex[1] = grayEncode[ip1];

    /* sign bit is 1 for a positive pulse, 0 for a negative one */
    if (p_sign[m0_bestPos] > 0)
        signIndex[0] = 1;
    else
        signIndex[0] = 0;

    if (p_sign[m1_bestPos] > 0)
        signIndex[1] = 1;
    else
        signIndex[1] = 0;

    /* build innovation vector */
    for (i = 0; i < L_SUBFR; i++)
        code[i] = 0;

    /* the second pulse is added, so both pulses accumulate if they
       share a position */
    code[m0_bestPos] = shr(p_sign[m0_bestPos], 2);
    code[m1_bestPos] = add( code[m1_bestPos], shr(p_sign[m1_bestPos], 2));

    /* *sign = 2*signIndex[1] + signIndex[0] */
    *sign = add(signIndex[1], signIndex[1]);
    *sign = add(*sign, signIndex[0]);

    /* index = posIndex[0] + (posIndex[1] << 4) */
    tmp = shl(posIndex[1], 4);
    index = add(posIndex[0], tmp);

    /* compute filtered cbInnovation */
    /* y[] = +/- h[] delayed to the first pulse position, plus/minus
       h[] delayed to the second pulse position */
    for (i = 0; i < L_SUBFR; i++)
        y[i] = 0;

    if(signIndex[0] == 0)
        for(i=m0_bestPos, j=0; i<L_SUBFR; i++, j++)
            y[i] = negate(h[j]);
    else
        for(i=m0_bestPos, j=0; i<L_SUBFR; i++, j++)
            y[i] = h[j];

    if(signIndex[1] == 0)
        for(i=m1_bestPos, j=0; i<L_SUBFR; i++, j++)
            y[i] = sub(y[i], h[j]);
    else
        for(i=m1_bestPos, j=0; i<L_SUBFR; i++, j++)
            y[i] = add(y[i], h[j]);

    return index;
}
/*
 * D4i40_17 - nested-loop search for four signed pulses.
 *
 * Picks one position in each of the three strided sets starting at 0, 1
 * and 2, plus a fourth position taken from the strided sets starting at
 * 3 or 4, so that square(correlation)/energy is maximised.
 *
 * Side effects visible in this function:
 *   - Dn[] is overwritten with its absolute values.
 *   - The cross-correlation part of rr[] (everything after the first
 *     5*NB_POS entries) is multiplied in place by the pulse signs.
 *   - The file-scope variable `extra` is reset to 30 when i_subfr == 0 and
 *     receives the unused part of the search budget on exit.
 *
 * Returns the position index built from the four selected positions; the
 * four sign bits are stored in *sign.
 */
static Word16 D4i40_17(   /* (o)    : Index of pulses positions.               */
  Word16 Dn[],            /* (i)    : Correlations between h[] and Xn[].       */
  Word16 rr[],            /* (i)    : Correlations of impulse response h[].    */
  Word16 h[],             /* (i) Q12: Impulse response of filters.             */
  Word16 cod[],           /* (o) Q13: Selected algebraic codeword.             */
  Word16 y[],             /* (o) Q12: Filtered algebraic codeword.             */
  Word16 *sign,           /* (o)    : Signs of 4 pulses.                       */
  Word16 i_subfr          /* (i)    : subframe flag                            */
)
{
  Word16 i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  Word16 i, j, time;
  Word16 ps0, ps1, ps2, ps3, alp, alp0;
  Word32 alp1, alp2, alp3, L32;
  Word16 ps3c, psc, alpha;
  Word16 average, max0, max1, max2, thres;
  Word32 L_temp;

  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  Word16 *ptr_ri0i0, *ptr_ri1i1, *ptr_ri2i2, *ptr_ri3i3, *ptr_ri4i4;
  Word16 *ptr_ri0i1, *ptr_ri0i2, *ptr_ri0i3, *ptr_ri0i4;
  Word16 *ptr_ri1i2, *ptr_ri1i3, *ptr_ri1i4;
  Word16 *ptr_ri2i3, *ptr_ri2i4;

  Word16 p_sign[L_SUBFR];

  /* Init pointers: rr[] holds five blocks of NB_POS entries (rriXiX)  */
  /* followed by nine blocks of MSIZE entries (rriXiY).                */

  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;

  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;

  /*-----------------------------------------------------------------------*
   * Reset max_time for 1st subframe.                                      *
   * (`extra` is not declared in this function; it is carried between      *
   *  calls and added to MAX_TIME below.)                                  *
   *-----------------------------------------------------------------------*/

  if (i_subfr == 0){ extra = 30; }

  /*-----------------------------------------------------------------------*
   * Choose the sign of the impulse: p_sign[] gets 0x7fff or 0x8000 and    *
   * Dn[] is replaced by its absolute value.                               *
   *-----------------------------------------------------------------------*/

  for (i=0; i<L_SUBFR; i++)
  {
    if( Dn[i] >= 0)
    {
      p_sign[i] = 0x7fff;
    }
    else
    {
      p_sign[i] = (Word16)0x8000;
      Dn[i] = negate(Dn[i]);
    }
  }

  /*-------------------------------------------------------------------*
   * - Compute the search threshold after three pulses                 *
   *-------------------------------------------------------------------*/

  /* Find maximum of Dn[i0]+Dn[i1]+Dn[i2] */

  max0 = Dn[0];
  max1 = Dn[1];
  max2 = Dn[2];

  /* Starts at 5 because offsets 0, 1 and 2 seeded the maxima above. */
  for (i = 5; i < L_SUBFR; i+=STEP)
  {
    if (sub(Dn[i] , max0) > 0){ max0 = Dn[i]; }
    if (sub(Dn[i+1], max1) > 0){ max1 = Dn[i+1]; }
    if (sub(Dn[i+2], max2) > 0){ max2 = Dn[i+2]; }
  }
  max0 = add(max0, max1);
  max0 = add(max0, max2);

  /* Find average of Dn[i0]+Dn[i1]+Dn[i2] */

  L32 = 0;
  for (i = 0; i < L_SUBFR; i+=STEP)
  {
    L32 = L_mac(L32, Dn[i], 1);
    L32 = L_mac(L32, Dn[i+1], 1);
    L32 = L_mac(L32, Dn[i+2], 1);
  }
  average =extract_l( L_shr(L32, 4)); /* 1/8 of sum */

  /* thres = average + (max0-average)*THRESHFCB; */

  thres = sub(max0, average);
  thres = mult(thres, THRESHFCB);
  thres = add(thres, average);

  /*-------------------------------------------------------------------*
   * Modification of rrixiy[] to take signs into account.              *
   * Each cross term is multiplied by the product of the two signs.    *
   *-------------------------------------------------------------------*/

  ptr_ri0i1 = rri0i1;
  ptr_ri0i2 = rri0i2;
  ptr_ri0i3 = rri0i3;
  ptr_ri0i4 = rri0i4;

  for(i0=0; i0<L_SUBFR; i0+=STEP)
  {
    for(i1=1; i1<L_SUBFR; i1+=STEP) {
      *ptr_ri0i1 = mult(*ptr_ri0i1, mult(p_sign[i0], p_sign[i1]));
      ptr_ri0i1++;
      *ptr_ri0i2 = mult(*ptr_ri0i2, mult(p_sign[i0], p_sign[i1+1]));
      ptr_ri0i2++;
      *ptr_ri0i3 = mult(*ptr_ri0i3, mult(p_sign[i0], p_sign[i1+2]));
      ptr_ri0i3++;
      *ptr_ri0i4 = mult(*ptr_ri0i4, mult(p_sign[i0], p_sign[i1+3]));
      ptr_ri0i4++;
    }
  }

  ptr_ri1i2 = rri1i2;
  ptr_ri1i3 = rri1i3;
  ptr_ri1i4 = rri1i4;

  for(i1=1; i1<L_SUBFR; i1+=STEP)
  {
    for(i2=2; i2<L_SUBFR; i2+=STEP)
    {
      *ptr_ri1i2 = mult(*ptr_ri1i2, mult(p_sign[i1], p_sign[i2]));
      ptr_ri1i2++;
      *ptr_ri1i3 = mult(*ptr_ri1i3, mult(p_sign[i1], p_sign[i2+1]));
      ptr_ri1i3++;
      *ptr_ri1i4 = mult(*ptr_ri1i4, mult(p_sign[i1], p_sign[i2+2]));
      ptr_ri1i4++;
    }
  }

  ptr_ri2i3 = rri2i3;
  ptr_ri2i4 = rri2i4;

  for(i2=2; i2<L_SUBFR; i2+=STEP)
  {
    for(i3=3; i3<L_SUBFR; i3+=STEP)
    {
      *ptr_ri2i3 = mult(*ptr_ri2i3, mult(p_sign[i2], p_sign[i3]));
      ptr_ri2i3++;
      *ptr_ri2i4 = mult(*ptr_ri2i4, mult(p_sign[i2], p_sign[i3+1]));
      ptr_ri2i4++;
    }
  }

  /*-------------------------------------------------------------------*
   * Search the optimum positions of the four pulses which maximize    *
   *     square(correlation) / energy                                  *
   * The search is performed in four nested loops. At each loop, one   *
   * pulse contribution is added to the correlation and energy.        *
   *                                                                   *
   * The fourth loop is entered only if the correlation due to the     *
   * contribution of the first three pulses exceeds the preset         *
   * threshold.                                                        *
   *-------------------------------------------------------------------*/

  /* Default values */

  ip0 = 0;
  ip1 = 1;
  ip2 = 2;
  ip3 = 3;
  psc = 0;
  alpha = MAX_16;
  time = add(MAX_TIME, extra);   /* budget of 4th-loop entries for this call */

  /* Four loops to search innovation code. */

  ptr_ri0i0 = rri0i0;    /* Init. pointers that depend on first loop */
  ptr_ri0i1 = rri0i1;
  ptr_ri0i2 = rri0i2;
  ptr_ri0i3 = rri0i3;
  ptr_ri0i4 = rri0i4;

  for (i0 = 0; i0 < L_SUBFR; i0 += STEP)        /* first pulse loop */
  {
    ps0 = Dn[i0];
    alp0 = *ptr_ri0i0++;

    ptr_ri1i1 = rri1i1;    /* Init. pointers that depend on second loop */
    ptr_ri1i2 = rri1i2;
    ptr_ri1i3 = rri1i3;
    ptr_ri1i4 = rri1i4;

    for (i1 = 1; i1 < L_SUBFR; i1 += STEP)      /* second pulse loop */
    {
      ps1 = add(ps0, Dn[i1]);

      /* alp1 = alp0 + *ptr_ri1i1++ + 2.0 * ( *ptr_ri0i1++); */

      alp1 = L_mult(alp0, 1);
      alp1 = L_mac(alp1, *ptr_ri1i1++, 1);
      alp1 = L_mac(alp1, *ptr_ri0i1++, 2);

      ptr_ri2i2 = rri2i2;     /* Init. pointers that depend on third loop */
      ptr_ri2i3 = rri2i3;
      ptr_ri2i4 = rri2i4;

      for (i2 = 2; i2 < L_SUBFR; i2 += STEP)    /* third pulse loop */
      {
        ps2 = add(ps1, Dn[i2]);

        /* alp2 = alp1 + *ptr_ri2i2++ + 2.0 * (*ptr_ri0i2++ + *ptr_ri1i2++); */

        alp2 = L_mac(alp1, *ptr_ri2i2++, 1);
        alp2 = L_mac(alp2, *ptr_ri0i2++, 2);
        alp2 = L_mac(alp2, *ptr_ri1i2++, 2);

        /* Test threshold */

        if ( sub(ps2, thres) > 0)
        {
          ptr_ri3i3 = rri3i3;    /* Init. pointers that depend on 4th loop */

          for (i3 = 3; i3 < L_SUBFR; i3 += STEP)      /* 4th pulse loop */
          {
            ps3 = add(ps2, Dn[i3]);

            /* alp3 = alp2 + *ptr_ri3i3++                                */
            /*       + 2.0*( *ptr_ri0i3++ + *ptr_ri1i3++ + *ptr_ri2i3++); */

            alp3 = L_mac(alp2, *ptr_ri3i3++, 1);
            alp3 = L_mac(alp3, *ptr_ri0i3++, 2);
            alp3 = L_mac(alp3, *ptr_ri1i3++, 2);
            alp3 = L_mac(alp3, *ptr_ri2i3++, 2);
            alp  = extract_l(L_shr(alp3, 5));

            ps3c = mult(ps3, ps3);

            /* Cross-multiplied comparison: ps3c*alpha > psc*alp, i.e.   */
            /* ps3c/alp > psc/alpha without a division.                  */
            L_temp = L_mult(ps3c, alpha);
            L_temp = L_msu(L_temp, psc, alp);
            if( L_temp > 0L )
            {
              psc = ps3c;
              alpha = alp;
              ip0 = i0;
              ip1 = i1;
              ip2 = i2;
              ip3 = i3;
            }
          }  /*  end of for i3 = */

          /* Rewind the pointers the loop above advanced so the second   */
          /* 4th-pulse loop and the next i2 iteration start correctly.   */
          ptr_ri0i3 -= NB_POS;
          ptr_ri1i3 -= NB_POS;

          ptr_ri4i4 = rri4i4;    /* Init. pointers that depend on 4th loop */

          for (i3 = 4; i3 < L_SUBFR; i3 += STEP)      /* 4th pulse loop */
          {
            ps3 = add(ps2, Dn[i3]);

            /* alp3 = alp2 + *ptr_ri4i4++                                */
            /*       + 2.0*( *ptr_ri0i4++ + *ptr_ri1i4++ + *ptr_ri2i4++); */

            alp3 = L_mac(alp2, *ptr_ri4i4++, 1);
            alp3 = L_mac(alp3, *ptr_ri0i4++, 2);
            alp3 = L_mac(alp3, *ptr_ri1i4++, 2);
            alp3 = L_mac(alp3, *ptr_ri2i4++, 2);
            alp  = extract_l(L_shr(alp3, 5));

            ps3c = mult(ps3, ps3);

            L_temp = L_mult(ps3c, alpha);
            L_temp = L_msu(L_temp, psc, alp);
            if( L_temp > 0L )
            {
              psc = ps3c;
              alpha = alp;
              ip0 = i0;
              ip1 = i1;
              ip2 = i2;
              ip3 = i3;
            }
          }       /*  end of for i3 = */
          ptr_ri0i4 -= NB_POS;
          ptr_ri1i4 -= NB_POS;

          /* One unit of the budget is spent per entry into the 4th loops. */
          time = sub(time, 1);
          if(time <= 0 ) goto end_search;     /* Maximum time finish */

        }  /* end of if >thres */
        else
        {
          /* 4th loops skipped: advance the i2-dependent pointers by hand. */
          ptr_ri2i3 += NB_POS;
          ptr_ri2i4 += NB_POS;
        }

      } /* end of for i2 = */

      ptr_ri0i2 -= NB_POS;
      ptr_ri1i3 += NB_POS;
      ptr_ri1i4 += NB_POS;

    } /* end of for i1 = */

    ptr_ri0i2 += NB_POS;
    ptr_ri0i3 += NB_POS;
    ptr_ri0i4 += NB_POS;

  } /* end of for i0 = */

end_search:

  extra = time;   /* carry the unused budget over to the next call */

  /* Set the sign of impulses (i0..i3 are reused to hold the signs) */

  i0 = p_sign[ip0];
  i1 = p_sign[ip1];
  i2 = p_sign[ip2];
  i3 = p_sign[ip3];

  /* Find the codeword corresponding to the selected positions */

  for(i=0; i<L_SUBFR; i++) {cod[i] = 0; }

  cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */
  cod[ip1] = shr(i1, 2);
  cod[ip2] = shr(i2, 2);
  cod[ip3] = shr(i3, 2);

  /* find the filtered codeword: add or subtract h[] shifted to each position */

  for (i = 0; i < L_SUBFR; i++) {y[i] = 0; }

  if(i0 > 0)
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  if(i1 > 0)
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  if(i2 > 0)
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  if(i3 > 0)
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) { y[i] = add(y[i], h[j]); }
  else
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) { y[i] = sub(y[i], h[j]); }

  /* find codebook index;  17-bit address */
  /* Sign word: bit k is set when pulse k is positive. */

  i = 0;
  if(i0 > 0) i = add(i, 1);
  if(i1 > 0) i = add(i, 2);
  if(i2 > 0) i = add(i, 4);
  if(i3 > 0) i = add(i, 8);
  *sign = i;

  ip0 = mult(ip0, 6554);         /* ip0/5 */
  ip1 = mult(ip1, 6554);         /* ip1/5 */
  ip2 = mult(ip2, 6554);         /* ip2/5 */
  i   = mult(ip3, 6554);         /* ip3/5 */
  j   = add(i, shl(i, 2));       /* j = i*5 */
  j   = sub(ip3, add(j, 3));     /* j= ip3%5 -3 */
  ip3 = add(shl(i, 1), j);       /* 2*(ip3/5) + (ip3%5 - 3) */

  /* Pack the four fields at bit offsets 0, 3, 6 and 9. */
  i = add(ip0, shl(ip1, 3));
  i = add(i  , shl(ip2, 6));
  i = add(i  , shl(ip3, 9));

  return i;
}
/*
 * Check that arg1 >= arg2.
 *
 * The result is the negation of the "less than" comparison performed by
 * assertLessCmp() with int_cmp.  When the check fails, print_error() is
 * called with both operands.  The (possibly failing) result is returned.
 */
_BOOL_RETURN_TYPE assertGreaterEqual(const int arg1, const int arg2)
{
    _BOOL_RETURN_TYPE outcome;

    outcome = negate(assertLessCmp(&arg1, &arg2, (cmp_fn_t)int_cmp));
    if (outcome != _FALSE)
    {
        return outcome;
    }

    print_error(arg1, arg2, "not greater or equal to");
    return outcome;
}
/* Emit code that interpolates between the vertices at v0_ptr ("from") and
 * v1_ptr ("to") with weight t0, and stores the new vertex through dest_ptr.
 *
 * Put the result into a0.0.
 * Increment a0.0 accordingly.
 */
void brw_clip_interp_vertex( struct brw_clip_compile *c,
                             struct brw_indirect dest_ptr,
                             struct brw_indirect v0_ptr, /* from */
                             struct brw_indirect v1_ptr, /* to */
                             struct brw_reg t0,
                             bool force_edgeflag)
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp = get_tmp(c);
   GLuint slot;

   /* The vertex header is copied verbatim from v0.
    *
    * After CLIP stage, only first 256 bits of the VUE are read
    * back on Ironlake, so needn't change it
    */
   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);

   /* Walk every VUE slot (could be done in pairs?) */
   for (slot = 0; slot < c->vue_map.num_slots; slot++) {
      const int varying = c->vue_map.slot_to_varying[slot];
      const GLuint delta = brw_vue_slot_to_offset(slot);

      if (varying == VARYING_SLOT_EDGE) {
         /* Edge flag: either forced to 1 or taken from v0, never blended. */
         brw_MOV(p,
                 deref_4f(dest_ptr, delta),
                 force_edgeflag ? brw_imm_f(1) : deref_4f(v0_ptr, delta));
         continue;
      }

      if (varying == VARYING_SLOT_PSIZ ||
          varying == VARYING_SLOT_CLIP_DIST0 ||
          varying == VARYING_SLOT_CLIP_DIST1) {
         /* PSIZ doesn't need interpolation because it isn't used by the
          * fragment shader.  CLIP_DIST0 and CLIP_DIST1 don't need
          * interpolation because on pre-GEN6, these are just placeholder VUE
          * slots that don't perform any action.
          */
         continue;
      }

      if (varying >= VARYING_SLOT_MAX) {
         /* Special value for the VUE header, nothing to emit. */
         continue;
      }

      /* This is a true vertex result, so interpolate:
       *
       *        New = attr0 + t*attr1 - t*attr0
       */
      brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0);
      brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
      brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
   }

   /* With an odd slot count, zero the slot just past the last one. */
   if (c->vue_map.num_slots % 2) {
      const GLuint pad_delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

      brw_MOV(p, deref_4f(dest_ptr, pad_delta), brw_imm_f(0));
   }

   release_tmp(c, tmp);

   /* Recreate the projected (NDC) coordinate in the new vertex
    * header:
    */
   brw_clip_project_vertex(c, dest_ptr );
}
/*
 * Check that arg1 != arg2.
 *
 * The result is the negation of the equality comparison performed by
 * assertEqualCmp() with int_cmp.  When the check fails, print_error() is
 * called with both operands.  The (possibly failing) result is returned.
 */
_BOOL_RETURN_TYPE assertNotEqual(const int arg1, const int arg2)
{
    _BOOL_RETURN_TYPE outcome;

    outcome = negate(assertEqualCmp(&arg1, &arg2, (cmp_fn_t)int_cmp));
    if (outcome != _FALSE)
    {
        return outcome;
    }

    print_error(arg1, arg2, "equals");
    return outcome;
}
/*
 * samples_to_rmlt_coefs - window one block of samples and transform it.
 *
 * Builds dct_length windowed values from old_samples and new_samples,
 * copies new_samples into old_samples for the next call, scales the
 * windowed data by a computed shift and hands it to dct_type_iv_a(),
 * which writes coefs.
 *
 *   new_samples : dct_length input samples (read only here)
 *   old_samples : dct_length samples; read, then overwritten with new_samples
 *   coefs       : output buffer passed to dct_type_iv_a()
 *   dct_length  : transform length; DCT_LENGTH selects
 *                 samples_to_rmlt_window, any other value selects
 *                 max_samples_to_rmlt_window
 *
 * Returns mag_shift, the shift applied to the windowed data (positive =
 * left shift, negative = right shift).
 *
 * The move16()/move32()/test() calls do not take part in the arithmetic
 * shown here; NOTE(review): presumably complexity-counting
 * instrumentation - confirm.
 */
Word16 samples_to_rmlt_coefs(Word16 *new_samples,Word16 *old_samples,Word16 *coefs,Word16 dct_length)
{

    Word16	index, vals_left,mag_shift,n;
    Word16	windowed_data[MAX_DCT_LENGTH];
    Word16	*new_ptr, *old_ptr, *sam_low, *sam_high;
    Word16	*win_low, *win_high;
    Word16	*dst_ptr;
    Word16  neg_win_low;
    Word16  samp_high;
    Word16  half_dct_size;

    Word32	acca;
    Word32	accb;
    Word16	temp;
    Word16	temp1;
    Word16	temp2;
    Word16	temp5;

    half_dct_size = shr(dct_length,1);

    /*++++++++++++++++++++++++++++++++++++++++++++*/
    /* Get the first half of the windowed samples */
    /*++++++++++++++++++++++++++++++++++++++++++++*/

    dst_ptr  = windowed_data;
    move16();

    /* address arithmetic */
    test();
    if (dct_length==DCT_LENGTH)
    {
        win_high = samples_to_rmlt_window + half_dct_size;
    }
    else
    {
        win_high = max_samples_to_rmlt_window + half_dct_size;
    }

    win_low  = win_high;
    move16();

    /* address arithmetic */
    sam_high = old_samples + half_dct_size;

    sam_low  = sam_high;
    move16();

    /* Both pointer pairs start at the midpoint and walk outwards:      */
    /* the "low" pointers move down, the "high" pointers move up.       */
    for (vals_left = half_dct_size;vals_left > 0;vals_left--)
    {
        acca = 0L;
        move32();

        acca = L_mac(acca,*--win_low, *--sam_low);
        acca = L_mac(acca,*win_high++, *sam_high++);
        temp = round16(acca);

        *dst_ptr++ = temp;
        move16();
    }

    /*+++++++++++++++++++++++++++++++++++++++++++++*/
    /* Get the second half of the windowed samples */
    /*+++++++++++++++++++++++++++++++++++++++++++++*/

    sam_low  = new_samples;
    move16();

    /* address arithmetic */
    sam_high = new_samples + dct_length;

    /* The window pointers now walk back towards the midpoint while the */
    /* sample pointers move inwards from both ends of new_samples; the  */
    /* low-window term enters with a negative sign.                     */
    for (vals_left = half_dct_size;    vals_left > 0;    vals_left--)
    {
        acca = 0L;
        move32();

        acca = L_mac(acca,*--win_high, *sam_low++);
        neg_win_low = negate(*win_low++);
        samp_high = *--sam_high;
        acca = L_mac(acca, neg_win_low, samp_high);
        temp = round16(acca);

        *dst_ptr++=temp;
        move16();
    }

    /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
    /* Save the new samples for next time, when they will be the old samples */
    /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

    new_ptr = new_samples;
    move16();

    old_ptr = old_samples;
    move16();

    for (vals_left = dct_length;vals_left > 0;vals_left--)
    {
        *old_ptr++ = *new_ptr++;
        move16();
    }

    /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
    /* Calculate how many bits to shift up the input to the DCT.             */
    /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

    /* temp1 = largest absolute value in windowed_data[] */
    temp1=0;
    move16();

    for(index=0;index<dct_length;index++)
    {
        temp2 = abs_s(windowed_data[index]);
        temp = sub(temp2,temp1);
        test();
        if(temp > 0)
        {
            move16();
            temp1 = temp2;
        }
    }

    mag_shift=0;
    move16();

    temp = sub(temp1,14000);
    test();
    if (temp >= 0)
    {
        /* Peak of 14000 or more: no up-shift. */
        mag_shift = 0;
        move16();
    }
    else
    {
        /* Peaks below 438 are bumped by one before scaling. */
        temp = sub(temp1,438);
        test();
        if(temp < 0)
            temp = add(temp1,1);
        else
        {
            temp = temp1;
            move16();
        }
        accb = L_mult(temp,9587);
        acca = L_shr(accb,20);
        temp5 = extract_l(acca);
        temp = norm_s(temp5);
        test();
        if (temp == 0)
        {
            mag_shift = 9;
            move16();
        }
        else
            mag_shift = sub(temp,6);

    }

    /* Sum of absolute values, scaled down by 2^7; when it exceeds the  */
    /* peak, the shift is reduced by one.                               */
    acca = 0L;
    move32();
    for(index=0; index<dct_length; index++)
    {
        temp = abs_s( windowed_data[index]);
        acca = L_add(acca,temp);
    }

    acca = L_shr(acca,7);

    test();
    if (temp1 < acca)
    {
        mag_shift = sub(mag_shift,1);
    }

    /* Apply the shift: left for positive, right for negative, none for 0. */
    test();
    if (mag_shift > 0)
    {
        for(index=0;index<dct_length;index++)
        {
            windowed_data[index] = shl(windowed_data[index],mag_shift);
        }
    }
    else
    {
        test();
        if (mag_shift < 0)
        {
            n = negate(mag_shift);
            for(index=0;index<dct_length;index++)
            {
                windowed_data[index] = shr(windowed_data[index],n);
                move16();
            }
        }
    }

    /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
    /* Perform a Type IV DCT on the windowed data to get the coefficients */
    /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

    dct_type_iv_a(windowed_data, coefs, dct_length);

    return(mag_shift);
}
// Fill X with vectors built from the row-reduced transpose of A.
//
// The transpose of A is reduced with gauss(), which returns r.  X is sized
// to (m - r) rows of length m, where m = A.NumRows().  Each row of X has a
// one in a single non-pivot column, zeros in the other non-pivot columns,
// and its pivot-column entries are obtained by back-substitution.
void kernel(mat_ZZ_p& X, const mat_ZZ_p& A)
{
   long m = A.NumRows();
   long n = A.NumCols();   // queried as in the original; not needed below
   (void) n;

   mat_ZZ_p M;
   transpose(M, A);
   long r = gauss(M);

   X.SetDims(m-r, m);

   // pivot_row[col] = row of M whose pivot sits in column col, or -1.
   vec_long pivot_row;
   pivot_row.SetLength(m);
   for (long col = 0; col < m; col++)
      pivot_row[col] = -1;

   // pivot_inv[col] = inverse of the pivot found in column col.
   vec_ZZ_p pivot_inv;
   pivot_inv.SetLength(m);

   // Row i's pivot is its first non-zero entry to the right of the
   // previous row's pivot.
   long col = 0;
   for (long row = 0; row < r; row++) {
      while (IsZero(M[row][col]))
         col++;

      pivot_row[col] = row;
      inv(pivot_inv[col], M[row][col]);
      col++;
   }

   ZZ acc, prod;
   ZZ_p scaled;

   for (long k = 0; k < m-r; k++) {
      vec_ZZ_p& v = X[k];
      long free_seen = 0;

      // Columns are processed right to left, so every v[s] with s > j is
      // already final when column j is handled.
      for (long j = m-1; j >= 0; j--) {
         long row = pivot_row[j];

         if (row == -1) {
            // Non-pivot column: 1 for the k-th one encountered, else 0.
            if (free_seen == k)
               set(v[j]);
            else
               clear(v[j]);
            free_seen++;
            continue;
         }

         // Pivot column: v[j] = -(sum_{s>j} v[s]*M[row][s]) / pivot.
         clear(acc);
         for (long s = j+1; s < m; s++) {
            mul(prod, rep(v[s]), rep(M[row][s]));
            add(acc, acc, prod);
         }

         conv(scaled, acc);
         mul(scaled, scaled, pivot_inv[j]);
         negate(v[j], scaled);
      }
   }
}
/*
 * Print increment() and negate() applied to a positive value, zero and a
 * negative value.
 */
int main(void)
{
    int a = 10;
    int b = 0;
    int c = -10;

    /* All six results go out through one printf call. */
    printf("The outcome values are following: %d %d %d %d %d %d \n",
           increment(a),
           increment(b),
           increment(c),
           negate(a),
           negate(b),
           negate(c));

    return 0;
}
// Compute res = sin(x).
//
// Zero is returned immediately for x == 0; Error() is called when
// Lg2(x) > 1000.  Otherwise the argument is reduced to a value f (see
// below), the alternating power series in f is summed until adding a term
// no longer changes the sum, and the result is copied into res.  The
// working precision is raised during the computation and set back to the
// caller's precision p before res is written.
void sin(RR& res, const RR& x)
{
   if (x == 0) {
      res = 0;
      return;
   }

   if (Lg2(x) > 1000)
      Error("sin: sorry...argument too large in absolute value");

   long p = RR::precision();   // caller's precision, restored at the end

   RR pi, t1, f;
   RR n;

   // we want to make x^2 < 3, so that the series for sin(x)
   // converges nicely, without any nasty cancellations in the
   // first terms of the series.

   RR::SetPrecision(p + NumBits(p) + 10);

   if (x*x < 3) {
      // already small enough: use x directly
      xcopy(f, x);
   }
   else {

      // we want to write x/pi = n + f, |f| < 1/2....
      // but we have to do *this* very carefully, so that f is computed
      // to precision > p.  I know, this is sick!

      long p1;

      p1 = p + Lg2(x) + 20;

      // Retry with a larger working precision p1 until f has enough bits.
      for (;;) {
         RR::SetPrecision(p1);
         ComputePi(pi);
         xcopy(t1, x/pi);
         xcopy(n, floor(t1));
         xcopy(f, t1 - n);
         if (f > 0.5) {
            // round to the nearest integer instead of the floor
            n++;
            xcopy(f, t1 - n);
         }

         if (f == 0 || p1 < p - Lg2(f) + Lg2(n) + 10) {
            // we don't have enough bits of f...increase p1 and continue

            p1 = p1 + max(20, p1/10);
         }
         else
            break;
      }

      RR::SetPrecision(p + NumBits(p) + 10);
      ComputePi(pi);

      // turn the fraction of pi back into an angle
      xcopy(f, pi * f);

      if (n != 0 && n.exponent() == 0) {
         // n is odd, so we negate f, which negates sin(f)

         xcopy(f, -f);
      }

   }

   // Boy, that was painful, but now its over, and we can simply apply
   // the series for sin(f)

   RR t2, s, s1, t;
   long i;

   s = 0;
   xcopy(t, f);

   // t is the current term; each pass multiplies it by -f^2/((i-1)*i).
   for (i = 3; ; i=i+2) {
      add(s1, s, t);
      if (s == s1) break;     // the term no longer changes the sum
      xcopy(s, s1);
      mul(t, t, f);
      mul(t, t, f);
      div(t, t, i-1);
      div(t, t, i);
      negate(t, t);
   }

   RR::SetPrecision(p);

   xcopy(res, s);

}
/*---------------------------------------------------------------------------- ; FUNCTION CODE ----------------------------------------------------------------------------*/ Word16 hp_max( Word32 corr[], /* i : correlation vector. */ Word16 scal_sig[], /* i : scaled signal. */ Word16 L_frame, /* i : length of frame to compute pitch */ Word16 lag_max, /* i : maximum lag */ Word16 lag_min, /* i : minimum lag */ Word16 *cor_hp_max, /* o : max high-pass filtered norm. correlation */ Flag *pOverflow /* i/o : overflow Flag */ ) { Word16 i; Word16 *p, *p1; Word32 max, t0, t1; Word16 max16, t016, cor_max; Word16 shift, shift1, shift2; Word32 L_temp; max = MIN_32; t0 = 0L; for (i = lag_max - 1; i > lag_min; i--) { /* high-pass filtering */ t0 = L_shl(corr[-i], 1, pOverflow); L_temp = L_sub(t0, corr[-i-1], pOverflow); t0 = L_sub(L_temp, corr[-i+1], pOverflow); t0 = L_abs(t0); if (t0 >= max) { max = t0; } } /* compute energy */ p = scal_sig; p1 = &scal_sig[0]; t0 = 0L; for (i = 0; i < L_frame; i++, p++, p1++) { t0 = L_mac(t0, *p, *p1, pOverflow); } p = scal_sig; p1 = &scal_sig[-1]; t1 = 0L; for (i = 0; i < L_frame; i++, p++, p1++) { t1 = L_mac(t1, *p, *p1, pOverflow); } /* high-pass filtering */ L_temp = L_shl(t0, 1, pOverflow); t1 = L_shl(t1, 1, pOverflow); t0 = L_sub(L_temp, t1, pOverflow); t0 = L_abs(t0); /* max/t0 */ /* shift1 = sub(norm_l(max), 1); max16 = extract_h(L_shl(max, shift1)); shift2 = norm_l(t0); t016 = extract_h(L_shl(t0, shift2)); */ t016 = norm_l(max); shift1 = t016 - 1; L_temp = L_shl(max, shift1, pOverflow); max16 = (Word16)(L_temp >> 16); shift2 = norm_l(t0); L_temp = L_shl(t0, shift2, pOverflow); t016 = (Word16)(L_temp >> 16); if (t016 != 0) { cor_max = div_s(max16, t016); } else { cor_max = 0; } shift = shift1 - shift2; if (shift >= 0) { *cor_hp_max = shr(cor_max, shift, pOverflow); /* Q15 */ } else { *cor_hp_max = shl(cor_max, negate(shift), pOverflow); /* Q15 */ } return 0; }
// z = -a: copy a into z with xcopy(), then flip the sign of z's x member
// in place.
void negate(RR& z, const RR& a)
{
   xcopy(z, a);

   negate(z.x, z.x);
}
/* Emit the setup program for a triangle (three vertices): for every setup
 * register, compute the start value and the x/y gradients of the
 * attributes it holds and write them to the URB.
 */
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint reg;

   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate) {
      alloc_regs(c);
   }

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color) {
      do_twoside_color(c);
   }

   if (c->has_flat_shading) {
      do_flatshade_triangle(c);
   }

   for (reg = 0; reg < c->nr_setup_regs; reg++) {
      /* Pair of incoming attributes, one register per vertex: */
      struct brw_reg attr0 = offset(c->vert[0], reg);
      struct brw_reg attr1 = offset(c->vert[1], reg);
      struct brw_reg attr2 = offset(c->vert[2], reg);
      GLushort pc, pc_persp, pc_linear;
      bool is_last = calculate_masks(c, reg, &pc, &pc_persp, &pc_linear);

      /* Channels flagged in pc_persp are multiplied by each vertex's
       * inv_w first.
       */
      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, attr0, attr0, c->inv_w[0]);
         brw_MUL(p, attr1, attr1, c->inv_w[1]);
         brw_MUL(p, attr2, attr2, c->inv_w[2]);
      }

      /* Calculate coefficients for interpolated values: */
      if (pc_linear) {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, attr1, negate(attr0));
         brw_ADD(p, c->a2_sub_a0, attr2, negate(attr0));

         /* dA/dx = ((a1 - a0) * dy2 - (a2 - a0) * dy0) * inv_det */
         brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
         brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         /* dA/dy = ((a2 - a0) * dx0 - (a1 - a0) * dx2) * inv_det */
         brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
         brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      set_predicate_control_flag_value(p, c, pc);

      /* start point for interpolation */
      brw_MOV(p, c->m3C0, attr0);

      /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
       * the send instruction:
       */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                    is_last ? BRW_URB_WRITE_EOT_COMPLETE
                            : BRW_URB_WRITE_NO_FLAGS,
                    4,                  /* msg len */
                    0,                  /* response len */
                    reg*4,              /* offset */
                    BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
/*
 * D4i40_17_fast - depth-first search for four signed pulses.
 *
 * For each of the two candidate fourth tracks (start offset 3, then 4) two
 * two-phase searches are run: phase A tries the two largest dn[] positions
 * of one track against all positions of a second track, phase B then scans
 * the two remaining tracks.  The best of the four candidates according to
 * square(correlation)/energy is kept.
 *
 * Side effects visible in this function:
 *   - dn[] is overwritten with its absolute values.
 *   - The cross-correlation part of rr[] (everything after the first
 *     5*NB_POS entries) is sign-adjusted in place.
 *
 * Returns the position index built from the four selected positions; the
 * four sign bits are stored in *sign.
 */
static Word16 D4i40_17_fast(/*(o) : Index of pulses positions.               */
  Word16 dn[],          /* (i)    : Correlations between h[] and Xn[].       */
  Word16 rr[],          /* (i)    : Correlations of impulse response h[].    */
  Word16 h[],           /* (i) Q12: Impulse response of filters.             */
  Word16 cod[],         /* (o) Q13: Selected algebraic codeword.             */
  Word16 y[],           /* (o) Q12: Filtered algebraic codeword.             */
  Word16 *sign          /* (o)    : Signs of 4 pulses.                       */
)
{
  Word16 i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  Word16 i, j, ix, iy, track, max;
  Word16 prev_i0, i1_offset;
  Word16 psk, ps, ps0, ps1, ps2, sq, sq2;
  Word16 alpk, alp, alp_16;
  Word32 s, alp0, alp1, alp2;
  Word16 *p0, *p1, *p2, *p3, *p4;
  Word16 sign_dn[L_SUBFR], sign_dn_inv[L_SUBFR], *psign;
  Word16 tmp_vect[NB_POS];
  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  Word16 *ptr_rri0i3_i4;
  Word16 *ptr_rri1i3_i4;
  Word16 *ptr_rri2i3_i4;
  Word16 *ptr_rri3i3_i4;

  /* Init pointers: rr[] holds five blocks of NB_POS entries (rriXiX)  */
  /* followed by nine blocks of MSIZE entries (rriXiY).                */

  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;
  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;

  /*-----------------------------------------------------------------------*
   * Choose the sign of the impulse.  sign_dn[] holds the sign of dn[],    *
   * sign_dn_inv[] the opposite sign; dn[] becomes its absolute value.     *
   *-----------------------------------------------------------------------*/

  for (i=0; i<L_SUBFR; i++)
  {
    if (dn[i] >= 0)
    {
      sign_dn[i] = MAX_16;
      sign_dn_inv[i] = MIN_16;
    }
    else
    {
      sign_dn[i] = MIN_16;
      sign_dn_inv[i] = MAX_16;
      dn[i] = negate(dn[i]);
    }
  }

  /*-------------------------------------------------------------------*
   * Modification of rrixiy[] to take signs into account.              *
   *                                                                   *
   * psign selects the table that already contains the product with    *
   * the outer pulse's sign.  The store and the pointer increment are  *
   * separate statements: writing "*p++ = mult(*p, ...)" reads and     *
   * modifies p without sequencing, which is undefined behaviour.      *
   *-------------------------------------------------------------------*/

  p0 = rri0i1;
  p1 = rri0i2;
  p2 = rri0i3;
  p3 = rri0i4;

  for(i0=0; i0<L_SUBFR; i0+=STEP)
  {
    psign = sign_dn;
    if (psign[i0] < 0) psign = sign_dn_inv;

    for(i1=1; i1<L_SUBFR; i1+=STEP)
    {
      *p0 = mult(*p0, psign[i1]);     p0++;
      *p1 = mult(*p1, psign[i1+1]);   p1++;
      *p2 = mult(*p2, psign[i1+2]);   p2++;
      *p3 = mult(*p3, psign[i1+3]);   p3++;
    }
  }

  p0 = rri1i2;
  p1 = rri1i3;
  p2 = rri1i4;

  for(i1=1; i1<L_SUBFR; i1+=STEP)
  {
    psign = sign_dn;
    if (psign[i1] < 0) psign = sign_dn_inv;

    for(i2=2; i2<L_SUBFR; i2+=STEP)
    {
      *p0 = mult(*p0, psign[i2]);     p0++;
      *p1 = mult(*p1, psign[i2+1]);   p1++;
      *p2 = mult(*p2, psign[i2+2]);   p2++;
    }
  }

  p0 = rri2i3;
  p1 = rri2i4;

  for(i2=2; i2<L_SUBFR; i2+=STEP)
  {
    psign = sign_dn;
    if (psign[i2] < 0) psign = sign_dn_inv;

    for(i3=3; i3<L_SUBFR; i3+=STEP)
    {
      *p0 = mult(*p0, psign[i3]);     p0++;
      *p1 = mult(*p1, psign[i3+1]);   p1++;
    }
  }

  /*-------------------------------------------------------------------*
   * Search the optimum positions of the four pulses which maximize    *
   *     square(correlation) / energy                                  *
   *-------------------------------------------------------------------*/

  psk = -1;
  alpk = 1;

  ptr_rri0i3_i4 = rri0i3;
  ptr_rri1i3_i4 = rri1i3;
  ptr_rri2i3_i4 = rri2i3;
  ptr_rri3i3_i4 = rri3i3;

  /* Initializations only to remove warning from some compilers */

  ip0=0; ip1=1; ip2=2; ip3=3; ix=0; iy=0; ps=0;

  /* search 2 times: track 3 and 4 */
  for (track=3; track<5; track++)
  {
    /*------------------------------------------------------------------*
     * depth first search 3, phase A: track 2 and 3/4.                  *
     *------------------------------------------------------------------*/

    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 2 */

    prev_i0 = -1;

    for (i=0; i<2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 2 (skipping prev_i0) */
      for (j=2; j<L_SUBFR; j+=STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0,j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = rri2i2 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 3/4 */

      p0 = ptr_rri2i3_i4 + shl(j, 3);
      p1 = ptr_rri3i3_i4;

      for (i1=track; i1<L_SUBFR; i1+=STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0++, _1_2);
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        /* Cross-multiplied comparison: sq2/alp_16 > sq/alp */
        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* i1_offset = 8*(i1/5) */

    /*------------------------------------------------------------------*
     * depth first search 3, phase B: track 0 and 1.                    *
     *------------------------------------------------------------------*/

    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */

    p0 = rri1i2 + mult(i0, 6554);
    p1 = ptr_rri1i3_i4 + mult(i1, 6554);
    p2 = rri1i1;
    p3 = tmp_vect;

    for (i3=1; i3<L_SUBFR; i3+=STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);        p0 += NB_POS;
      s = L_mac(s, *p1, _1_4);      p1 += NB_POS;
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 0 */

    p0 = rri0i2 + mult(i0, 6554);
    p1 = ptr_rri0i3_i4 + mult(i1, 6554);
    p2 = rri0i0;
    p3 = rri0i1;

    for (i2=0; i2<L_SUBFR; i2+=STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1, _1_8);       p1 += NB_POS;
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 1 */

      p4 = tmp_vect;

      for (i3=1; i3<L_SUBFR; i3+=STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

    /*----------------------------------------------------------------*
     * depth first search 3: compare codevector with the best case.   *
     *----------------------------------------------------------------*/

    s = L_msu(L_mult(alpk,sq),psk,alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip2 = i0;
      ip3 = i1;
      ip0 = ix;
      ip1 = iy;
    }

    /*------------------------------------------------------------------*
     * depth first search 4, phase A: track 3 and 0.                    *
     *------------------------------------------------------------------*/

    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 3/4 */

    prev_i0 = -1;

    for (i=0; i<2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 3/4 (skipping prev_i0) */
      for (j=track; j<L_SUBFR; j+=STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0,j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = ptr_rri3i3_i4 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 0 */

      p0 = ptr_rri0i3_i4 + j;
      p1 = rri0i0;

      for (i1=0; i1<L_SUBFR; i1+=STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0, _1_2);       p0 += NB_POS;
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* i1_offset = 8*(i1/5) */

    /*------------------------------------------------------------------*
     * depth first search 4, phase B: track 1 and 2.                    *
     *------------------------------------------------------------------*/

    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */

    p0 = ptr_rri2i3_i4 + mult(i0, 6554);
    p1 = rri0i2 + i1_offset;
    p2 = rri2i2;
    p3 = tmp_vect;

    for (i3=2; i3<L_SUBFR; i3+=STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);         p0 += NB_POS;
      s = L_mac(s, *p1++, _1_4);
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 1 */

    p0 = ptr_rri1i3_i4 + mult(i0, 6554);
    p1 = rri0i1 + i1_offset;
    p2 = rri1i1;
    p3 = rri1i2;

    for (i2=1; i2<L_SUBFR; i2+=STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1++, _1_8);
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 2 */

      p4 = tmp_vect;

      for (i3=2; i3<L_SUBFR; i3+=STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

    /*----------------------------------------------------------------*
     * depth first search 4: compare codevector with the best case.   *
     *----------------------------------------------------------------*/

    s = L_msu(L_mult(alpk,sq),psk,alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip3 = i0;
      ip0 = i1;
      ip1 = ix;
      ip2 = iy;
    }

    /* Second pass of the outer loop uses the "i4" correlation blocks. */
    ptr_rri0i3_i4 = rri0i4;
    ptr_rri1i3_i4 = rri1i4;
    ptr_rri2i3_i4 = rri2i4;
    ptr_rri3i3_i4 = rri4i4;
  }

  /* Set the sign of impulses (i0..i3 are reused to hold the signs) */

  i0 = sign_dn[ip0];
  i1 = sign_dn[ip1];
  i2 = sign_dn[ip2];
  i3 = sign_dn[ip3];

  /* Find the codeword corresponding to the selected positions */

  for(i=0; i<L_SUBFR; i++) {
    cod[i] = 0;
  }

  cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */
  cod[ip1] = shr(i1, 2);
  cod[ip2] = shr(i2, 2);
  cod[ip3] = shr(i3, 2);

  /* find the filtered codeword */

  /* y[0..ip0-1] is cleared and y[ip0..] is assigned by the first pulse, */
  /* so every entry is initialised before the other pulses accumulate.   */
  for (i = 0; i < ip0; i++) y[i] = 0;

  if(i0 > 0)
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) y[i] = h[j];
  else
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) y[i] = negate(h[j]);

  if(i1 > 0)
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  if(i2 > 0)
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  if(i3 > 0)
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  /* find codebook index;  17-bit address */
  /* Sign word: bit k is set when pulse k is positive. */

  i = 0;
  if(i0 > 0) i = add(i, 1);
  if(i1 > 0) i = add(i, 2);
  if(i2 > 0) i = add(i, 4);
  if(i3 > 0) i = add(i, 8);
  *sign = i;

  ip0 = mult(ip0, 6554);         /* ip0/5 */
  ip1 = mult(ip1, 6554);         /* ip1/5 */
  ip2 = mult(ip2, 6554);         /* ip2/5 */
  i   = mult(ip3, 6554);         /* ip3/5 */
  j   = add(i, shl(i, 2));       /* j = i*5 */
  j   = sub(ip3, add(j, 3));     /* j= ip3%5 -3 */
  ip3 = add(shl(i, 1), j);       /* 2*(ip3/5) + (ip3%5 - 3) */

  /* Pack the four fields at bit offsets 0, 3, 6 and 9. */
  i = add(ip0, shl(ip1, 3));
  i = add(i  , shl(ip2, 6));
  i = add(i  , shl(ip3, 9));

  return i;
}
/* Emit the setup program for a line (two vertices): for every setup
 * register, compute the start value and the x/y gradients of the
 * attributes it holds and write them to the URB.
 */
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint reg;

   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate) {
      alloc_regs(c);
   }

   invert_det(c);
   copy_z_inv_w(c);

   if (c->has_flat_shading) {
      do_flatshade_line(c);
   }

   for (reg = 0; reg < c->nr_setup_regs; reg++) {
      /* Pair of incoming attributes, one register per vertex: */
      struct brw_reg attr0 = offset(c->vert[0], reg);
      struct brw_reg attr1 = offset(c->vert[1], reg);
      GLushort pc, pc_persp, pc_linear;
      bool is_last = calculate_masks(c, reg, &pc, &pc_persp, &pc_linear);

      /* Channels flagged in pc_persp are multiplied by each vertex's
       * inv_w first.
       */
      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, attr0, attr0, c->inv_w[0]);
         brw_MUL(p, attr1, attr1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color: */
      if (pc_linear) {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, attr1, negate(attr0));

         /* m1Cx = (a1 - a0) * dx0 * inv_det */
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         /* m2Cy = (a1 - a0) * dy0 * inv_det */
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      set_predicate_control_flag_value(p, c, pc);

      /* start point for interpolation */
      brw_MOV(p, c->m3C0, attr0);

      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0),
                    is_last ? BRW_URB_WRITE_EOT_COMPLETE
                            : BRW_URB_WRITE_NO_FLAGS,
                    4,       /* msg len */
                    0,       /* response len */
                    reg*4,   /* urb destination offset */
                    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
void cosine_of_angle(void) { int n; double d; if (car(p1) == symbol(ARCCOS)) { push(cadr(p1)); return; } if (isdouble(p1)) { d = cos(p1->u.d); if (fabs(d) < 1e-10) d = 0.0; push_double(d); return; } // cosine function is symmetric, cos(-x) = cos(x) if (isnegative(p1)) { push(p1); negate(); p1 = pop(); } // cos(arctan(x)) = 1 / sqrt(1 + x^2) // see p. 173 of the CRC Handbook of Mathematical Sciences if (car(p1) == symbol(ARCTAN)) { push_integer(1); push(cadr(p1)); push_integer(2); power(); add(); push_rational(-1, 2); power(); return; } // multiply by 180/pi push(p1); push_integer(180); multiply(); push_symbol(PI); divide(); n = pop_integer(); if (n < 0) { push(symbol(COS)); push(p1); list(2); return; } switch (n % 360) { case 90: case 270: push_integer(0); break; case 60: case 300: push_rational(1, 2); break; case 120: case 240: push_rational(-1, 2); break; case 45: case 315: push_rational(1, 2); push_integer(2); push_rational(1, 2); power(); multiply(); break; case 135: case 225: push_rational(-1, 2); push_integer(2); push_rational(1, 2); power(); multiply(); break; case 30: case 330: push_rational(1, 2); push_integer(3); push_rational(1, 2); power(); multiply(); break; case 150: case 210: push_rational(-1, 2); push_integer(3); push_rational(1, 2); power(); multiply(); break; case 0: push_integer(1); break; case 180: push_integer(-1); break; default: push(symbol(COS)); push(p1); list(2); break; } }
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->flag_value = 0xff; c->nr_verts = 1; if (allocate) alloc_regs(c); copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear, pc_coord_replace; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); pc_coord_replace = calculate_point_sprite_mask(c, i); pc_persp &= ~pc_coord_replace; if (pc_persp) { set_predicate_control_flag_value(p, c, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); } /* Point sprite coordinate replacement: A texcoord with this * enabled gets replaced with the value (x, y, 0, 1) where x and * y vary from 0 to 1 across the horizontal and vertical of the * point. */ if (pc_coord_replace) { set_predicate_control_flag_value(p, c, pc_coord_replace); /* Caculate 1.0/PointWidth */ brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, 0, c->dx0, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); brw_set_access_mode(p, BRW_ALIGN_16); /* dA/dx, dA/dy */ brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } /* attribute constant offset */ brw_MOV(p, c->m3C0, brw_imm_f(0.0)); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); } else { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); } brw_set_access_mode(p, BRW_ALIGN_1); } if (pc & ~pc_coord_replace) { set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace); brw_MOV(p, c->m1Cx, brw_imm_ud(0)); brw_MOV(p, c->m2Cy, brw_imm_ud(0)); brw_MOV(p, c->m3C0, a0); /* constant value */ } set_predicate_control_flag_value(p, c, pc); /* Copy m0..m3 to URB. 
*/ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), last ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS, 4, /* msg len */ 0, /* response len */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } brw_set_predicate_control(p, BRW_PREDICATE_NONE); }
// Attempts to express y as an integer combination of the rows of A.
//
// x receives one coefficient per row of A; y must have one entry per column
// of A (otherwise Error() is raised).  Returns 1 and sets x on success,
// returns 0 when no solution is found.
//
// reduce selects post-processing of the solution:
//   0 - return the solution as first computed;
//   1 - append it to the kernel rows of the transform and run image() over
//       that matrix, returning the resulting last row;
//   2 - same as 1, but the kernel rows are run through LLL() first.
// Any other value raises Error().
long LatticeSolve(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& y, long reduce)
{
   const long n = A.NumRows();
   const long m = A.NumCols();

   if (y.length() != m)
      Error("LatticeSolve: dimension mismatch");

   if (reduce < 0 || reduce > 2)
      Error("LatticeSolve: bad reduce parameter");

   // The zero vector is always reachable with all-zero coefficients.
   if (IsZero(y)) {
      x.SetLength(n);
      clear(x);
      return 1;
   }

   ZZ det2;

   // Row-reduce a copy of A, keeping the transform in U1.  The last im_rank
   // rows of A1 (and of U1) are the ones used below; the first ker_rank
   // rows are treated as the kernel part.
   mat_ZZ A1, U1;
   A1 = A;
   const long im_rank = image(det2, A1, U1);
   const long ker_rank = n - im_rank;

   // Stack y underneath the image rows and reduce again.
   mat_ZZ A2, U2;
   A2.SetDims(im_rank + 1, m);
   for (long row = 1; row <= im_rank; row++)
      A2(row) = A1(ker_rank + row);
   A2(im_rank + 1) = y;

   const long new_rank = image(det2, A2, U2);

   // Solvable only if y did not raise the rank and its coefficient in the
   // first row of U2 is a unit (+1 or -1).
   if (new_rank != im_rank)
      return 0;

   const ZZ& y_coeff = U2(1)(im_rank + 1);
   if (y_coeff != 1 && y_coeff != -1)
      return 0;

   // Coefficients relative to the image rows; the sign is flipped when y's
   // own coefficient is +1.
   vec_ZZ x1;
   x1.SetLength(im_rank);
   for (long col = 1; col <= im_rank; col++)
      x1(col) = U2(1)(col);

   if (y_coeff == 1)
      negate(x1, x1);

   // Map the coefficients back through U1 to coefficients over the rows of A.
   vec_ZZ x2, term;
   x2.SetLength(n);
   clear(x2);
   term.SetLength(n);

   for (long row = 1; row <= im_rank; row++) {
      mul(term, U1(ker_rank + row), x1(row));
      add(x2, x2, term);
   }

   switch (reduce) {
   case 0:
      x = x2;
      return 1;

   case 2:
      U1.SetDims(ker_rank, n);
      LLL(det2, U1);
      // fall through: the remaining steps are shared with reduce == 1

   case 1:
      U1.SetDims(ker_rank + 1, n);
      U1(ker_rank + 1) = x2;
      image(det2, U1);

      x = U1(ker_rank + 1);
      return 1;

   default:
      // Only reachable if Error() above returned for a bad reduce value.
      return 0;
   }
}
/*************************************************************************** Function: bits_to_words Syntax: bits_to_words(UWord32 *region_mlt_bits, Word16 *region_mlt_bit_counts, Word16 *drp_num_bits, UWord16 *drp_code_bits, Word16 *out_words, Word16 categorization_control, Word16 number_of_regions, Word16 num_categorization_control_bits, Word16 number_of_bits_per_frame) Description: Stuffs the bits into words for output WMOPS: 7kHz | 24kbit | 32kbit -------|--------------|---------------- AVG | 0.09 | 0.12 -------|--------------|---------------- MAX | 0.10 | 0.13 -------|--------------|---------------- 14kHz | 24kbit | 32kbit | 48kbit -------|--------------|----------------|---------------- AVG | 0.12 | 0.15 | 0.19 -------|--------------|----------------|---------------- MAX | 0.14 | 0.17 | 0.21 -------|--------------|----------------|---------------- ***************************************************************************/ void bits_to_words(UWord32 *region_mlt_bits, Word16 *region_mlt_bit_counts, Word16 *drp_num_bits, UWord16 *drp_code_bits, Word16 *out_words, Word16 categorization_control, Word16 number_of_regions, Word16 num_categorization_control_bits, Word16 number_of_bits_per_frame) { Word16 out_word_index = 0; Word16 j; Word16 region; Word16 out_word; Word16 region_bit_count; Word16 current_word_bits_left; UWord16 slice; Word16 out_word_bits_free = 16; UWord32 *in_word_ptr; UWord32 current_word; Word32 acca; Word32 accb; Word16 temp; /* First set up the categorization control bits to look like one more set of region power bits. */ out_word = 0; move16(); drp_num_bits[number_of_regions] = num_categorization_control_bits; move16(); drp_code_bits[number_of_regions] = (UWord16)categorization_control; move16(); /* These code bits are right justified. 
*/ for (region=0; region <= number_of_regions; region++) { current_word_bits_left = drp_num_bits[region]; move16(); current_word = (UWord32)drp_code_bits[region]; move16(); j = sub(current_word_bits_left,out_word_bits_free); test(); if (j >= 0) { temp = extract_l(L_shr(current_word,j)); out_word = add(out_word,temp); out_words[out_word_index++] = out_word; move16(); out_word_bits_free = 16; move16(); out_word_bits_free = sub(out_word_bits_free,j); acca = (current_word << out_word_bits_free); out_word = extract_l(acca); } else { j = negate(j); acca = (current_word << j); accb = L_deposit_l(out_word); acca = L_add(accb,acca); out_word = extract_l(acca); out_word_bits_free = sub(out_word_bits_free,current_word_bits_left); } } /* These code bits are left justified. */ for (region=0;region<number_of_regions; region++) { accb = L_deposit_l(out_word_index); accb = L_shl(accb,4); accb = L_sub(accb,number_of_bits_per_frame); test(); if(accb < 0) { temp = shl(region,2); in_word_ptr = ®ion_mlt_bits[temp]; region_bit_count = region_mlt_bit_counts[region]; move16(); temp = sub(32,region_bit_count); test(); if(temp > 0) current_word_bits_left = region_bit_count; else current_word_bits_left = 32; current_word = *in_word_ptr++; acca = L_deposit_l(out_word_index); acca = L_shl(acca,4); acca = L_sub(acca,number_of_bits_per_frame); /* from while loop */ test(); test(); logic16(); while ((region_bit_count > 0) && (acca < 0)) { /* from while loop */ test(); test(); logic16(); temp = sub(current_word_bits_left,out_word_bits_free); test(); if (temp >= 0) { temp = sub(32,out_word_bits_free); accb = LU_shr(current_word,temp); slice = (UWord16)extract_l(accb); out_word = add(out_word,slice); test(); current_word <<= out_word_bits_free; current_word_bits_left = sub(current_word_bits_left,out_word_bits_free); out_words[out_word_index++] = extract_l(out_word); move16(); out_word = 0; move16(); out_word_bits_free = 16; move16(); } else { temp = sub(32,current_word_bits_left); accb = 
LU_shr(current_word,temp); slice = (UWord16)extract_l(accb); temp = sub(out_word_bits_free,current_word_bits_left); test(); accb = slice << temp; acca = L_deposit_l(out_word); acca = L_add(acca,accb); out_word = extract_l(acca); out_word_bits_free = sub(out_word_bits_free,current_word_bits_left); current_word_bits_left = 0; move16(); } test(); if (current_word_bits_left == 0) { current_word = *in_word_ptr++; region_bit_count = sub(region_bit_count,32); /* current_word_bits_left = MIN(32,region_bit_count); */ temp = sub(32,region_bit_count); test(); if(temp > 0) current_word_bits_left = region_bit_count; else current_word_bits_left = 32; } acca = L_deposit_l(out_word_index); acca = L_shl(acca,4); acca = L_sub(acca,number_of_bits_per_frame); } accb = L_deposit_l(out_word_index); accb = L_shl(accb,4); accb = L_sub(accb,number_of_bits_per_frame); } } /* Fill out with 1's. */ test(); while (acca < 0) { test(); current_word = 0x0000ffff; move32(); temp = sub(16,out_word_bits_free); acca = LU_shr(current_word,temp); slice = (UWord16)extract_l(acca); out_word = add(out_word,slice); out_words[out_word_index++] = out_word; move16(); out_word = 0; move16(); out_word_bits_free = 16; move16(); acca = L_deposit_l(out_word_index); acca = L_shl(acca,4); acca = L_sub(acca,number_of_bits_per_frame); } }