Esempio n. 1
0
/***************************************************************************
 Function:    vector_huffman

 Syntax:      Word16 vector_huffman(Word16  category,     
                                    Word16  power_index,  
                                    Word16  *raw_mlt_ptr,  
                                    UWord32 *word_ptr)     
                                              
              inputs:     Word16  category
                          Word16  power_index
                          Word16  *raw_mlt_ptr
             
              outputs:    number_of_region_bits
                          *word_ptr
                                      

 Description: Huffman encoding for each region based on category and power_index  

 WMOPS:     7kHz |    24kbit    |     32kbit
          -------|--------------|----------------
            AVG  |    0.03      |     0.03
          -------|--------------|----------------  
            MAX  |    0.04      |     0.04
          -------|--------------|---------------- 

           14kHz |    24kbit    |     32kbit     |     48kbit
          -------|--------------|----------------|----------------
            AVG  |    0.03      |     0.03       |     0.03
          -------|--------------|----------------|----------------
            MAX  |    0.04      |     0.04       |     0.04
          -------|--------------|----------------|----------------

***************************************************************************/
Word16 vector_huffman(Word16 category,
                      Word16 power_index,
                      Word16 *raw_mlt_ptr,
                      UWord32 *word_ptr)
{
    /*
     * Quantizes and Huffman-codes one region.
     *
     * The region is processed as number_of_vectors[category] vectors of
     * vector_dimension[category] coefficients each, read sequentially from
     * raw_mlt_ptr.  For every vector a combined table index is built from the
     * quantized magnitudes, the code word and its length are looked up in the
     * per-category tables, one sign bit per non-zero coefficient is appended,
     * and the result is packed MSB-first into 32-bit words written through
     * word_ptr.
     *
     * Returns the total number of bits produced (code bits plus sign bits).
     * One output word is always written after the last vector, holding any
     * partially filled word.
     *
     * NOTE(review): move16()/move32()/test() look like complexity-counting
     * instrumentation with no effect on the data computed here -- confirm.
     * NOTE(review): L_mult presumably returns the doubled product (ITU-T
     * fractional multiply), which is why every L_mult below is followed by
     * L_shr(...,1) -- confirm against the basic-operator definitions.
     */
 

    Word16  inv_of_step_size_times_std_dev;
    Word16  j,n;
    Word16  k;
    Word16  number_of_region_bits;
    Word16  number_of_non_zero;
    Word16  vec_dim;
    Word16  num_vecs;
    Word16  kmax, kmax_plus_one;
    Word16  index,signs_index;
    Word16  *bitcount_table_ptr;
    UWord16 *code_table_ptr;
    Word32  code_bits;
    Word16  number_of_code_bits;
    UWord32 current_word;
    Word16  current_word_bits_free;
    
    Word32 acca;
    Word32 accb;
    Word16 temp;

    Word16 mytemp;			 /* new variable in Release 1.2 */
    Word16 myacca;			 /* new variable in Release 1.2 */


    /* initialize variables */
    vec_dim = vector_dimension[category];
    move16();

    num_vecs = number_of_vectors[category];
    move16();

    /* largest magnitude index representable in this category */
    kmax = max_bin[category];
    move16();

    /* radix used when combining the per-coefficient values into one index */
    kmax_plus_one = add(kmax,1);
    move16();

    /* output word under construction and the number of unused bits in it */
    current_word = 0L;
    move16();

    current_word_bits_free = 32;
    move16();

    number_of_region_bits = 0;
    move16();

    /* set up table pointers */
    bitcount_table_ptr = (Word16 *)table_of_bitcount_tables[category];
    code_table_ptr = (UWord16 *) table_of_code_tables[category];

    /* compute inverse of step size * standard deviation */
    /* (+4096 before the shift by 13 rounds to nearest: 4096 = 2^13 / 2) */
    acca = L_mult(step_size_inverse_table[category],standard_deviation_inverse_table[power_index]);
    acca = L_shr(acca,1);
    acca = L_add(acca,4096);
    acca = L_shr(acca,13);

	/*
	 *  The next two lines are new to Release 1.2 
	 *
	 *  The two least significant bits of the scale factor are kept
	 *  separately in mytemp; the remaining upper part is used as the
	 *  16-bit multiplier.
	 */
     
	mytemp = acca & 0x3;
    acca = L_shr(acca,2);

    inv_of_step_size_times_std_dev = extract_l(acca);


    /* one code word (plus sign bits) is produced per vector */
    for (n=0; n<num_vecs; n++)
    {
        index = 0;
        move16();
        
        signs_index = 0;
        move16();
        
        number_of_non_zero = 0;
        move16();
        
        for (j=0; j<vec_dim; j++)
        {
            /* quantize the magnitude of the current coefficient */
            k = abs_s(*raw_mlt_ptr);
            
            acca = L_mult(k,inv_of_step_size_times_std_dev);
            acca = L_shr(acca,1);
		    
			/*
			 *  The next four lines are new to Release 1.2
			 *
			 *  Contribution of the two low scale-factor bits held in
			 *  mytemp; each intermediate result is truncated to Word16
			 *  by the casts.
			 */

			myacca = (Word16)L_mult(k,mytemp);
			myacca = (Word16)L_shr(myacca,1);
			myacca = (Word16)L_add(myacca,int_dead_zone_low_bits[category]);
			myacca = (Word16)L_shr(myacca,2);

            acca = L_add(acca,int_dead_zone[category]);

			/*
			 *  The next two lines are new to Release 1.2
			 */

			acca = L_add(acca,myacca);
			acca = L_shr(acca,13);

            k = extract_l(acca);

            test();
            if (k != 0)
            {
                /* non-zero value: count it and append one sign bit
                   (1 when the original coefficient is positive) */
                number_of_non_zero = add(number_of_non_zero,1);
                signs_index = shl(signs_index,1);
                
                test();
                if (*raw_mlt_ptr > 0)
                {
                    signs_index = add(signs_index,1);
                }
                
                /* clamp the magnitude index to kmax */
                temp = sub(k,kmax);
                test();
                if (temp > 0)
                {
                    k = kmax;
                    move16();
                }
            }
            /* index = index * (kmax + 1) + k */
            acca = L_shr(L_mult(index,(kmax_plus_one)),1);
            index = extract_l(acca);
            index = add(index,k);
            raw_mlt_ptr++;
        }

        /* look up the code word and its length; the sign bits add
           number_of_non_zero bits to the length */
        code_bits = *(code_table_ptr+index);
        number_of_code_bits = add((*(bitcount_table_ptr+index)),number_of_non_zero);
        number_of_region_bits = add(number_of_region_bits,number_of_code_bits);

        /* append the sign bits below the code word */
        acca = code_bits << number_of_non_zero;
        accb = L_deposit_l(signs_index);
        acca = L_add(acca,accb);
        code_bits = acca;
        move32();

        /* msb of codebits is transmitted first. */
        /* j = free bits left in the current word after adding this code */
        j = sub(current_word_bits_free,number_of_code_bits);
        test();
        if (j >= 0)
        {
            /* the whole code fits into the current word */
            test();
            acca = code_bits << j;
            current_word = L_add(current_word,acca);
            current_word_bits_free = j;
            move16();
        }
        else
        {
            /* the code straddles a word boundary: the upper part completes
               the current word, the remaining j low bits start the next */
            j = negate(j);
            acca = L_shr(code_bits,j);
            current_word = L_add(current_word,acca);
            
            *word_ptr++ = current_word;
            move16();

            current_word_bits_free = sub(32,j);
            test();
            /* NOTE(review): this left shift of a signed Word32 relies on the
               high bits being discarded -- confirm this is intended on all
               target compilers */
            current_word = code_bits << current_word_bits_free;
        }
    }

    /* write out the last (possibly partially filled) word */
    *word_ptr++ = current_word;
    move16();

    return (number_of_region_bits);
}
Esempio n. 2
0
// Unary minus for mat_GF2E: builds a fresh matrix holding negate(a)
// and hands it back through NTL_OPT_RETURN.
mat_GF2E operator-(const mat_GF2E& a)
{
   mat_GF2E negated;

   negate(negated, a);

   NTL_OPT_RETURN(mat_GF2E, negated);
}
Esempio n. 3
0
static
long swap(long k, mat_ZZ& B, vec_long& P, vec_ZZ& D,
          vec_vec_ZZ& lam, mat_ZZ* U, long m, long verbose)

// swaps vectors k-1 and k;  assumes P(k-1) != 0
// returns 1 if vector k-1 need to be reduced after the swap...
//    this only occurs in 'case 2' when there are linear dependencies
//
// Arguments, as used in this body:
//    B       - matrix whose rows k-1 and k are exchanged (or combined in case 2)
//    P       - index vector; P(j) == 0 marks an entry that is skipped
//    D, lam  - auxiliary quantities updated to match the new row order
//              (presumably the Gram-Schmidt determinants / coefficients
//              of an integer LLL reduction -- confirm with the caller)
//    U       - optional transformation matrix; if non-null it receives the
//              same row operations as B
//    m       - upper bound for the row indices i, j updated here
//    verbose - not used in this body
//
// The scratch integers are static, so they are shared between calls;
// the function is therefore not reentrant.

{
   long i, j;
   static ZZ t1, t2, t3, e, x, y;


   if (P(k) != 0) {
      // case 1: both P(k-1) and P(k) are nonzero -- plain exchange of
      // rows k-1 and k, followed by an update of lam and D

      swap(B(k-1), B(k));
      if (U) swap((*U)(k-1), (*U)(k));

      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            swap(lam(k-1)(P(j)), lam(k)(P(j)));

      // both new values are computed from the old lam(i) entries into
      // t1/t2 before either entry is overwritten
      // NOTE(review): MulAddDiv/MulSubDiv presumably compute
      // (a*b + c*d)/e and (a*b - c*d)/e -- confirm against their definitions
      for (i = k+1; i <= m; i++) {
         MulAddDiv(t1, lam(i)(P(k)-1), lam(i)(P(k)),
                   lam(k)(P(k)-1), D[P(k)-2], D[P(k)-1]);
         MulSubDiv(t2, lam(i)(P(k)-1), lam(i)(P(k)),
                   D[P(k)], lam(k)(P(k)-1), D[P(k)-1]);
         lam(i)(P(k)-1) = t1;
         lam(i)(P(k)) = t2;
      }

      MulAddDiv(D[P(k)-1], D[P(k)], lam(k)(P(k)-1),
                D[P(k)-2], lam(k)(P(k)-1), D[P(k)-1]);

      return 0;
   }
   else if (!IsZero(lam(k)(P(k-1)))) {
      // case 2: P(k) == 0 but lam(k)(P(k-1)) != 0 -- rows k-1 and k are
      // combined through a transformation built from an extended GCD

      XGCD(e, x, y, lam(k)(P(k-1)), D[P(k-1)]);

      ExactDiv(t1, lam(k)(P(k-1)), e);
      ExactDiv(t2, D[P(k-1)], e);

      // t3 keeps D[P(k-1)]/e; t2 becomes its negative for RowTransform
      t3 = t2;
      negate(t2, t2);
      RowTransform(B(k-1), B(k), t1, t2, y, x);
      if (U) RowTransform((*U)(k-1), (*U)(k), t1, t2, y, x);
      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            RowTransform(lam(k-1)(P(j)), lam(k)(P(j)), t1, t2, y, x);

      // from here on t2 holds (D[P(k-1)]/e)^2, which is divided out of
      // D and of the later lam entries
      sqr(t2, t2);
      ExactDiv(D[P(k-1)], D[P(k-1)], t2);

      for (i = k+1; i <= m; i++)
         if (P(i) != 0) {
            ExactDiv(D[P(i)], D[P(i)], t2);
            for (j = i+1; j <= m; j++) {
               ExactDiv(lam(j)(P(i)), lam(j)(P(i)), t2);
            }
         }

      for (i = k+1; i <= m; i++) {
         ExactDiv(lam(i)(P(k-1)), lam(i)(P(k-1)), t3);
      }

      swap(P(k-1), P(k));

      // the only case in which the caller is told to reduce vector k-1 again
      return 1;
   }
   else {
      // case 3: P(k) == 0 and lam(k)(P(k-1)) == 0 -- exchange the rows
      // and the P entries; D and the remaining lam entries are untouched

      swap(B(k-1), B(k));
      if (U) swap((*U)(k-1), (*U)(k));

      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            swap(lam(k-1)(P(j)), lam(k)(P(j)));

      swap(P(k-1), P(k));

      return 0;
   }
}
Esempio n. 4
0
// Computes res = cos(x) at the current RR precision.
//
// The argument is reduced by writing x/pi = (n + 1/2) + f with |f| < 1/2,
// after which the result is obtained from the power series of sin applied
// to pi*f (negated when n is even).  The working precision is raised
// temporarily and set back to its value on entry before res is assigned.
// Arguments with Lg2(x) > 1000 are rejected through Error().
void cos(RR& res, const RR& x)
{
   // cos(0) is exactly 1; also keeps x == 0 away from Lg2(x) below
   if (x == 0) {
      res = 1;
      return;
   }

   // NOTE(review): assumes Error() does not return -- confirm
   if (Lg2(x) > 1000)
      Error("cos: sorry...argument too large in absolute value");

   // precision on entry; restored before the final assignment to res
   long p = RR::precision();

   RR pi, t1, f;
   RR n;

   // we want to write x/pi = (n+1/2) + f, |f| < 1/2....
   // but we have to do *this* very carefully, so that f is computed
   // to precision > p.  I know, this is sick!

   long p1;

   // initial working precision for the reduction
   p1 = p + Lg2(x) + 20;


   // repeat the reduction with growing precision p1 until f carries
   // enough significant bits
   for (;;) {
      RR::SetPrecision(p1);
      ComputePi(pi);
      xcopy(t1, x/pi);
      xcopy(n, floor(t1));
      xcopy(f, t1 - (n + 0.5));

      if (f == 0 || p1 < p - Lg2(f) + Lg2(n) + 10) {
         // we don't have enough bits of f...increase p1 and continue

         p1 = p1 + max(20, p1/10);
      }
      else
         break;
   }

   // precision used for scaling f by pi and for summing the series
   RR::SetPrecision(p + NumBits(p) + 10);
   ComputePi(pi);

   xcopy(f, pi * f);

   // NOTE(review): the exponent test presumably relies on RR keeping an odd
   // mantissa, so that a nonzero integer n with exponent != 0 is even --
   // confirm against the RR representation
   if (n == 0 || n.exponent() != 0) {
      // n is even, so we negate f, which negates sin(f)

      xcopy(f, -f);
   }

   // Boy, that was painful, but now its over, and we can simply apply
   // the series for sin(f)

   // t2 is declared but not used below
   RR t2, s, s1, t;
   long i;

   // s accumulates the partial sum, t is the current term (starting at f)
   s = 0;
   xcopy(t, f);

   for (i = 3; ; i=i+2) {
      add(s1, s, t);
      // stop as soon as adding the term no longer changes the sum
      if (s == s1) break;
      xcopy(s, s1);
      // next term: t = -t * f^2 / ((i-1) * i)
      mul(t, t, f);
      mul(t, t, f);
      div(t, t, i-1);
      div(t, t, i);
      negate(t, t);
   }

   RR::SetPrecision(p);

   // copy s into res now that precision p is back in effect
   xcopy(res, s);

}
Esempio n. 5
0
// Parses a decimal floating-point literal from the C string s into x.
//
// Accepted form: optional leading white space, optional '-', an optional
// run of digits, an optional '.' followed by an optional run of digits,
// and an optional exponent ('e' or 'E', optional sign, at least one digit).
// At least one digit or an exponent must be present; a lone '.' is
// rejected.  Characters following the parsed number are not examined.
// A null pointer or malformed input is reported through Error().
void conv(RR& x, const char *s)
{
   if (!s) Error("bad RR input");

   long pos = 0;
   long ch = s[pos];

   // skip leading white space
   while (IsWhiteSpace(ch))
      ch = s[++pos];

   // optional minus sign of the mantissa
   long sign = 1;
   if (ch == '-') {
      sign = -1;
      ch = s[++pos];
   }

   // The mantissa is gathered as the integer "digits" together with
   // "scale" = 10^(number of fractional digits).
   ZZ digits, scale;
   digits = 0;
   scale = 1;

   bool haveIntPart = false;
   bool haveDot = false;
   bool haveFracPart = false;

   long dval = CharToIntVal(ch);

   // integer part
   while (dval >= 0 && dval <= 9) {
      haveIntPart = true;
      mul(digits, digits, 10);
      add(digits, digits, dval);
      ch = s[++pos];
      dval = CharToIntVal(ch);
   }

   // fractional part
   if (ch == '.') {
      haveDot = true;

      ch = s[++pos];
      dval = CharToIntVal(ch);

      while (dval >= 0 && dval <= 9) {
         haveFracPart = true;
         mul(digits, digits, 10);
         add(digits, digits, dval);
         mul(scale, scale, 10);
         ch = s[++pos];
         dval = CharToIntVal(ch);
      }
   }

   if (haveDot && !haveIntPart && !haveFracPart)  Error("bad RR input");

   // exponent part
   ZZ e;
   bool haveExp = false;
   long expSign = 1;

   if (ch == 'e' || ch == 'E') {
      haveExp = true;

      ch = s[++pos];

      if (ch == '-') {
         expSign = -1;
         ch = s[++pos];
      }
      else if (ch == '+') {
         ch = s[++pos];
      }

      dval = CharToIntVal(ch);

      if (dval < 0 || dval > 9) Error("bad RR input");

      e = 0;
      while (dval >= 0 && dval <= 9) {
         mul(e, e, 10);
         add(e, e, dval);
         ch = s[++pos];
         dval = CharToIntVal(ch);
      }
   }

   if (!haveIntPart && !haveFracPart && !haveExp) Error("bad RR input");

   RR num, den, v;

   const long old_p = RR::precision();

   if (haveIntPart || haveFracPart) {
      // convert numerator and denominator using their own bit lengths
      // as the precision
      ConvPrec(num, digits, max(NumBits(digits), 1));
      ConvPrec(den, scale, NumBits(scale));

      // extra working precision when an exponent still has to be applied
      if (haveExp)
         RR::SetPrecision(old_p + 10);

      div(v, num, den);
   }
   else {
      // exponent only: the mantissa is 1
      set(v);
   }

   if (sign < 0)
      negate(v, v);

   if (haveExp) {
      if (e >= NTL_OVFBND) Error("RR input overflow");

      long E;
      conv(E, e);
      if (expSign < 0) E = -E;

      RR::SetPrecision(old_p + 10);

      RR pow10;
      power(pow10, to_RR(10), E);
      mul(v, v, pow10);

      // back to the caller's precision
      RR::prec = old_p;
   }

   xcopy(x, v);
}
/*
 * Counterpart of assertEqualCmp: forwards arg1, arg2 and the comparison
 * callback cmp to assertEqualCmp and returns that result passed through
 * negate().
 * NOTE(review): negate() and _BOOL_RETURN_TYPE are defined elsewhere;
 * presumably negate() yields the logical inverse -- confirm.
 */
_BOOL_RETURN_TYPE 
assertNotEqualCmp(const void *arg1, const void *arg2, cmp_fn_t cmp) {
	return negate(assertEqualCmp(arg1, arg2, cmp));
}
Esempio n. 7
0
// Unary minus for mat_ZZ_p: builds a fresh matrix holding negate(a)
// and hands it back through NTL_OPT_RETURN.
mat_ZZ_p operator-(const mat_ZZ_p& a)
{
   mat_ZZ_p negated;

   negate(negated, a);

   NTL_OPT_RETURN(mat_ZZ_p, negated);
}
Esempio n. 8
0
/*
 * Encodes one speech frame held in *state and writes the resulting analysis
 * parameters sequentially to ana[].
 *
 * Steps, in order: LPC analysis and LSP quantization, computation of the
 * weighted speech and the open-loop pitch lag, then a per-subframe loop
 * (pitch search, fixed codebook search, gain quantization, filter memory
 * update), and finally the shift of the signal buffers for the next frame.
 *
 * Parameters written to ana[] after the LSP entries, per subframe: pitch
 * index, pitch parity (first subframe only), codebook positions index,
 * codebook signs index, gain index.
 */
void Coder_ld8a(
      g729a_encoder_state *state,
     Word16 ana[]       /* output  : Analysis parameters */
)
{

  /* LPC analysis */

  Word16 Aq_t[(MP1)*2];         /* A(z)   quantized for the 2 subframes */
  Word16 Ap_t[(MP1)*2];         /* A(z/gamma)       for the 2 subframes */
  Word16 *Aq, *Ap;              /* Pointer on Aq_t and Ap_t             */

  /* Other vectors */

  Word16 h1[L_SUBFR];            /* Impulse response h1[]              */
  Word16 xn[L_SUBFR];            /* Target vector for pitch search     */
  Word16 xn2[L_SUBFR];           /* Target vector for codebook search  */
  Word16 code[L_SUBFR];          /* Fixed codebook excitation          */
  Word16 y1[L_SUBFR];            /* Filtered adaptive excitation       */
  Word16 y2[L_SUBFR];            /* Filtered fixed codebook excitation */
  Word16 g_coeff[4];             /* Correlations between xn & y1       */

  Word16 g_coeff_cs[5];
  Word16 exp_g_coeff_cs[5];      /* Correlations between xn, y1, & y2
                                     <y1,y1>, -2<xn,y1>,
                                          <y2,y2>, -2<xn,y2>, 2<y1,y2> */

  /* Scalars */

  Word16 i, j, k, i_subfr;
  Word16 T_op, T0, T0_min, T0_max, T0_frac;
  Word16 gain_pit, gain_code, index;
  Word16 temp, taming;
  Word32 L_temp;

/*------------------------------------------------------------------------*
 *  - Perform LPC analysis:                                               *
 *       * autocorrelation + lag windowing                                *
 *       * Levinson-durbin algorithm to find a[]                          *
 *       * convert a[] to lsp[]                                           *
 *       * quantize and code the LSPs                                     *
 *       * find the interpolated LSPs and convert to a[] for the 2        *
 *         subframes (both quantized and unquantized)                     *
 *------------------------------------------------------------------------*/
  {
     /* Temporary vectors */
    Word16 r_l[MP1], r_h[MP1];       /* Autocorrelations low and hi          */
    Word16 rc[M];                    /* Reflection coefficients.             */
    Word16 lsp_new[M], lsp_new_q[M]; /* LSPs at 2nd subframe                 */

    /* LP analysis */

    /* Ap_t is used here as scratch for the Levinson output; it is
       overwritten by Weight_Az() further down */
    Autocorr(state->p_window, M, r_h, r_l);              /* Autocorrelations */
    Lag_window(M, r_h, r_l);                      /* Lag windowing    */
    Levinson(r_h, r_l, Ap_t, rc);                 /* Levinson Durbin  */
    Az_lsp(Ap_t, lsp_new, state->lsp_old);               /* From A(z) to lsp */

    /* LSP quantization */

    /* NOTE(review): Qua_lsp presumably fills ana[0] and ana[1], matching
       the advance by 2 below -- confirm */
    Qua_lsp(state, lsp_new, lsp_new_q, ana);
    ana += 2;                         /* Advance analysis parameters pointer */

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes                  *
     * The interpolated parameters are in array Aq_t[].                   *
     *--------------------------------------------------------------------*/

    Int_qlpc(state->lsp_old_q, lsp_new_q, Aq_t);

    /* Compute A(z/gamma) */

    Weight_Az(&Aq_t[0],   GAMMA1, M, &Ap_t[0]);
    Weight_Az(&Aq_t[MP1], GAMMA1, M, &Ap_t[MP1]);

    /* update the LSPs for the next frame */

    Copy(lsp_new,   state->lsp_old,   M);
    Copy(lsp_new_q, state->lsp_old_q, M);
  }

 /*----------------------------------------------------------------------*
  * - Find the weighted input speech w_sp[] for the whole speech frame   *
  * - Find the open-loop pitch delay                                     *
  *----------------------------------------------------------------------*/

  /* filter both subframes of speech through the quantized A(z); the output
     is stored in state->exc */
  Residu(&Aq_t[0], &(state->speech[0]), &(state->exc[0]), L_SUBFR);
  Residu(&Aq_t[MP1], &(state->speech[L_SUBFR]), &(state->exc[L_SUBFR]), L_SUBFR);

  {
    Word16 Ap1[MP1];

    /* Ap1[0] is set once and kept for both subframes; only Ap1[1..M]
       are recomputed from the second set of weighted coefficients */
    Ap = Ap_t;
    Ap1[0] = 4096;
    for(i=1; i<=M; i++)    /* Ap1[i] = Ap[i] - 0.7 * Ap[i-1]; */
       Ap1[i] = sub(Ap[i], mult(Ap[i-1], 22938));
    Syn_filt(Ap1, &(state->exc[0]), &(state->wsp[0]), L_SUBFR, state->mem_w, 1);

    Ap += MP1;
    for(i=1; i<=M; i++)    /* Ap1[i] = Ap[i] - 0.7 * Ap[i-1]; */
       Ap1[i] = sub(Ap[i], mult(Ap[i-1], 22938));
    Syn_filt(Ap1, &(state->exc[L_SUBFR]), &(state->wsp[L_SUBFR]), L_SUBFR, state->mem_w, 1);
  }

  /* Find open loop pitch lag */

  T_op = Pitch_ol_fast(state->wsp, PIT_MAX, L_FRAME);

  /* Range for closed loop pitch search in 1st subframe */

  /* window [T_op-3, T_op+3], shifted as a whole to stay inside
     [PIT_MIN, PIT_MAX] */
  T0_min = T_op - 3;
  T0_max = T0_min + 6;
  if (T0_min < PIT_MIN)
  {
    T0_min = PIT_MIN;
    T0_max = PIT_MIN + 6;
  }
  else if (T0_max > PIT_MAX)
  {
     T0_max = PIT_MAX;
     T0_min = PIT_MAX - 6;
  }

 /*------------------------------------------------------------------------*
  *          Loop for every subframe in the analysis frame                 *
  *------------------------------------------------------------------------*
  *  To find the pitch and innovation parameters. The subframe size is     *
  *  L_SUBFR and the loop is repeated 2 times.                             *
  *     - find the weighted LPC coefficients                               *
  *     - find the LPC residual signal res[]                               *
  *     - compute the target signal for pitch search                       *
  *     - compute impulse response of weighted synthesis filter (h1[])     *
  *     - find the closed-loop pitch parameters                            *
  *     - encode the pitch delay                                           *
  *     - find target vector for codebook search                           *
  *     - codebook search                                                  *
  *     - VQ of pitch and codebook gains                                   *
  *     - update states of weighting filter                                *
  *------------------------------------------------------------------------*/

  /* Aq is advanced together with Ap but is not read inside the loop */
  Aq = Aq_t;    /* pointer to interpolated quantized LPC parameters */
  Ap = Ap_t;    /* pointer to weighted LPC coefficients             */

  for (i_subfr = 0;  i_subfr < L_FRAME; i_subfr += L_SUBFR)
  {

    /*---------------------------------------------------------------*
     * Compute impulse response, h1[], of weighted synthesis filter  *
     *---------------------------------------------------------------*/

    /* unit pulse (4096) followed by zeros, filtered in place; the zeroed
       tail of h1 is passed as the filter memory argument */
    h1[0] = 4096;
    Set_zero(&h1[1], L_SUBFR-1);
    Syn_filt(Ap, h1, h1, L_SUBFR, &h1[1], 0);

   /*----------------------------------------------------------------------*
    *  Find the target vector for pitch search:                            *
    *----------------------------------------------------------------------*/

    Syn_filt(Ap, &(state->exc[i_subfr]), xn, L_SUBFR, state->mem_w0, 0);

    /*---------------------------------------------------------------------*
     *                 Closed-loop fractional pitch search                 *
     *---------------------------------------------------------------------*/

    T0 = Pitch_fr3_fast(&(state->exc[i_subfr]), xn, h1, L_SUBFR, T0_min, T0_max,
                    i_subfr, &T0_frac);

    /* T0_min/T0_max are passed by address, so Enc_lag3 may update the
       search range used for the next subframe */
    index = Enc_lag3(T0, T0_frac, &T0_min, &T0_max,PIT_MIN,PIT_MAX,i_subfr);

    *ana++ = index;

    /* the parity value is stored for the first subframe only */
    if (i_subfr == 0) {
      *ana++ = Parity_Pitch(index);
    }

   /*-----------------------------------------------------------------*
    *   - find filtered pitch exc                                     *
    *   - compute pitch gain and limit between 0 and 1.2              *
    *   - update target vector for codebook search                    *
    *-----------------------------------------------------------------*/

    Syn_filt(Ap, &(state->exc[i_subfr]), y1, L_SUBFR, state->mem_zero, 0);

    gain_pit = G_pitch(xn, y1, g_coeff, L_SUBFR);

    /* clip pitch gain if taming is necessary */

    taming = test_err(state, T0, T0_frac);

    if( taming == 1){
      if (gain_pit > GPCLIP) {
        gain_pit = GPCLIP;
      }
    }

    /* xn2[i]   = xn[i] - y1[i] * gain_pit  */

    for (i = 0; i < L_SUBFR; i++)
    {
      /* The two commented-out lines are the basic-operator formulation that
         the plain multiply/shift below replaces.
         NOTE(review): unlike L_mult/L_shl the plain << 2 does not saturate
         and shifts a possibly negative value -- confirm the operand range
         makes this equivalent. */
      //L_temp = L_mult(y1[i], gain_pit);
      //L_temp = L_shl(L_temp, 1);               /* gain_pit in Q14 */
      L_temp = ((Word32)y1[i] * gain_pit) << 2;
      xn2[i] = sub(xn[i], extract_h(L_temp));
    }


   /*-----------------------------------------------------*
    * - Innovative codebook search.                       *
    *-----------------------------------------------------*/

    /* the signs index is returned through &i, reusing the loop counter */
    index = ACELP_Code_A(xn2, h1, T0, state->sharp, code, y2, &i);

    *ana++ = index;        /* Positions index */
    *ana++ = i;            /* Signs index     */


   /*-----------------------------------------------------*
    * - Quantization of gains.                            *
    *-----------------------------------------------------*/

    g_coeff_cs[0]     = g_coeff[0];            /* <y1,y1> */
    exp_g_coeff_cs[0] = negate(g_coeff[1]);    /* Q-Format:XXX -> JPN */
    g_coeff_cs[1]     = negate(g_coeff[2]);    /* (xn,y1) -> -2<xn,y1> */
    exp_g_coeff_cs[1] = negate(add(g_coeff[3], 1)); /* Q-Format:XXX -> JPN */

    Corr_xy2( xn, y1, y2, g_coeff_cs, exp_g_coeff_cs );  /* Q0 Q0 Q12 ^Qx ^Q0 */
                         /* g_coeff_cs[3]:exp_g_coeff_cs[3] = <y2,y2>   */
                         /* g_coeff_cs[4]:exp_g_coeff_cs[4] = -2<xn,y2> */
                         /* g_coeff_cs[5]:exp_g_coeff_cs[5] = 2<y1,y2>  */
                         /* NOTE(review): both arrays have 5 elements and   */
                         /* entries [0],[1] are set above, so the indices   */
                         /* in the three lines above are presumably meant   */
                         /* as [2],[3],[4] -- confirm against Corr_xy2      */

    /* gain_pit and gain_code are passed by address and are used below as
       the quantized gains */
    *ana++ = Qua_gain(code, g_coeff_cs, exp_g_coeff_cs,
                         L_SUBFR, &gain_pit, &gain_code, taming);


   /*------------------------------------------------------------*
    * - Update pitch sharpening "sharp" with quantized gain_pit  *
    *------------------------------------------------------------*/

    /* clamp to [SHARPMIN, SHARPMAX] */
    state->sharp = gain_pit;
    if (state->sharp > SHARPMAX)      { state->sharp = SHARPMAX;         }
    else if (state->sharp < SHARPMIN) { state->sharp = SHARPMIN;         }

   /*------------------------------------------------------*
    * - Find the total excitation                          *
    * - update filters memories for finding the target     *
    *   vector in the next subframe                        *
    *------------------------------------------------------*/

    for (i = 0; i < L_SUBFR;  i++)
    {
      /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */
      /* exc[i]  in Q0   gain_pit in Q14               */
      /* code[i] in Q13  gain_cod in Q1                */

      /* The commented-out lines are the basic-operator formulation that the
         plain arithmetic below replaces.
         NOTE(review): the sum and the << 2 are not saturated here, unlike
         L_mac/L_shl -- confirm that overflow cannot occur for valid input. */
      //L_temp = L_mult(exc[i+i_subfr], gain_pit);
      //L_temp = L_mac(L_temp, code[i], gain_code);
      //L_temp = L_shl(L_temp, 1);
      L_temp = (Word32)(state->exc[i+i_subfr]) * (Word32)gain_pit +
               (Word32)code[i] * (Word32)gain_code;
      L_temp <<= 2;
      state->exc[i+i_subfr] = g_round(L_temp);
    }

    update_exc_err(state, gain_pit, T0);

    /* mem_w0[j] = xn[i] - gain_pit*y1[i] - gain_code*y2[i] for the last M
       samples of the subframe */
    for (i = L_SUBFR-M, j = 0; i < L_SUBFR; i++, j++)
    {
      temp       = ((Word32)y1[i] * (Word32)gain_pit)  >> 14;
      k          = ((Word32)y2[i] * (Word32)gain_code) >> 13;
      state->mem_w0[j]  = sub(xn[i], add(temp, k));
    }

    Aq += MP1;           /* interpolated LPC parameters for next subframe */
    Ap += MP1;

  }

 /*--------------------------------------------------*
  * Update signal for next frame.                    *
  * -> shift to the left by L_FRAME:                 *
  *     speech[], wsp[] and  exc[]                   *
  *--------------------------------------------------*/

  Copy(&(state->old_speech[L_FRAME]), &(state->old_speech[0]), L_TOTAL-L_FRAME);
  Copy(&(state->old_wsp[L_FRAME]), &(state->old_wsp[0]), PIT_MAX);
  Copy(&(state->old_exc[L_FRAME]), &(state->old_exc[0]), PIT_MAX+L_INTERPOL);
}
NTL_START_IMPL


// Computes the characteristic polynomial of the square matrix M into f.
//
// A non-square M is reported through LogicError.  The 0x0 and 1x1 cases
// are handled directly.  Otherwise a working copy of M is brought to upper
// Hessenberg form by similarity transformations (row operations paired with
// the matching column operations), and the polynomial is then built up by
// a recurrence over the leading blocks of that Hessenberg matrix.
void CharPoly(zz_pX& f, const mat_zz_p& M)
{
   const long dim = M.NumRows();
   if (M.NumCols() != dim)
      LogicError("CharPoly: nonsquare matrix");

   if (dim == 0) {
      set(f);
      return;
   }

   if (dim == 1) {
      // f = X - M(1, 1)
      zz_p constTerm;
      SetX(f);
      negate(constTerm, M(1, 1));
      SetCoeff(f, 0, constTerm);
      return;
   }

   mat_zz_p H;
   H = M;

   zz_p pivot, factor, prod;

   // --- reduction to Hessenberg form --------------------------------
   for (long step = 2; step < dim; step++) {
      // first row at or below "step" with a nonzero entry in column step-1
      long pivRow = step;
      while (pivRow <= dim && IsZero(H(pivRow, step-1)))
         pivRow++;

      if (pivRow > dim)
         continue;   // nothing to eliminate in this column

      pivot = H(pivRow, step-1);

      if (pivRow > step) {
         // exchange rows, then the matching columns
         swap(H(pivRow), H(step));
         for (long r = 1; r <= dim; r++)
            swap(H(r, pivRow), H(r, step));
      }

      for (long row = step+1; row <= dim; row++) {
         div(factor, H(row, step-1), pivot);

         // row(row) -= factor * row(step), columns step..dim
         for (long c = step; c <= dim; c++) {
            mul(prod, factor, H(step, c));
            sub(H(row, c), H(row, c), prod);
         }

         // col(step) += factor * col(row)
         for (long r = 1; r <= dim; r++) {
            mul(prod, factor, H(r, row));
            add(H(r, step), H(r, step), prod);
         }
      }
   }

   // --- recurrence over the leading blocks --------------------------
   vec_zz_pX F;
   F.SetLength(dim+1);
   zz_pX T;
   T.SetMaxLength(dim);

   zz_p subdiag;

   set(F[0]);
   for (long k = 1; k <= dim; k++) {
      // F[k] = X*F[k-1] - H(k,k)*F[k-1]
      LeftShift(F[k], F[k-1], 1);
      mul(T, F[k-1], H(k, k));
      sub(F[k], F[k], T);

      // subdiag accumulates the running product of subdiagonal entries
      set(subdiag);
      for (long d = 1; d < k; d++) {
         mul(subdiag, subdiag, H(k-d+1, k-d));
         mul(prod, subdiag, H(k-d, k));
         mul(T, F[k-d-1], prod);
         sub(F[k], F[k], T);
      }
   }

   f = F[dim];
}
Esempio n. 10
0
// Unary minus for vec_RR: builds a fresh vector holding negate(a)
// and hands it back through NTL_OPT_RETURN.
vec_RR operator-(const vec_RR& a)
{
   vec_RR negated;

   negate(negated, a);

   NTL_OPT_RETURN(vec_RR, negated);
}
Esempio n. 11
0
	// Unary minus: yields whatever this object's negate() returns.
	constexpr Rational operator -() const
	{
		return this->negate();
	}
Esempio n. 12
0
void inv(GF2E& d, Mat<GF2E>& X, const Mat<GF2E>& A)
{
   long n = A.NumRows();

   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   const GF2XModulus& G = GF2E::modulus();

   GF2X t1, t2;
   GF2X pivot;
   GF2X pivot_inv;

   Vec< GF2XVec > M;
   // scratch space

   M.SetLength(n);
   for (long i = 0; i < n; i++) {
      M[i].SetSize(n, 2*GF2E::WordLength());
      for (long j = 0; j < n; j++) {
         M[i][j] = rep(A[i][j]);
      }
   }

   GF2X det;
   det = 1;


   Vec<long> P;
   P.SetLength(n);
   for (long k = 0; k < n; k++) P[k] = k;
   // records swap operations
   

   GF2Context GF2_context;
   GF2_context.save();
   double sz = GF2E_SizeInWords();

   bool seq = double(n)*double(n)*sz*sz < PAR_THRESH;

   bool pivoting = false;

   for (long k = 0; k < n; k++) {

      long pos = -1;

      for (long i = k; i < n; i++) {
         rem(pivot, M[i][k], G);
         if (pivot != 0) {
            InvMod(pivot_inv, pivot, G);
            pos = i;
            break;
         }
      }

      if (pos != -1) {
         if (k != pos) {
            swap(M[pos], M[k]);
            negate(det, det); 
            P[k] = pos;
            pivoting = true;
         }

         MulMod(det, det, pivot, G);

         {
            // multiply row k by pivot_inv
            GF2X *y = &M[k][0];
            for (long j = 0; j < n; j++) {
               rem(t2, y[j], G);
               MulMod(y[j], t2, pivot_inv, G);
            }
            y[k] = pivot_inv;
         }


         NTL_GEXEC_RANGE(seq, n, first, last)  
         NTL_IMPORT(n)
         NTL_IMPORT(k)

         GF2_context.restore();

         GF2X *y = &M[k][0]; 
         GF2X t1, t2;

         for (long i = first; i < last; i++) {
            if (i == k) continue; // skip row k

            GF2X *x = &M[i][0]; 
            rem(t1, x[k], G);
            negate(t1, t1); 
            x[k] = 0;
            if (t1 == 0) continue;

            // add t1 * row k to row i
            for (long j = 0; j < n; j++) {
               mul(t2, y[j], t1);
               add(x[j], x[j], t2);
            }
         }
         NTL_GEXEC_RANGE_END
      }
      else {
         clear(d);
         return;
      }
   }
Esempio n. 13
0
/*
 * D2i40_11 - algebraic codebook search for two signed pulses in a subframe
 * of L_SUBFR samples.
 *
 * Eight (i0, i1) track combinations are scanned; the pair kept is the one
 * that maximises ps1^2 / alp, tested by cross-multiplication against the
 * best candidate so far (ps1c*alpha - psc*alp > 0), so no division is used.
 *
 * Side effects visible in this function:
 *   - Dn[] : every negative entry is replaced by negate(Dn[i]).
 *   - rr[] : the cross-correlation tables are multiplied in place by the
 *            product of the two signs involved.
 *
 * Returns posIndex[0] + (posIndex[1] << 4), where posIndex[] are the
 * grayEncode[] values of the track-table indices of the two pulses.
 */
static Word16 D2i40_11( /* (o)    : Index of pulses positions.            */

  Word16 Dn[],          /* (i/o)  : Correlations between h[] and Xn[];    */
                        /*          negative entries are negated here.    */

  Word16 rr[],          /* (i/o)  : Correlations of impulse response h[]; */
                        /*          cross terms get the signs applied.    */

  Word16 h[],           /* (i)    : Impulse response of filters.          */

  Word16 code[],        /* (o)    : Selected algebraic codeword.          */

  Word16 y[],           /* (o)    : Filtered algebraic codeword.          */

  Word16 *sign         /* (o)    : Signs of the 2 pulses                 */
                        /*          (signIndex[0] + 2*signIndex[1]).      */

)

{

   Word16  i0, i1, ip0, ip1, p0, p1;

   Word16  i, j, index, tmp, swap;

   Word16  ps0, ps1, alp, alp0;

   Word32  alp1;

   Word16  ps1c, psc, alpha;

   Word32  L_temp;

   Word16 posIndex[2], signIndex[2];

   Word16 m0_bestPos, m1_bestPos;

   

   Word16  p_sign[L_SUBFR];



   Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;

   Word16 *rri0i1, *RRi1i1, *rri0i3, *RRi3i4;

   Word16 *rri1i2, *rri1i3, *rri1i4;

   Word16 *rri2i3;



   Word16 *ptr_ri0i0, *ptr_ri1i1;

   Word16 *ptr_ri0i1, *ptr_Ri0i2, *ptr_ri0i3, *ptr_Ri3i4;

   Word16 *ptr_ri1i2, *ptr_ri1i3, *ptr_ri1i4;

   Word16 *ptr_ri2i3;



   Word16 *outPtr_ri1i1; /* Outside loop pointer */



   /* Init pointers */

   /* rr[] is carved into five blocks of NB_POS entries followed by eight */
   /* consecutive blocks of MSIZE entries, in the order set up below.     */



   rri0i0 = rr;

   rri1i1 = rri0i0 + NB_POS;

   rri2i2 = rri1i1 + NB_POS;

   rri3i3 = rri2i2 + NB_POS;

   rri4i4 = rri3i3 + NB_POS;



   rri0i1 = rri4i4 + NB_POS;

   RRi1i1 = rri0i1 + MSIZE;   /* Special for 6.4 kbps */

   rri0i3 = RRi1i1 + MSIZE;

   RRi3i4 = rri0i3 + MSIZE;   /* Special for 6.4 kbps */

   rri1i2 = RRi3i4 + MSIZE;

   rri1i3 = rri1i2 + MSIZE;

   rri1i4 = rri1i3 + MSIZE;

   rri2i3 = rri1i4 + MSIZE;



 /*-----------------------------------------------------------------------*

  * Choose the sign of the impulse.                                       *

  * p_sign[i] is 0x7fff where Dn[i] >= 0 and 0x8000 otherwise; Dn[i] is   *

  * negated in the latter case so it is non-negative afterwards           *

  * (assuming negate() saturates - TODO confirm).                         *

  *-----------------------------------------------------------------------*/



   for (i=0; i<L_SUBFR; i++)

   {

     if( Dn[i] >= 0)

     {

       p_sign[i] = 0x7fff;

     }

     else

     {

       p_sign[i] = (Word16)0x8000;

       Dn[i] = negate(Dn[i]);

     }

   }

      

  /*-------------------------------------------------------------------*

   * Modification of rrixiy[] to take signs into account.              *

   * Each cross-correlation entry is multiplied in place by the        *

   * product of the signs of the two positions it relates.             *

   *-------------------------------------------------------------------*/



  ptr_ri0i1 = rri0i1;

  ptr_ri0i3 = rri0i3;

  for(i0=0; i0<L_SUBFR; i0+=STEP) {

     for(i1=1; i1<L_SUBFR; i1+=STEP) {

       *ptr_ri0i1 = mult(*ptr_ri0i1, mult(p_sign[i0], p_sign[i1]));

        ptr_ri0i1++;

       *ptr_ri0i3 = mult(*ptr_ri0i3, mult(p_sign[i0], p_sign[i1+2]));

        ptr_ri0i3++;

     }

   }



   ptr_ri1i2 = rri1i2;

   ptr_ri1i3 = rri1i3;

   ptr_ri1i4 = rri1i4;

   for(i0=1; i0<L_SUBFR; i0+=STEP) {

      for(i1=2; i1<L_SUBFR; i1+=STEP) {

        *ptr_ri1i2 = mult(*ptr_ri1i2, mult(p_sign[i0], p_sign[i1]));

         ptr_ri1i2++;

        *ptr_ri1i3 = mult(*ptr_ri1i3, mult(p_sign[i0], p_sign[i1+1]));

         ptr_ri1i3++;

        *ptr_ri1i4 = mult(*ptr_ri1i4, mult(p_sign[i0], p_sign[i1+2]));

         ptr_ri1i4++;

      }

   }



   ptr_ri2i3 = rri2i3;

   ptr_Ri3i4 = RRi3i4;

   for(i0=2; i0<L_SUBFR; i0+=STEP) {

      for(i1=3; i1<L_SUBFR; i1+=STEP) {

        *ptr_ri2i3 = mult(*ptr_ri2i3, mult(p_sign[i0], p_sign[i1]));

         ptr_ri2i3++;

        /* Note the +1 offsets: this table pairs positions i0+1 and i1+1. */

        *ptr_Ri3i4 = mult(*ptr_Ri3i4, mult(p_sign[i0+1], p_sign[i1+1]));

         ptr_Ri3i4++;

      }

   }



   /* RRi1i1 pairs two positions that both start at offset 1. */

   ptr_Ri0i2 = RRi1i1;

   for(i0=1; i0<L_SUBFR; i0+=STEP) {

      for(i1=1; i1<L_SUBFR; i1+=STEP) {

        *ptr_Ri0i2 = mult(*ptr_Ri0i2, mult(p_sign[i0], p_sign[i1]));

         ptr_Ri0i2++;

      }

   }

   

  /*-------------------------------------------------------------------*

   * The actual search.                                                *

   *-------------------------------------------------------------------*/



   ip0 = 1;                     /* Set to any valid pulse position */

   ip1 = 0;                     /* Set to any valid pulse position */

   psc = 0;

   alpha = MAX_16;

   /* ptr_ri0i1 is set once here and never reset inside the loop below:   */
   /* it keeps advancing through the MSIZE blocks that follow rri0i1, so  */
   /* the order of the passes must match the block order set up above.    */
   /* NOTE(review): presumably each pass consumes exactly MSIZE entries - */
   /* confirm against STEP / MSIZE.                                       */

   ptr_ri0i1 = rri0i1;



   outPtr_ri1i1 = rri1i1;       /* Initial values for triple loop below */

   p0=0;                        /* Search i0,sub0 vs. i1,sub0 */ 

   p1=1;

   ptr_ri0i0 = rri0i0;



   /* Eight passes are run: i takes the values 0,1,2,3,5,6,7,8. */

   for (i = 0; i<9; i++) {



      if (i == 4) i++;          /* To get right exchange sequence */

      /* Odd passes store (i0,i1) as (ip0,ip1); even passes store them */
      /* the other way round (see the update of ip0/ip1 below).         */

      swap = i & 1;



      /* ptr_ri0i0 is not reset for this pass: it continues from where  */
      /* pass 0 left it (presumably at rri1i1 - TODO confirm).          */

      if (i == 1) p0=1;         /* Search i0,sub1 vs. i1,sub0 */



      else if (i == 2) {        /* Search i0,sub0 vs. i1,sub1 */

         outPtr_ri1i1 = rri3i3;

         p0=0;

         p1=3;

         ptr_ri0i0 = rri0i0;

      }



      else if (i == 3) {        /* Search i0,sub3 vs. i1,sub1 */

         outPtr_ri1i1 = rri4i4;

         p0=3;

         p1=4;

         ptr_ri0i0 = rri3i3;

      }



      else if (i == 5) {        /* Search i0,sub2 vs. i1,sub0 */

         outPtr_ri1i1 = rri2i2;

         p0=1;

         p1=2;

         ptr_ri0i0 = rri1i1;

      }



      /* p0 is left unchanged (still 1) for passes 6 and 7. */

      else if (i == 6) {        /* Search i0,sub1 vs. i1,sub1 */

         outPtr_ri1i1 = rri3i3;

         p1=3;

         ptr_ri0i0 = rri1i1;

      }



      else if (i == 7) {        /* Search i0,sub3 vs. i1,sub0 */

         outPtr_ri1i1 = rri4i4;

         p1=4;

         ptr_ri0i0 = rri1i1;

      }



      /* ptr_ri0i0 is not reset for this pass either; it continues from */
      /* the end of pass 7 (presumably at rri2i2 - TODO confirm).       */

      else if (i == 8) {        /* Search i0,sub2 vs. i1,sub1 */

         outPtr_ri1i1 = rri3i3;

         p0=2;

         p1=3;

      }



      for (i0 = p0; i0<40; i0+=STEP) {

         ptr_ri1i1 = outPtr_ri1i1;

         ps0 = Dn[i0];

         alp0 = *ptr_ri0i0++;

         for (i1 = p1; i1<40; i1+=STEP) {

            ps1 = add(ps0, Dn[i1]);

            /* Energy term: self terms of both pulses plus the cross term. */

            alp1 = L_mult(alp0, 1);

            alp1 = L_mac(alp1, *ptr_ri1i1++, 1);

            alp1 = L_mac(alp1, *ptr_ri0i1++, 2);

            alp = extract_l(L_shr(alp1, 5));

            ps1c = mult(ps1, ps1);

            /* Accept when ps1c*alpha - psc*alp > 0, i.e. the new ratio */
            /* ps1c/alp beats the stored psc/alpha.                     */

            L_temp = L_mult(ps1c, alpha);

            L_temp = L_msu(L_temp, psc, alp);

            if (L_temp > 0L) {

           psc = ps1c;

           alpha = alp;

           ip0 = i1;

           ip1 = i0;

               if ( swap ) {

              ip0 = i0;

              ip1 = i1;

           }

            }

         }

      }

   }



   /* convert from position to table entry index */
   /* NOTE(review): if no entry matches, ip0 ends up as 16 (ip1 as 32)   */
   /* and is then used to index the tables below; presumably every       */
   /* position the search can select is listed in the tables - confirm.  */
   for (i0=0; i0<16; i0++)

      if (ip0 == trackTable0[i0]) break;   

   ip0=i0;



   for (i1=0; i1<32; i1++)

      if (ip1 == trackTable1[i1]) break;

   ip1=i1;



   m0_bestPos = trackTable0[ip0];

   m1_bestPos = trackTable1[ip1];


   posIndex[0] = grayEncode[ip0];

   posIndex[1] = grayEncode[ip1];



   /* signIndex is 1 for a positive pulse (p_sign == 0x7fff), 0 otherwise. */

   if (p_sign[m0_bestPos] > 0)

       signIndex[0] = 1;

   else

       signIndex[0] = 0;

   

   if (p_sign[m1_bestPos] > 0)

       signIndex[1] = 1;

   else

       signIndex[1] = 0;



   /* build innovation vector */

   for (i = 0; i < L_SUBFR; i++) code[i] = 0;



   code[m0_bestPos] = shr(p_sign[m0_bestPos], 2);

   /* Accumulate rather than assign, so the first pulse is kept if both */
   /* pulses land on the same position.                                 */

   code[m1_bestPos] = add( code[m1_bestPos], shr(p_sign[m1_bestPos], 2));

   

   /* *sign = 2*signIndex[1] + signIndex[0] */

   *sign = add(signIndex[1], signIndex[1]);

   *sign = add(*sign, signIndex[0]);



   /* index = posIndex[0] + (posIndex[1] << 4) */

   tmp = shl(posIndex[1], 4);

   index = add(posIndex[0], tmp);



   /* compute filtered cbInnovation */

   for (i = 0; i < L_SUBFR; i++) y[i] = 0;



   /* First pulse: write h[] (or its negation) starting at m0_bestPos. */

   if(signIndex[0] == 0) 

       for(i=m0_bestPos, j=0; i<L_SUBFR; i++, j++) y[i] = negate(h[j]);

   else

       for(i=m0_bestPos, j=0; i<L_SUBFR; i++, j++) y[i] = h[j];



   /* Second pulse: add or subtract h[] on top, starting at m1_bestPos. */

   if(signIndex[1] == 0)

       for(i=m1_bestPos, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

   else

       for(i=m1_bestPos, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);



   return index;

}
Esempio n. 14
0
/*
 * D4i40_17 - algebraic codebook search for four signed pulses in a
 * subframe of L_SUBFR samples.
 *
 * Pulses i0, i1 and i2 are searched on the positions starting at 0, 1 and 2
 * (stepping by STEP).  The fourth pulse is searched on two position sets,
 * one starting at 3 and one starting at 4.  The fourth-pulse loops are only
 * entered when the correlation of the first three pulses exceeds `thres`,
 * and at most `time` such entries are allowed.  The candidate kept is the
 * one maximising ps3^2 / alp, tested by cross-multiplication.
 *
 * Side effects visible in this function:
 *   - Dn[]  : every negative entry is replaced by negate(Dn[i]).
 *   - rr[]  : the cross-correlation tables are multiplied in place by the
 *             product of the two signs involved.
 *   - extra : declared outside this function (not visible here).  It is set
 *             to 30 when i_subfr == 0, added to MAX_TIME to form the search
 *             budget, and overwritten with the unused budget on exit.
 *
 * Returns the position index assembled at the end of the function; the
 * four sign bits are returned separately through *sign.
 */
static Word16 D4i40_17( /* (o)    : Index of pulses positions.               */

  Word16 Dn[],          /* (i/o)  : Correlations between h[] and Xn[];       */
                        /*          negative entries are negated here.       */

  Word16 rr[],          /* (i/o)  : Correlations of impulse response h[];    */
                        /*          cross terms get the signs applied.       */

  Word16 h[],           /* (i) Q12: Impulse response of filters.             */

  Word16 cod[],         /* (o) Q13: Selected algebraic codeword.             */

  Word16 y[],           /* (o) Q12: Filtered algebraic codeword.             */

  Word16 *sign,         /* (o)    : Signs of 4 pulses.                       */

  Word16 i_subfr        /* (i)    : subframe flag                            */

)

{

   Word16  i0, i1, i2, i3, ip0, ip1, ip2, ip3;

   Word16  i, j, time;

   Word16  ps0, ps1, ps2, ps3, alp, alp0;

   Word32  alp1, alp2, alp3, L32;

   Word16  ps3c, psc, alpha;

   Word16  average, max0, max1, max2, thres;

   Word32  L_temp;



   Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;

   Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;

   Word16 *rri1i2, *rri1i3, *rri1i4;

   Word16 *rri2i3, *rri2i4;



   Word16 *ptr_ri0i0, *ptr_ri1i1, *ptr_ri2i2, *ptr_ri3i3, *ptr_ri4i4;

   Word16 *ptr_ri0i1, *ptr_ri0i2, *ptr_ri0i3, *ptr_ri0i4;

   Word16 *ptr_ri1i2, *ptr_ri1i3, *ptr_ri1i4;

   Word16 *ptr_ri2i3, *ptr_ri2i4;



   Word16  p_sign[L_SUBFR];



   /* Init pointers */

   /* rr[] is carved into five blocks of NB_POS entries followed by nine */
   /* consecutive blocks of MSIZE entries, in the order set up below.    */



   rri0i0 = rr;

   rri1i1 = rri0i0 + NB_POS;

   rri2i2 = rri1i1 + NB_POS;

   rri3i3 = rri2i2 + NB_POS;

   rri4i4 = rri3i3 + NB_POS;



   rri0i1 = rri4i4 + NB_POS;

   rri0i2 = rri0i1 + MSIZE;

   rri0i3 = rri0i2 + MSIZE;

   rri0i4 = rri0i3 + MSIZE;

   rri1i2 = rri0i4 + MSIZE;

   rri1i3 = rri1i2 + MSIZE;

   rri1i4 = rri1i3 + MSIZE;

   rri2i3 = rri1i4 + MSIZE;

   rri2i4 = rri2i3 + MSIZE;



 /*-----------------------------------------------------------------------*

  * Reset max_time for 1st subframe.                                      *

  * (`extra` is not declared in this function; it carries the unused      *

  *  search budget from one call to the next.)                            *

  *-----------------------------------------------------------------------*/



   if (i_subfr == 0){ extra = 30; }



 /*-----------------------------------------------------------------------*

  * Choose the sign of the impulse.                                       *

  * p_sign[i] is 0x7fff where Dn[i] >= 0 and 0x8000 otherwise; Dn[i] is   *

  * negated in the latter case.                                           *

  *-----------------------------------------------------------------------*/



   for (i=0; i<L_SUBFR; i++)

   {

     if( Dn[i] >= 0)

     {

       p_sign[i] = 0x7fff;

     }

     else

     {

       p_sign[i] = (Word16)0x8000;

       Dn[i] = negate(Dn[i]);

     }

   }



 /*-------------------------------------------------------------------*

  * - Compute the search threshold after three pulses                 *

  *-------------------------------------------------------------------*/



   /* Find maximum of Dn[i0]+Dn[i1]+Dn[i2] */

   /* The maxima are seeded with positions 0, 1, 2, so the scan below */
   /* starts at the next position of each set (i = 5).                */



   max0 = Dn[0];

   max1 = Dn[1];

   max2 = Dn[2];



   for (i = 5; i < L_SUBFR; i+=STEP)

   {

     if (sub(Dn[i]  , max0) > 0){ max0 = Dn[i];   }

     if (sub(Dn[i+1], max1) > 0){ max1 = Dn[i+1]; }

     if (sub(Dn[i+2], max2) > 0){ max2 = Dn[i+2]; }

   }

   max0 = add(max0, max1);

   max0 = add(max0, max2);



   /* Find average of Dn[i0]+Dn[i1]+Dn[i2] */



   L32 = 0;

   for (i = 0; i < L_SUBFR; i+=STEP)

   {

     L32 = L_mac(L32, Dn[i], 1);

     L32 = L_mac(L32, Dn[i+1], 1);

     L32 = L_mac(L32, Dn[i+2], 1);

   }

   average =extract_l( L_shr(L32, 4));   /* 1/8 of sum */



   /* thres = average + (max0-average)*THRESHFCB; */



   thres = sub(max0, average);

   thres = mult(thres, THRESHFCB);

   thres = add(thres, average);



  /*-------------------------------------------------------------------*

   * Modification of rrixiy[] to take signs into account.              *

   * Each cross-correlation entry is multiplied in place by the        *

   * product of the signs of the two positions it relates.             *

   *-------------------------------------------------------------------*/



   ptr_ri0i1 = rri0i1;

   ptr_ri0i2 = rri0i2;

   ptr_ri0i3 = rri0i3;

   ptr_ri0i4 = rri0i4;



   for(i0=0; i0<L_SUBFR; i0+=STEP)

   {

     for(i1=1; i1<L_SUBFR; i1+=STEP)

     {

       *ptr_ri0i1 = mult(*ptr_ri0i1, mult(p_sign[i0], p_sign[i1]));

        ptr_ri0i1++;

       *ptr_ri0i2 = mult(*ptr_ri0i2, mult(p_sign[i0], p_sign[i1+1]));

        ptr_ri0i2++;

       *ptr_ri0i3 = mult(*ptr_ri0i3, mult(p_sign[i0], p_sign[i1+2]));

        ptr_ri0i3++;

       *ptr_ri0i4 = mult(*ptr_ri0i4, mult(p_sign[i0], p_sign[i1+3]));

        ptr_ri0i4++;



     }

   }



   ptr_ri1i2 = rri1i2;

   ptr_ri1i3 = rri1i3;

   ptr_ri1i4 = rri1i4;



   for(i1=1; i1<L_SUBFR; i1+=STEP)

   {

      for(i2=2; i2<L_SUBFR; i2+=STEP)

      {

        *ptr_ri1i2 = mult(*ptr_ri1i2, mult(p_sign[i1], p_sign[i2]));

         ptr_ri1i2++;

        *ptr_ri1i3 = mult(*ptr_ri1i3, mult(p_sign[i1], p_sign[i2+1]));

         ptr_ri1i3++;

        *ptr_ri1i4 = mult(*ptr_ri1i4, mult(p_sign[i1], p_sign[i2+2]));

         ptr_ri1i4++;



      }

   }



   ptr_ri2i3 = rri2i3;

   ptr_ri2i4 = rri2i4;



   for(i2=2; i2<L_SUBFR; i2+=STEP)

   {

      for(i3=3; i3<L_SUBFR; i3+=STEP)

      {

        *ptr_ri2i3 = mult(*ptr_ri2i3, mult(p_sign[i2], p_sign[i3]));

         ptr_ri2i3++;

        *ptr_ri2i4 = mult(*ptr_ri2i4, mult(p_sign[i2], p_sign[i3+1]));

         ptr_ri2i4++;



      }

   }



 /*-------------------------------------------------------------------*

  * Search the optimum positions of the four  pulses which maximize   *

  *     square(correlation) / energy                                  *

  * The search is performed in four  nested loops. At each loop, one  *

  * pulse contribution is added to the correlation and energy.        *

  *                                                                   *

  * The fourth loop is entered only if the correlation due to the     *

  *  contribution of the first three pulses exceeds the preset        *

  *  threshold.                                                       *

  *-------------------------------------------------------------------*/



 /* Default values */



 ip0    = 0;

 ip1    = 1;

 ip2    = 2;

 ip3    = 3;

 psc    = 0;

 alpha  = MAX_16;

 /* Search budget: number of times the fourth-pulse loops may be entered. */

 time   = add(MAX_TIME, extra);





 /* Four loops to search innovation code. */



 ptr_ri0i0 = rri0i0;    /* Init. pointers that depend on first loop */

 ptr_ri0i1 = rri0i1;

 ptr_ri0i2 = rri0i2;

 ptr_ri0i3 = rri0i3;

 ptr_ri0i4 = rri0i4;



 for (i0 = 0; i0 < L_SUBFR; i0 += STEP)        /* first pulse loop  */

 {

   ps0  = Dn[i0];

   alp0 = *ptr_ri0i0++;



   ptr_ri1i1 = rri1i1;    /* Init. pointers that depend on second loop */

   ptr_ri1i2 = rri1i2;

   ptr_ri1i3 = rri1i3;

   ptr_ri1i4 = rri1i4;



   for (i1 = 1; i1 < L_SUBFR; i1 += STEP)      /* second pulse loop */

   {

     ps1  = add(ps0, Dn[i1]);



     /* alp1 = alp0 + *ptr_ri1i1++ + 2.0 * ( *ptr_ri0i1++); */



     alp1 = L_mult(alp0, 1);

     alp1 = L_mac(alp1, *ptr_ri1i1++, 1);

     alp1 = L_mac(alp1, *ptr_ri0i1++, 2);



     ptr_ri2i2 = rri2i2;     /* Init. pointers that depend on third loop */

     ptr_ri2i3 = rri2i3;

     ptr_ri2i4 = rri2i4;



     for (i2 = 2; i2 < L_SUBFR; i2 += STEP)    /* third pulse loop */

      {

       ps2  = add(ps1, Dn[i2]);



       /* alp2 = alp1 + *ptr_ri2i2++ + 2.0 * (*ptr_ri0i2++ + *ptr_ri1i2++); */



       alp2 = L_mac(alp1, *ptr_ri2i2++, 1);

       alp2 = L_mac(alp2, *ptr_ri0i2++, 2);

       alp2 = L_mac(alp2, *ptr_ri1i2++, 2);



       /* Test threshold */



       if ( sub(ps2, thres) > 0)

       {



         ptr_ri3i3 = rri3i3;    /* Init. pointers that depend on 4th loop */





         for (i3 = 3; i3 < L_SUBFR; i3 += STEP)      /* 4th pulse loop */

         {

           ps3 = add(ps2, Dn[i3]);



           /* alp3 = alp2 + *ptr_ri3i3++                                */

           /*       + 2.0*( *ptr_ri0i3++ + *ptr_ri1i3++ + *ptr_ri2i3++); */



           alp3 = L_mac(alp2, *ptr_ri3i3++, 1);

           alp3 = L_mac(alp3, *ptr_ri0i3++, 2);

           alp3 = L_mac(alp3, *ptr_ri1i3++, 2);

           alp3 = L_mac(alp3, *ptr_ri2i3++, 2);

           alp  = extract_l(L_shr(alp3, 5));



           ps3c = mult(ps3, ps3);

           /* Accept when ps3c*alpha - psc*alp > 0, i.e. the new ratio */
           /* ps3c/alp beats the stored psc/alpha.                     */

           L_temp = L_mult(ps3c, alpha);

           L_temp = L_msu(L_temp, psc, alp);

           if( L_temp > 0L )

           {

             psc = ps3c;

             alpha = alp;

             ip0 = i0;

             ip1 = i1;

             ip2 = i2;

             ip3 = i3;

           }

         }  /*  end of for i3 = */

         /* Rewind the i0/i1 rows walked by the loop above so the next i2  */
         /* iteration reads them again (presumably the loop advanced them  */
         /* by exactly NB_POS - TODO confirm).  ptr_ri2i3 is intentionally */
         /* left advanced: it moves on to the next i2 row.                 */

         ptr_ri0i3 -= NB_POS;

         ptr_ri1i3 -= NB_POS;



         ptr_ri4i4 = rri4i4;    /* Init. pointers that depend on 4th loop */





         /* Same search for the second position set of the fourth pulse. */

         for (i3 = 4; i3 < L_SUBFR; i3 += STEP)      /* 4th pulse loop */

         {

           ps3 = add(ps2, Dn[i3]);



           /* alp3 = alp2 + *ptr_ri4i4++                                */

           /*       + 2.0*( *ptr_ri0i4++ + *ptr_ri1i4++ + *ptr_ri2i4++); */



           alp3 = L_mac(alp2, *ptr_ri4i4++, 1);

           alp3 = L_mac(alp3, *ptr_ri0i4++, 2);

           alp3 = L_mac(alp3, *ptr_ri1i4++, 2);

           alp3 = L_mac(alp3, *ptr_ri2i4++, 2);

           alp  = extract_l(L_shr(alp3, 5));



           ps3c = mult(ps3, ps3);

           L_temp = L_mult(ps3c, alpha);

           L_temp = L_msu(L_temp, psc, alp);

           if( L_temp > 0L )

           {

             psc = ps3c;

             alpha = alp;

             ip0 = i0;

             ip1 = i1;

             ip2 = i2;

             ip3 = i3;

           }

         }  /*  end of for i3 = */

         ptr_ri0i4 -= NB_POS;

         ptr_ri1i4 -= NB_POS;



         /* One unit of budget is spent each time the threshold is passed. */

         time = sub(time, 1);

         if(time <= 0 ) goto end_search;     /* Maximum time finish */



       }  /* end of if >thres */

       else

       {

         /* Fourth-pulse loops skipped: step the i2 rows forward by hand. */

         ptr_ri2i3 += NB_POS;

         ptr_ri2i4 += NB_POS;

       }



     } /* end of for i2 = */



     /* Rewind the i0 row of rri0i2 for the next i1; move the i1 rows of */
     /* rri1i3 / rri1i4 on to the next i1.                               */

     ptr_ri0i2 -= NB_POS;

     ptr_ri1i3 += NB_POS;

     ptr_ri1i4 += NB_POS;



   } /* end of for i1 = */



   /* Move the i0 rows on to the next i0. */

   ptr_ri0i2 += NB_POS;

   ptr_ri0i3 += NB_POS;

   ptr_ri0i4 += NB_POS;



 } /* end of for i0 = */



end_search:



 /* Carry the unused budget over to the next call. */

 extra = time;



 /* Set the sign of impulses */

 /* From here on i0..i3 hold the sign values of the chosen pulses, */
 /* no longer positions.                                           */



 i0 = p_sign[ip0];

 i1 = p_sign[ip1];

 i2 = p_sign[ip2];

 i3 = p_sign[ip3];



 /* Find the codeword corresponding to the selected positions */



 for(i=0; i<L_SUBFR; i++) {cod[i] = 0; }



 cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */

 cod[ip1] = shr(i1, 2);

 cod[ip2] = shr(i2, 2);

 cod[ip3] = shr(i3, 2);



 /* find the filtered codeword */

 /* For each pulse, h[] is added to (positive sign) or subtracted from */
 /* (negative sign) y[], starting at the pulse position.               */



 for (i = 0; i < L_SUBFR; i++) {y[i] = 0;  }



 if(i0 > 0)

   for(i=ip0, j=0; i<L_SUBFR; i++, j++) {

       y[i] = add(y[i], h[j]); }

 else

   for(i=ip0, j=0; i<L_SUBFR; i++, j++) {

       y[i] = sub(y[i], h[j]); }



 if(i1 > 0)

   for(i=ip1, j=0; i<L_SUBFR; i++, j++) {

       y[i] = add(y[i], h[j]); }

 else

   for(i=ip1, j=0; i<L_SUBFR; i++, j++) {

       y[i] = sub(y[i], h[j]); }



 if(i2 > 0)

   for(i=ip2, j=0; i<L_SUBFR; i++, j++) {

       y[i] = add(y[i], h[j]); }

 else

   for(i=ip2, j=0; i<L_SUBFR; i++, j++) {

       y[i] = sub(y[i], h[j]); }



 if(i3 > 0)

   for(i=ip3, j=0; i<L_SUBFR; i++, j++) {

       y[i] = add(y[i], h[j]); }

 else

   for(i=ip3, j=0; i<L_SUBFR; i++, j++) {

       y[i] = sub(y[i], h[j]); }



 /* find codebook index;  17-bit address */

 /* Sign word: 1, 2, 4, 8 are added for a positive pulse 0, 1, 2, 3. */



 i = 0;

 if(i0 > 0) i = add(i, 1);

 if(i1 > 0) i = add(i, 2);

 if(i2 > 0) i = add(i, 4);

 if(i3 > 0) i = add(i, 8);

 *sign = i;



 ip0 = mult(ip0, 6554);         /* ip0/5 */

 ip1 = mult(ip1, 6554);         /* ip1/5 */

 ip2 = mult(ip2, 6554);         /* ip2/5 */

 i   = mult(ip3, 6554);         /* ip3/5 */

 j   = add(i, shl(i, 2));       /* j = i*5 */

 j   = sub(ip3, add(j, 3));     /* j= ip3%5 -3 */

 ip3 = add(shl(i, 1), j);       /* 2*(ip3/5) + (ip3%5 - 3) */



 /* Pack: ip0 + (ip1 << 3) + (ip2 << 6) + (ip3 << 9) */

 i = add(ip0, shl(ip1, 3));

 i = add(i  , shl(ip2, 6));

 i = add(i  , shl(ip3, 9));





 return i;

}
_BOOL_RETURN_TYPE 
assertGreaterEqual(const int arg1, const int arg2) {
	/*
	 * Check arg1 >= arg2 by inverting the result of assertLessCmp() on the
	 * same pair (presumably a "less than" test driven by int_cmp).
	 * On failure the two values are reported through print_error().
	 * Returns the inverted comparison result.
	 */
	_BOOL_RETURN_TYPE outcome = negate(assertLessCmp(&arg1, &arg2, (cmp_fn_t)int_cmp));

	if(outcome != _FALSE) {
		return outcome;
	}

	print_error(arg1, arg2, "not greater or equal to");
	return outcome;
}
Esempio n. 16
0
/* Build a new vertex at dest_ptr by interpolating between the vertices at
 * v0_ptr ("from") and v1_ptr ("to") with weight t0.
 *
 * The vertex header is copied from v0.  For every VUE slot:
 *   - the edge flag is either forced to 1.0 (force_edgeflag) or copied
 *     from v0;
 *   - PSIZ, CLIP_DIST0 and CLIP_DIST1 are left untouched;
 *   - any other varying below VARYING_SLOT_MAX is interpolated.
 * Finally the projected (NDC) coordinate of the new vertex is recomputed.
 */
void brw_clip_interp_vertex( struct brw_clip_compile *c,
			     struct brw_indirect dest_ptr,
			     struct brw_indirect v0_ptr, /* from */
			     struct brw_indirect v1_ptr, /* to */
			     struct brw_reg t0,
			     bool force_edgeflag)
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp = get_tmp(c);
   GLuint idx;

   /* Copy the vertex header verbatim.  After the CLIP stage only the first
    * 256 bits of the VUE are read back on Ironlake, so it needs no change.
    */
   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);

   /* Walk the attributes one slot at a time (could be done in pairs?) */
   for (idx = 0; idx < c->vue_map.num_slots; idx++) {
      int varying = c->vue_map.slot_to_varying[idx];
      GLuint byte_off = brw_vue_slot_to_offset(idx);

      if (varying == VARYING_SLOT_EDGE) {
         brw_MOV(p,
                 deref_4f(dest_ptr, byte_off),
                 force_edgeflag ? brw_imm_f(1) : deref_4f(v0_ptr, byte_off));
         continue;
      }

      /* PSIZ doesn't need interpolation because it isn't used by the
       * fragment shader.  CLIP_DIST0 and CLIP_DIST1 don't need
       * interpolation because on pre-GEN6, these are just placeholder VUE
       * slots that don't perform any action.
       */
      if (varying == VARYING_SLOT_PSIZ ||
          varying == VARYING_SLOT_CLIP_DIST0 ||
          varying == VARYING_SLOT_CLIP_DIST1)
         continue;

      /* Values at or above VARYING_SLOT_MAX are special VUE-header entries,
       * not true vertex results, and are skipped.
       */
      if (!(varying < VARYING_SLOT_MAX))
         continue;

      /* True vertex result, so interpolate:
       *
       *        New = attr0 + t*attr1 - t*attr0
       *
       * The MUL writes to the null register; presumably its product is
       * picked up by the following MAC through the accumulator.
       */
      brw_MUL(p,
              vec4(brw_null_reg()),
              deref_4f(v1_ptr, byte_off),
              t0);

      brw_MAC(p,
              tmp,
              negate(deref_4f(v0_ptr, byte_off)),
              t0);

      brw_ADD(p,
              deref_4f(dest_ptr, byte_off),
              deref_4f(v0_ptr, byte_off),
              tmp);
   }

   /* With an odd slot count, zero the slot just past the last one. */
   if (c->vue_map.num_slots % 2) {
      GLuint pad_off = brw_vue_slot_to_offset(c->vue_map.num_slots);

      brw_MOV(p, deref_4f(dest_ptr, pad_off), brw_imm_f(0));
   }

   release_tmp(c, tmp);

   /* Recreate the projected (NDC) coordinate in the new vertex header. */
   brw_clip_project_vertex(c, dest_ptr );
}
_BOOL_RETURN_TYPE 
assertNotEqual(const int arg1, const int arg2) {
	/*
	 * Check arg1 != arg2 by inverting the result of assertEqualCmp() on the
	 * same pair (presumably an equality test driven by int_cmp).
	 * On failure the two values are reported through print_error().
	 * Returns the inverted comparison result.
	 */
	_BOOL_RETURN_TYPE outcome = negate(assertEqualCmp(&arg1, &arg2, (cmp_fn_t)int_cmp));

	if(outcome != _FALSE) {
		return outcome;
	}

	print_error(arg1, arg2, "equals");
	return outcome;
}
Esempio n. 18
0
File: sam2coef.c Progetto: VVer/opal
/*
 * Window one frame of samples, scale it, and run the type-IV DCT.
 *
 *   new_samples : current frame (dct_length samples, read only here)
 *   old_samples : previous frame; overwritten with new_samples before return
 *   coefs       : output of dct_type_iv_a()
 *   dct_length  : transform length; selects the window table
 *
 * Returns mag_shift, the shift applied to the windowed data before the DCT
 * (positive = left shift, negative = right shift).
 *
 * The move16()/move32()/test() calls are kept in their original positions.
 */
Word16 samples_to_rmlt_coefs(Word16 *new_samples,Word16 *old_samples,Word16 *coefs,Word16 dct_length)
{
    Word16  windowed_data[MAX_DCT_LENGTH];
    Word16  *win_lo, *win_hi;
    Word16  *smp_lo, *smp_hi;
    Word16  *out_ptr;
    Word16  *src_ptr, *hist_ptr;
    Word16  half_len;
    Word16  remaining, idx;
    Word16  peak_abs, abs_val, diff;
    Word16  scaled, norm_bits;
    Word16  mag_shift, right_shift;
    Word32  acc, abs_sum;

    half_len = shr(dct_length,1);

    /* ---- first half of the windowed data: taken from the old frame ---- */

    out_ptr = windowed_data;
    move16();

    /* Both window pointers start at the middle of the selected table. */
    test();
    if (dct_length != DCT_LENGTH)
    {
        win_hi = max_samples_to_rmlt_window + half_len;
    }
    else
    {
        win_hi = samples_to_rmlt_window + half_len;
    }

    win_lo = win_hi;
    move16();

    smp_hi = old_samples + half_len;

    smp_lo = smp_hi;
    move16();

    /* Walk outwards from the middle: the "lo" pointers move down, the
       "hi" pointers move up. */
    remaining = half_len;
    while (remaining > 0)
    {
        acc = 0L;
        move32();

        --win_lo;
        --smp_lo;
        acc = L_mac(acc, *win_lo, *smp_lo);
        acc = L_mac(acc, *win_hi, *smp_hi);
        win_hi++;
        smp_hi++;

        *out_ptr = round16(acc);
        out_ptr++;
        move16();

        remaining--;
    }

    /* ---- second half of the windowed data: taken from the new frame ---- */

    smp_lo = new_samples;
    move16();

    smp_hi = new_samples + dct_length;

    /* The window pointers now walk back: win_hi comes down from where the
       first loop left it, win_lo goes up again and its value is negated. */
    remaining = half_len;
    while (remaining > 0)
    {
        acc = 0L;
        move32();

        --win_hi;
        acc = L_mac(acc, *win_hi, *smp_lo);
        smp_lo++;

        --smp_hi;
        acc = L_mac(acc, negate(*win_lo), *smp_hi);
        win_lo++;

        *out_ptr = round16(acc);
        out_ptr++;
        move16();

        remaining--;
    }

    /* ---- keep the new samples: they are the old samples next time ---- */

    src_ptr = new_samples;
    move16();

    hist_ptr = old_samples;
    move16();

    remaining = dct_length;
    while (remaining > 0)
    {
        *hist_ptr = *src_ptr;
        hist_ptr++;
        src_ptr++;
        move16();

        remaining--;
    }

    /* ---- decide how far to shift the DCT input ---- */

    /* Largest absolute value in the windowed data. */
    peak_abs = 0;
    move16();

    for (idx = 0; idx < dct_length; idx++)
    {
        abs_val = abs_s(windowed_data[idx]);
        diff = sub(abs_val, peak_abs);
        test();
        if (diff > 0)
        {
            move16();
            peak_abs = abs_val;
        }
    }

    mag_shift = 0;
    move16();

    diff = sub(peak_abs, 14000);
    test();
    if (diff < 0)
    {
        /* Peaks below 438 are bumped by one before scaling. */
        diff = sub(peak_abs, 438);
        test();
        if (diff >= 0)
        {
            scaled = peak_abs;
            move16();
        }
        else
        {
            scaled = add(peak_abs, 1);
        }

        acc = L_shr(L_mult(scaled, 9587), 20);
        norm_bits = norm_s(extract_l(acc));
        test();
        if (norm_bits != 0)
        {
            mag_shift = sub(norm_bits, 6);
        }
        else
        {
            mag_shift = 9;
            move16();
        }
    }
    else
    {
        /* Peak at or above 14000: no shift. */
        mag_shift = 0;
        move16();
    }

    /* Sum of absolute values, scaled down by 7 bits; back the shift off by
       one when the peak is below that figure. */
    abs_sum = 0L;
    move32();
    for (idx = 0; idx < dct_length; idx++)
    {
        abs_val = abs_s(windowed_data[idx]);
        abs_sum = L_add(abs_sum, abs_val);
    }

    abs_sum = L_shr(abs_sum, 7);

    test();
    if (peak_abs < abs_sum)
    {
        mag_shift = sub(mag_shift, 1);
    }

    /* ---- apply the shift ---- */

    test();
    if (mag_shift > 0)
    {
        for (idx = 0; idx < dct_length; idx++)
        {
            windowed_data[idx] = shl(windowed_data[idx], mag_shift);
        }
    }
    else
    {
        test();
        if (mag_shift < 0)
        {
            right_shift = negate(mag_shift);
            for (idx = 0; idx < dct_length; idx++)
            {
                windowed_data[idx] = shr(windowed_data[idx], right_shift);
                move16();
            }
        }
    }

    /* ---- type IV DCT of the windowed data gives the coefficients ---- */

    dct_type_iv_a(windowed_data, coefs, dct_length);

    return mag_shift;
}
Esempio n. 19
0
// Fills X with (m - r) row vectors of length m, where m = A.NumRows() and
// r is the value returned by gauss() on the transpose of A.
//
// Assuming gauss() row-reduces its argument in place to echelon form and
// returns the rank (TODO confirm against the NTL documentation), every row
// v of X satisfies v * A = 0, i.e. X is a basis of the left kernel of A.
void kernel(mat_ZZ_p& X, const mat_ZZ_p& A)
{
   const long m = A.NumRows();

   mat_ZZ_p M;
   transpose(M, A);
   const long r = gauss(M);

   const long nullity = m - r;
   X.SetDims(nullity, m);

   ZZ acc, prod;
   ZZ_p scaled;

   // D[col] = row whose first nonzero entry sits in column col,
   // or -1 when col is a free (non-pivot) column.
   vec_long D;
   D.SetLength(m);
   for (long c = 0; c < m; c++)
      D[c] = -1;

   // inverses[col] = 1 / pivot entry, for pivot columns only.
   vec_ZZ_p inverses;
   inverses.SetLength(m);

   long col = -1;
   for (long row = 0; row < r; row++) {
      // Advance to the first nonzero entry of this row; the scan resumes
      // to the right of the previous row's pivot.
      col++;
      while (IsZero(M[row][col]))
         col++;

      D[col] = row;
      inv(inverses[col], M[row][col]);
   }

   for (long k = 0; k < nullity; k++) {
      vec_ZZ_p& v = X[k];
      long pos = 0;   // counts free columns seen so far, right to left

      // Back-substitute from the last column to the first.
      for (long j = m-1; j >= 0; j--) {
         const long row = D[j];

         if (row == -1) {
            // Free column: the k-th one (counted from the right) gets 1,
            // every other free column gets 0.
            if (pos == k)
               set(v[j]);
            else
               clear(v[j]);
            pos++;
            continue;
         }

         // Pivot column: v[j] = -(sum_{s>j} v[s] * M[row][s]) / M[row][j].
         // The sum is accumulated on integer representatives and reduced
         // once by conv().
         clear(acc);
         for (long s = j+1; s < m; s++) {
            mul(prod, rep(v[s]), rep(M[row][s]));
            add(acc, acc, prod);
         }

         conv(scaled, acc);
         mul(scaled, scaled, inverses[j]);
         negate(v[j], scaled);
      }
   }
}
Esempio n. 20
0
/* Demo driver: prints increment() and negate() applied to a positive,
 * a zero and a negative int. */
int main(void) {
    int a = 10;
    int b = 0;
    int c = -10;

    printf("The outcome values are following: %d  %d  %d  %d  %d  %d \n",
           increment(a), increment(b), increment(c),
           negate(a), negate(b), negate(c));
    return 0;
}
Esempio n. 21
0
// res = sin(x), computed at the current RR precision.
//
// The working precision is raised while computing and restored to its
// original value before the result is copied into res.  Arguments with
// Lg2(x) > 1000 are rejected through Error().
void sin(RR& res, const RR& x)
{
   if (x == 0) {
      res = 0;
      return;
   }

   if (Lg2(x) > 1000)
      Error("sin: sorry...argument too large in absolute value");

   const long saved_prec = RR::precision();

   RR pi, quot, frac;
   RR whole;

   // The series below is only applied to an argument whose square is
   // below 3, so that it converges without heavy cancellation in its
   // first terms.

   RR::SetPrecision(saved_prec + NumBits(saved_prec) + 10);

   if (x*x < 3) {
      xcopy(frac, x);
   }
   else {
      // Write x/pi = whole + frac with |frac| <= 1/2.  frac has to come
      // out with more than saved_prec good bits, so the division is
      // repeated at growing precision until it does.

      long work_prec = saved_prec + Lg2(x) + 20;

      while (true) {
         RR::SetPrecision(work_prec);
         ComputePi(pi);
         xcopy(quot, x/pi);
         xcopy(whole, floor(quot));
         xcopy(frac, quot - whole);
         if (frac > 0.5) {
            whole++;
            xcopy(frac, quot - whole);
         }

         // Enough bits of frac?  (frac is tested against zero first, so
         // Lg2(frac) is never taken of a zero value.)
         if (frac != 0 && work_prec >= saved_prec - Lg2(frac) + Lg2(whole) + 10)
            break;

         // Not yet: raise the precision and try again.
         work_prec += max(20, work_prec/10);
      }

      RR::SetPrecision(saved_prec + NumBits(saved_prec) + 10);
      ComputePi(pi);

      xcopy(frac, pi * frac);

      if (whole != 0 && whole.exponent() == 0) {
         // whole is odd, so frac is negated, which negates sin(frac)
         xcopy(frac, -frac);
      }
   }

   // Series for sin(frac): each term is the previous one times
   // -frac^2 / ((k-1)*k); stop once adding a term no longer changes the sum.

   RR sum, next_sum, term;

   sum = 0;
   xcopy(term, frac);

   for (long k = 3; ; k += 2) {
      add(next_sum, sum, term);
      if (sum == next_sum) break;
      xcopy(sum, next_sum);
      mul(term, term, frac);
      mul(term, term, frac);
      div(term, term, k-1);
      div(term, term, k);
      negate(term, term);
   }

   RR::SetPrecision(saved_prec);

   xcopy(res, sum);
}
Esempio n. 22
0
/*----------------------------------------------------------------------------
; hp_max
;
; Finds the largest absolute high-pass filtered correlation over the lags
; strictly between lag_min and lag_max, divides it by a high-pass filtered
; energy term of scal_sig, and stores the quotient in *cor_hp_max.
;
; corr[] is indexed with negative offsets (corr[-lag-1] .. corr[-lag+1]) and
; scal_sig[-1] is read, so both pointers must point inside larger buffers.
;
; Always returns 0.
----------------------------------------------------------------------------*/
Word16 hp_max(
    Word32 corr[],      /* i   : correlation vector.                      */
    Word16 scal_sig[],  /* i   : scaled signal.                           */
    Word16 L_frame,     /* i   : length of frame to compute pitch         */
    Word16 lag_max,     /* i   : maximum lag                              */
    Word16 lag_min,     /* i   : minimum lag                              */
    Word16 *cor_hp_max, /* o   : max high-pass filtered norm. correlation */
    Flag   *pOverflow   /* i/o : overflow Flag                            */
)
{
    Word16 lag;
    Word16 n;
    Word32 peak;
    Word32 energy;
    Word32 cross;
    Word32 acc;
    Word32 filt;
    Word16 peak_hi;
    Word16 energy_hi;
    Word16 ratio;
    Word16 peak_shift;
    Word16 energy_shift;
    Word16 net_shift;

    /* Peak of |2*corr[-lag] - corr[-lag-1] - corr[-lag+1]| over the lags. */
    peak = MIN_32;

    for (lag = lag_max - 1; lag > lag_min; lag--)
    {
        acc  = L_shl(corr[-lag], 1, pOverflow);
        acc  = L_sub(acc, corr[-lag-1], pOverflow);
        acc  = L_sub(acc, corr[-lag+1], pOverflow);
        filt = L_abs(acc);

        if (filt >= peak)
        {
            peak = filt;
        }
    }

    /* Energy of the signal ... */
    energy = 0L;
    for (n = 0; n < L_frame; n++)
    {
        energy = L_mac(energy, scal_sig[n], scal_sig[n], pOverflow);
    }

    /* ... and its correlation with itself delayed by one sample. */
    cross = 0L;
    for (n = 0; n < L_frame; n++)
    {
        cross = L_mac(cross, scal_sig[n], scal_sig[n - 1], pOverflow);
    }

    /* Same high-pass filtering applied to the energy: |2*energy - 2*cross| */
    acc    = L_shl(energy, 1, pOverflow);
    cross  = L_shl(cross, 1, pOverflow);
    energy = L_abs(L_sub(acc, cross, pOverflow));

    /* peak / energy on normalised 16-bit values.  The peak is normalised
       one bit short of full scale. */
    peak_shift = norm_l(peak) - 1;
    peak_hi    = (Word16)(L_shl(peak, peak_shift, pOverflow) >> 16);

    energy_shift = norm_l(energy);
    energy_hi    = (Word16)(L_shl(energy, energy_shift, pOverflow) >> 16);

    /* A zero denominator yields a zero quotient instead of a division. */
    ratio = (energy_hi != 0) ? div_s(peak_hi, energy_hi) : 0;

    /* Undo the difference between the two normalisation shifts. */
    net_shift = peak_shift - energy_shift;

    if (net_shift < 0)
    {
        *cor_hp_max = shl(ratio, negate(net_shift), pOverflow); /* Q15 */
    }
    else
    {
        *cor_hp_max = shr(ratio, net_shift, pOverflow); /* Q15 */
    }

    return 0;
}
Esempio n. 23
0
// Sets z to the negation of a.
//
// a is first copied into z via xcopy(), then the x member of z is negated
// in place, so z and a may refer to the same object.
// NOTE(review): xcopy() is defined elsewhere; it is presumably a plain
// copy that performs no rounding -- confirm against its definition.
void negate(RR& z, const RR& a)
{
   xcopy(z, a);
   negate(z.x, z.x);
}
Esempio n. 24
0
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color)
      do_twoside_color(c);

   if (c->has_flat_shading)
      do_flatshade_triangle(c);


   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      struct brw_reg a2 = offset(c->vert[2], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
	 brw_MUL(p, a2, a2, c->inv_w[2]);
      }


      /* Calculate coefficients for interpolated values:
       */
      if (pc_linear)
      {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));

	 /* calculate dA/dx
	  */
	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 /* calculate dA/dy
	  */
	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);
	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
	  * the send instruction:
	  */	
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* offset */
		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
      }
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Esempio n. 25
0
/*---------------------------------------------------------------------------*
 * D4i40_17_fast
 *
 * Searches for the positions of four signed pulses that maximise
 * square(correlation) / energy, using two passes (tracks 3 and 4) of a
 * two-phase depth-first search, then builds the codeword, the filtered
 * codeword and the packed index.
 *
 * Side effects visible to the caller:
 *   - dn[]  : negative entries are replaced by their negation.
 *   - rr[]  : the cross-correlation sub-tables are multiplied by the chosen
 *             pulse signs in place.
 *
 * Returns the packed position index; *sign receives one bit per pulse
 * (1, 2, 4, 8), set when that pulse is positive.
 *---------------------------------------------------------------------------*/
static Word16 D4i40_17_fast(/*(o) : Index of pulses positions.               */
  Word16 dn[],          /* (i)    : Correlations between h[] and Xn[].       */
  Word16 rr[],          /* (i)    : Correlations of impulse response h[].    */
  Word16 h[],           /* (i) Q12: Impulse response of filters.             */
  Word16 cod[],         /* (o) Q13: Selected algebraic codeword.             */
  Word16 y[],           /* (o) Q12: Filtered algebraic codeword.             */
  Word16 *sign          /* (o)    : Signs of 4 pulses.                       */
)
{
  Word16 i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  Word16 i, j, ix, iy, track, max;
  Word16 prev_i0, i1_offset;
  Word16 psk, ps, ps0, ps1, ps2, sq, sq2;
  Word16 alpk, alp, alp_16;
  Word32 s, alp0, alp1, alp2;
  Word16 *p0, *p1, *p2, *p3, *p4;
  Word16 sign_dn[L_SUBFR], sign_dn_inv[L_SUBFR], *psign;
  Word16 tmp_vect[NB_POS];
  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  Word16  *ptr_rri0i3_i4;
  Word16  *ptr_rri1i3_i4;
  Word16  *ptr_rri2i3_i4;
  Word16  *ptr_rri3i3_i4;

  /* Init pointers: rr[] holds five NB_POS-sized tables followed by nine
     MSIZE-sized tables, laid out back to back. */
  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;
  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;

 /*-----------------------------------------------------------------------*
  * Choose the sign of the impulse.                                       *
  *-----------------------------------------------------------------------*/

  for (i=0; i<L_SUBFR; i++)
  {
    if (dn[i] >= 0)
    {
      sign_dn[i] = MAX_16;
      sign_dn_inv[i] = MIN_16;
    }
    else
    {
      sign_dn[i] = MIN_16;
      sign_dn_inv[i] = MAX_16;
      dn[i] = negate(dn[i]);
    }
  }

 /*-------------------------------------------------------------------*
  * Modification of rrixiy[] to take signs into account.              *
  *                                                                   *
  * The store and the pointer increment are separate statements:      *
  * "*p++ = f(*p)" reads and modifies p without sequencing, which is  *
  * undefined behaviour in C.                                         *
  *-------------------------------------------------------------------*/

  p0 = rri0i1;
  p1 = rri0i2;
  p2 = rri0i3;
  p3 = rri0i4;

  for(i0=0; i0<L_SUBFR; i0+=STEP)
  {
    psign = sign_dn;
    if (psign[i0] < 0) psign = sign_dn_inv;

    for(i1=1; i1<L_SUBFR; i1+=STEP)
    {
      *p0 = mult(*p0, psign[i1]);
      p0++;
      *p1 = mult(*p1, psign[i1+1]);
      p1++;
      *p2 = mult(*p2, psign[i1+2]);
      p2++;
      *p3 = mult(*p3, psign[i1+3]);
      p3++;
    }
  }

  p0 = rri1i2;
  p1 = rri1i3;
  p2 = rri1i4;

  for(i1=1; i1<L_SUBFR; i1+=STEP)
  {
    psign = sign_dn;
    if (psign[i1] < 0) psign = sign_dn_inv;

    for(i2=2; i2<L_SUBFR; i2+=STEP)
    {
      *p0 = mult(*p0, psign[i2]);
      p0++;
      *p1 = mult(*p1, psign[i2+1]);
      p1++;
      *p2 = mult(*p2, psign[i2+2]);
      p2++;
    }
  }

  p0 = rri2i3;
  p1 = rri2i4;

  for(i2=2; i2<L_SUBFR; i2+=STEP)
  {
    psign = sign_dn;
    if (psign[i2] < 0) psign = sign_dn_inv;

    for(i3=3; i3<L_SUBFR; i3+=STEP)
    {
      *p0 = mult(*p0, psign[i3]);
      p0++;
      *p1 = mult(*p1, psign[i3+1]);
      p1++;
    }
  }


 /*-------------------------------------------------------------------*
  * Search the optimum positions of the four pulses which maximize    *
  *     square(correlation) / energy                                  *
  *-------------------------------------------------------------------*/

  psk = -1;
  alpk = 1;

  ptr_rri0i3_i4 = rri0i3;
  ptr_rri1i3_i4 = rri1i3;
  ptr_rri2i3_i4 = rri2i3;
  ptr_rri3i3_i4 = rri3i3;

  /* Initializations only to remove warning from some compilers */

  ip0=0; ip1=1; ip2=2; ip3=3; ix=0; iy=0; ps=0;

  /* search 2 times: track 3 and 4 */
  for (track=3; track<5; track++)
  {
   /*------------------------------------------------------------------*
    * depth first search 3, phase A: track 2 and 3/4.                  *
    *------------------------------------------------------------------*/

    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 2 */

    prev_i0  = -1;

    for (i=0; i<2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 2 */
      for (j=2; j<L_SUBFR; j+=STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0,j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = rri2i2 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 2 */

      p0 = ptr_rri2i3_i4 + shl(j, 3);
      p1 = ptr_rri3i3_i4;

      for (i1=track; i1<L_SUBFR; i1+=STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0++, _1_2);
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* j = 8*(i1/5) */

   /*------------------------------------------------------------------*
    * depth first search 3, phase B: track 0 and 1.                    *
    *------------------------------------------------------------------*/

    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */

    p0 = rri1i2 + mult(i0, 6554);
    p1 = ptr_rri1i3_i4 + mult(i1, 6554);
    p2 = rri1i1;
    p3 = tmp_vect;

    for (i3=1; i3<L_SUBFR; i3+=STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);        p0 += NB_POS;
      s = L_mac(s, *p1, _1_4);      p1 += NB_POS;
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 0 */

    p0 = rri0i2 + mult(i0, 6554);
    p1 = ptr_rri0i3_i4 + mult(i1, 6554);
    p2 = rri0i0;
    p3 = rri0i1;

    for (i2=0; i2<L_SUBFR; i2+=STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1, _1_8);       p1 += NB_POS;
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 1 */

      p4 = tmp_vect;

      for (i3=1; i3<L_SUBFR; i3+=STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

   /*----------------------------------------------------------------*
    * depth first search 3: compare codevector with the best case.   *
    *----------------------------------------------------------------*/

    s = L_msu(L_mult(alpk,sq),psk,alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip2 = i0;
      ip3 = i1;
      ip0 = ix;
      ip1 = iy;
    }

   /*------------------------------------------------------------------*
    * depth first search 4, phase A: track 3 and 0.                    *
    *------------------------------------------------------------------*/

    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 3/4 */

    prev_i0  = -1;

    for (i=0; i<2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 3/4 */
      for (j=track; j<L_SUBFR; j+=STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0,j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = ptr_rri3i3_i4 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 0 */

      p0 = ptr_rri0i3_i4 + j;
      p1 = rri0i0;

      for (i1=0; i1<L_SUBFR; i1+=STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0, _1_2);       p0 += NB_POS;
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* j = 8*(i1/5) */

   /*------------------------------------------------------------------*
    * depth first search 4, phase B: track 1 and 2.                    *
    *------------------------------------------------------------------*/

    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */

    p0 = ptr_rri2i3_i4 + mult(i0, 6554);
    p1 = rri0i2 + i1_offset;
    p2 = rri2i2;
    p3 = tmp_vect;

    for (i3=2; i3<L_SUBFR; i3+=STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);         p0 += NB_POS;
      s = L_mac(s, *p1++, _1_4);
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 1 */

    p0 = ptr_rri1i3_i4 + mult(i0, 6554);
    p1 = rri0i1 + i1_offset;
    p2 = rri1i1;
    p3 = rri1i2;

    for (i2=1; i2<L_SUBFR; i2+=STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1++, _1_8);
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 2 */

      p4 = tmp_vect;

      for (i3=2; i3<L_SUBFR; i3+=STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

   /*----------------------------------------------------------------*
    * depth first search 4: compare codevector with the best case.   *
    *----------------------------------------------------------------*/

    s = L_msu(L_mult(alpk,sq),psk,alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip3 = i0;
      ip0 = i1;
      ip1 = ix;
      ip2 = iy;
    }

    /* The second pass (track 4) uses the i4 variants of the tables. */
    ptr_rri0i3_i4 = rri0i4;
    ptr_rri1i3_i4 = rri1i4;
    ptr_rri2i3_i4 = rri2i4;
    ptr_rri3i3_i4 = rri4i4;

  }


  /* Set the sign of impulses */

  i0 = sign_dn[ip0];
  i1 = sign_dn[ip1];
  i2 = sign_dn[ip2];
  i3 = sign_dn[ip3];

  /* Find the codeword corresponding to the selected positions */


  for(i=0; i<L_SUBFR; i++) {
    cod[i] = 0;
  }

  cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */
  cod[ip1] = shr(i1, 2);
  cod[ip2] = shr(i2, 2);
  cod[ip3] = shr(i3, 2);

  /* find the filtered codeword */

  for (i = 0; i < ip0; i++) y[i] = 0;

  if(i0 > 0)
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) y[i] = h[j];
  else
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) y[i] = negate(h[j]);

  if(i1 > 0)
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  if(i2 > 0)
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  if(i3 > 0)
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  /* find codebook index;  17-bit address */

  /* One sign bit per pulse, set when the pulse is positive. */
  i = 0;
  if(i0 > 0) i = add(i, 1);
  if(i1 > 0) i = add(i, 2);
  if(i2 > 0) i = add(i, 4);
  if(i3 > 0) i = add(i, 8);
  *sign = i;

  ip0 = mult(ip0, 6554);         /* ip0/5 */
  ip1 = mult(ip1, 6554);         /* ip1/5 */
  ip2 = mult(ip2, 6554);         /* ip2/5 */
  i   = mult(ip3, 6554);         /* ip3/5 */
  j   = add(i, shl(i, 2));       /* j = i*5 */
  j   = sub(ip3, add(j, 3));     /* j= ip3%5 -3 */
  ip3 = add(shl(i, 1), j);

  /* Pack the four position fields at bit offsets 0, 3, 6 and 9. */
  i = add(ip0, shl(ip1, 3));
  i = add(i  , shl(ip2, 6));
  i = add(i  , shl(ip3, 9));

  return i;
}
Esempio n. 26
0
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->has_flat_shading)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
		
	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);

	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Esempio n. 27
0
// Pushes the cosine of the expression held in p1 onto the stack.
//
// Handles, in order: cos(arccos(x)), floating-point arguments, negative
// arguments (by symmetry), cos(arctan(x)), and arguments that are an
// integer number of degrees with a known closed form. Anything else is
// pushed back as the unevaluated list (COS p1).
void
cosine_of_angle(void)
{
	int degrees;
	int sign = 0;		// sign of the 1/2 factor for the sqrt results
	int radicand = 0;	// integer under the square root
	double value;

	// cos(arccos(x)) = x

	if (car(p1) == symbol(ARCCOS)) {
		push(cadr(p1));
		return;
	}

	// numeric argument: evaluate directly, snapping tiny results to zero

	if (isdouble(p1)) {
		value = cos(p1->u.d);
		if (fabs(value) < 1e-10)
			value = 0.0;
		push_double(value);
		return;
	}

	// cosine function is symmetric, cos(-x) = cos(x)

	if (isnegative(p1)) {
		push(p1);
		negate();
		p1 = pop();
	}

	// cos(arctan(x)) = 1 / sqrt(1 + x^2)

	// see p. 173 of the CRC Handbook of Mathematical Sciences

	if (car(p1) == symbol(ARCTAN)) {
		push_integer(1);
		push(cadr(p1));
		push_integer(2);
		power();
		add();
		push_rational(-1, 2);
		power();
		return;
	}

	// multiply by 180/pi

	push(p1);
	push_integer(180);
	multiply();
	push_symbol(PI);
	divide();

	degrees = pop_integer();

	// NOTE(review): a negative value here presumably means the quotient
	// was not an integer -- confirm against pop_integer()

	if (degrees < 0) {
		push(symbol(COS));
		push(p1);
		list(2);
		return;
	}

	switch (degrees % 360) {
	case 0:
		push_integer(1);
		return;
	case 180:
		push_integer(-1);
		return;
	case 90:
	case 270:
		push_integer(0);
		return;
	case 60:
	case 300:
		push_rational(1, 2);
		return;
	case 120:
	case 240:
		push_rational(-1, 2);
		return;
	case 45:
	case 315:
		sign = 1;
		radicand = 2;
		break;
	case 135:
	case 225:
		sign = -1;
		radicand = 2;
		break;
	case 30:
	case 330:
		sign = 1;
		radicand = 3;
		break;
	case 150:
	case 210:
		sign = -1;
		radicand = 3;
		break;
	default:
		push(symbol(COS));
		push(p1);
		list(2);
		return;
	}

	// remaining cases: (sign/2) * radicand^(1/2)

	push_rational(sign, 2);
	push_integer(radicand);
	push_rational(1, 2);
	power();
	multiply();
}
Esempio n. 28
0
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc_coord_replace);
	 /* Caculate 1.0/PointWidth */
	 brw_math(&c->func,
		  c->tmp,
		  BRW_MATH_FUNCTION_INV,
		  0,
		  c->dx0,
		  BRW_MATH_DATA_SCALAR,
		  BRW_MATH_PRECISION_FULL);

	 brw_set_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                    : BRW_URB_WRITE_NO_FLAGS,
		    4, 	/* msg len */
		    0,	/* response len */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
Esempio n. 29
0
// Tries to find an integer vector x that the code combines from the rows
// of A to reach y (presumably x*A == y -- confirm against the NTL docs).
//
// reduce selects post-processing of the solution:
//   0 - return the solution as computed;
//   1 - run image() on the kernel rows with the solution appended and
//       return the last row;
//   2 - as 1, but LLL() is applied to the kernel rows first.
//
// Returns 1 and sets x when a solution is found, 0 otherwise.
// Error() is called on a dimension mismatch or an out-of-range reduce.
long LatticeSolve(vec_ZZ& x, const mat_ZZ& A, const vec_ZZ& y, long reduce)
{
   const long n = A.NumRows();
   const long m = A.NumCols();

   if (y.length() != m)
      Error("LatticeSolve: dimension mismatch");

   if (!(reduce >= 0 && reduce <= 2))
      Error("LatticeSolve: bad reduce parameter");

   // The zero vector is always reachable with the zero combination.
   if (IsZero(y)) {
      x.SetLength(n);
      clear(x);
      return 1;
   }

   mat_ZZ A1, U1;
   ZZ det2;

   A1 = A;

   const long im_rank = image(det2, A1, U1);
   const long ker_rank = n - im_rank;

   // Stack the last im_rank rows of A1 on top of y and run image() again.
   mat_ZZ A2, U2;

   A2.SetDims(im_rank + 1, m);
   for (long row = 1; row <= im_rank; row++)
      A2(row) = A1(ker_rank + row);

   A2(im_rank + 1) = y;

   const long new_rank = image(det2, A2, U2);

   // y must not raise the rank ...
   if (new_rank != im_rank)
      return 0;

   // ... and its coefficient in the first row of U2 must be +1 or -1.
   const ZZ& y_coeff = U2(1)(im_rank + 1);
   if (y_coeff != 1 && y_coeff != -1)
      return 0;

   vec_ZZ coeffs;
   coeffs.SetLength(im_rank);

   for (long k = 1; k <= im_rank; k++)
      coeffs(k) = U2(1)(k);

   if (y_coeff == 1)
      negate(coeffs, coeffs);

   // Map the coefficients back through the last im_rank rows of U1.
   vec_ZZ sol, term;
   sol.SetLength(n);
   clear(sol);
   term.SetLength(n);

   for (long k = 1; k <= im_rank; k++) {
      mul(term, U1(ker_rank + k), coeffs(k));
      add(sol, sol, term);
   }

   if (reduce == 0) {
      x = sol;
      return 1;
   }

   if (reduce != 1 && reduce != 2)
      return 0;

   if (reduce == 2) {
      U1.SetDims(ker_rank, n);
      LLL(det2, U1);
   }

   // Append the solution below the kernel rows and run image() once more.
   U1.SetDims(ker_rank + 1, n);
   U1(ker_rank + 1) = sol;
   image(det2, U1);

   x = U1(ker_rank + 1);
   return 1;
}
Esempio n. 30
0
/***************************************************************************
 Function:    bits_to_words

 Syntax:      bits_to_words(UWord32 *region_mlt_bits,
                            Word16  *region_mlt_bit_counts,
                            Word16  *drp_num_bits,
                            UWord16 *drp_code_bits,
                            Word16  *out_words,
                            Word16  categorization_control,
                            Word16  number_of_regions,
                            Word16  num_categorization_control_bits,
                            Word16  number_of_bits_per_frame)

 Description: Stuffs the bits into words for output.

              The right-justified codes in drp_code_bits[] (with lengths
              in drp_num_bits[]) are packed first, followed by the
              categorization control value, then the left-justified
              per-region bits from region_mlt_bits[] (four 32-bit words
              per region). Any bits left in the frame are set to 1.

              drp_num_bits[number_of_regions] and
              drp_code_bits[number_of_regions] are overwritten, so both
              arrays need number_of_regions + 1 entries.

              Packing stops once 16 * (words written) reaches
              number_of_bits_per_frame.

 WMOPS:     7kHz |    24kbit    |      32kbit
          -------|--------------|----------------
            AVG  |    0.09      |     0.12
          -------|--------------|----------------
            MAX  |    0.10      |     0.13
          -------|--------------|----------------

           14kHz |    24kbit    |     32kbit     |     48kbit
          -------|--------------|----------------|----------------
            AVG  |    0.12      |     0.15       |     0.19
          -------|--------------|----------------|----------------
            MAX  |    0.14      |     0.17       |     0.21
          -------|--------------|----------------|----------------

***************************************************************************/
void bits_to_words(UWord32 *region_mlt_bits,
                   Word16  *region_mlt_bit_counts,
                   Word16  *drp_num_bits,
                   UWord16 *drp_code_bits,
                   Word16  *out_words,
                   Word16  categorization_control,
                   Word16  number_of_regions,
                   Word16  num_categorization_control_bits,
                   Word16  number_of_bits_per_frame)
{
    Word16  out_word_index = 0;
    Word16  j;
    Word16  region;
    Word16  out_word;
    Word16  region_bit_count;
    Word16  current_word_bits_left;
    UWord16 slice;
    Word16  out_word_bits_free = 16;
    UWord32 *in_word_ptr;
    UWord32 current_word;
    
    Word32  acca;
    Word32  accb;
    Word16  temp;

    /* First set up the categorization control bits to look like one more set of region power bits. */
    out_word = 0;
    move16();

    drp_num_bits[number_of_regions] = num_categorization_control_bits;
    move16();
    
    drp_code_bits[number_of_regions] = (UWord16)categorization_control;
    move16();

    /* These code bits are right justified. */
    for (region=0; region <= number_of_regions; region++)
    {
        current_word_bits_left = drp_num_bits[region];
        move16();
        
        current_word = (UWord32)drp_code_bits[region];
        move16();
        
        /* j >= 0: the code fills (or overflows) the current output word */
        j = sub(current_word_bits_left,out_word_bits_free);

        test();
        if (j >= 0)
        {
            temp = extract_l(L_shr(current_word,j));
            out_word = add(out_word,temp);

            out_words[out_word_index++] = out_word;
            move16();
            
            out_word_bits_free = 16;
            move16();
            
            out_word_bits_free = sub(out_word_bits_free,j);
            
            /* the j leftover bits start the next output word */
            acca = (current_word << out_word_bits_free);
            out_word = extract_l(acca);
        }
        else
        {
            j = negate(j);

            acca = (current_word << j);
            accb = L_deposit_l(out_word);
            acca = L_add(accb,acca);
            out_word = extract_l(acca);
            
            out_word_bits_free = sub(out_word_bits_free,current_word_bits_left);
        }
    }

    /* These code bits are left justified. */
    
    for (region=0;region<number_of_regions; region++)
    {
        /* accb = 16*out_word_index - number_of_bits_per_frame; negative while room remains */
        accb = L_deposit_l(out_word_index);
        accb = L_shl(accb,4);
        accb = L_sub(accb,number_of_bits_per_frame);
        test();
        if(accb < 0)        
        {
            /* each region owns four 32-bit input words */
            temp = shl(region,2);
            in_word_ptr = &region_mlt_bits[temp];
            region_bit_count = region_mlt_bit_counts[region];
            move16();

            /* current_word_bits_left = MIN(32,region_bit_count); */
            temp = sub(32,region_bit_count);
            test();
            if(temp > 0)
                current_word_bits_left = region_bit_count;
            else
                current_word_bits_left = 32;

            current_word = *in_word_ptr++;
    
            acca = L_deposit_l(out_word_index);
            acca = L_shl(acca,4);
            acca = L_sub(acca,number_of_bits_per_frame);
            
            /* from while loop */
            test();
            test();
            logic16(); 
            while ((region_bit_count > 0) && (acca < 0))
            {
                /* from while loop */
                test();
                test();
                logic16(); 
                
                temp = sub(current_word_bits_left,out_word_bits_free);
                test();
                if (temp >= 0)
                {
                    /* enough input bits to complete the current output word */
                    temp = sub(32,out_word_bits_free);
                    accb = LU_shr(current_word,temp);
                    slice = (UWord16)extract_l(accb);
                    
                    out_word = add(out_word,slice);
    
                    test();
                    current_word <<= out_word_bits_free;

                    current_word_bits_left = sub(current_word_bits_left,out_word_bits_free);
                    out_words[out_word_index++] = extract_l(out_word);
                    move16();

                    out_word = 0;
                    move16();

                    out_word_bits_free = 16;
                    move16();
                }
                else
                {
                    /* input word runs out first: append what is left of it */
                    temp = sub(32,current_word_bits_left);
                    accb = LU_shr(current_word,temp);
                    slice = (UWord16)extract_l(accb);
                    
                    temp = sub(out_word_bits_free,current_word_bits_left);
                    test();
                    accb = slice << temp;
                    acca = L_deposit_l(out_word);
                    acca = L_add(acca,accb);
                    out_word = extract_l(acca);
                    out_word_bits_free = sub(out_word_bits_free,current_word_bits_left);
                    
                    current_word_bits_left = 0;
                    move16();
                }
    
                test();
                if (current_word_bits_left == 0)
                {
                    current_word = *in_word_ptr++;
                    region_bit_count = sub(region_bit_count,32);
                    
                    /* current_word_bits_left = MIN(32,region_bit_count); */
                    temp = sub(32,region_bit_count);
                    test();
                    if(temp > 0)
                        current_word_bits_left = region_bit_count;
                    else
                        current_word_bits_left = 32;
                    
                }
                acca = L_deposit_l(out_word_index);
                acca = L_shl(acca,4);
                acca = L_sub(acca,number_of_bits_per_frame);
            }
            accb = L_deposit_l(out_word_index);
            accb = L_shl(accb,4);
            accb = L_sub(accb,number_of_bits_per_frame);
        }
    }

    /* Fill out with 1's. */

    /* Recompute the remaining-bit count here: if no region entered the
       packing branch above, acca would still hold an unrelated shifted
       code word left over from the first loop. */
    acca = L_deposit_l(out_word_index);
    acca = L_shl(acca,4);
    acca = L_sub(acca,number_of_bits_per_frame);

    test();
    while (acca < 0)
    {
        test();
        current_word = 0x0000ffff;
        move32();

        temp = sub(16,out_word_bits_free);
        acca = LU_shr(current_word,temp);
        slice = (UWord16)extract_l(acca);

        out_word = add(out_word,slice);
        out_words[out_word_index++] = out_word;
        move16();

        out_word = 0;
        move16();
        
        out_word_bits_free = 16;
        move16();
        
        acca = L_deposit_l(out_word_index);
        acca = L_shl(acca,4);
        acca = L_sub(acca,number_of_bits_per_frame);
    }
}