コード例 #1
0
ファイル: preinv_mod_1.c プロジェクト: bsmr-common-lisp/xcl
/* Reduce the limbs {up, un}, most significant first, to a single running
   remainder with respect to d, and return that remainder.

   dinv is handed unchanged to udiv_qrnnd_preinv (defined elsewhere;
   presumably the precomputed inverse of d -- confirm against that macro).

   Preconditions (checked only by ASSERT): un >= 1 and d has its high bit
   set.  */
mp_limb_t
mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t rem, limb;
  mp_limb_t quot_unused;	/* the macro needs a quotient output; it is discarded */
  mp_size_t idx;

  ASSERT (un >= 1);
  ASSERT (d & GMP_LIMB_HIGHBIT);

  /* d has its high bit set, so one conditional subtraction is enough to
     bring the top limb below d.  */
  rem = up[un - 1];
  if (rem >= d)
    rem -= d;

  /* Walk the remaining limbs downwards, feeding (rem, limb) as the
     (high, low) numerator of each step.  */
  idx = un - 1;
  while (--idx >= 0)
    {
      limb = up[idx];
      udiv_qrnnd_preinv (quot_unused, rem, rem, limb, d, dinv);
    }

  return rem;
}
コード例 #2
0
ファイル: div_qr_1n_pi1.c プロジェクト: AllardJ/Tomato
/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n} and
   returning the final remainder limb.  Requires that uh < d.

   Limbs are processed from the most significant end.  Each source limb
   is read before the quotient limb at the same index is written, so qp
   and up may be the same area (see the MPN_SAME_OR_SEPARATE_P assert).  */
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
		   mp_limb_t d, mp_limb_t dinv)
{
  mp_srcptr src;
  mp_ptr dst;
  mp_size_t remaining;
  mp_limb_t rem;

  ASSERT (n > 0);
  ASSERT (uh < d);
  ASSERT (d & GMP_NUMB_HIGHBIT);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));

  /* Start one past the top limb of each area and step downwards.  */
  src = up + n;
  dst = qp + n;
  remaining = n;
  rem = uh;

  do
    {
      mp_limb_t quot, low;

      low = *--src;
      udiv_qrnnd_preinv (quot, rem, rem, low, d, dinv);
      *--dst = quot;
    }
  while (--remaining > 0);

  return rem;
}
コード例 #3
0
/* Profiling workload for udiv_qrnnd_preinv.

   A random non-zero divisor d is drawn once and shifted left by its
   leading-zero count.  Each of the `count` outer iterations then refills
   the buffer with 100 (high, low) limb pairs whose high limb is below d,
   and runs 100 udiv_qrnnd_preinv calls between prof_start() and
   prof_stop().

   arg is not used.  */
void sample(void * arg, ulong count)
{
   enum { NUM_LIMBS = 200 };   /* 100 pairs, stored as high then low */
   mp_limb_t d, q, r, dinv, norm;
   mp_ptr array = (mp_ptr) flint_malloc(NUM_LIMBS * sizeof(mp_limb_t));
   FLINT_TEST_INIT(state);
   ulong rep;
   int k;

   d = n_randtest_not_zero(state);
   count_leading_zeros(norm, d);
   d <<= norm;

   for (rep = 0; rep < count; rep++)
   {
      /* Fill the buffer; redraw the high limb until it is below d. */
      k = 0;
      while (k < NUM_LIMBS)
      {
         mp_limb_t hi;

         for (;;)
         {
            hi = n_randtest(state);
            if (hi < d)
               break;
         }
         array[k] = hi;
         array[k + 1] = n_randtest(state);
         k += 2;
      }

      invert_limb(dinv, d);

      /* Only this loop is inside the timed region. */
      prof_start();
      for (k = 0; k < NUM_LIMBS; k += 2)
      {
         udiv_qrnnd_preinv(q, r, array[k], array[k + 1], d, dinv);
      }
      prof_stop();

      /* Reads q and r after timing; presumably so the compiler cannot
         discard the divisions -- confirm intent. */
      if (q + r == 0)
         flint_printf("\r");
   }

   flint_randclear(state);
   flint_free(array);
}
コード例 #4
0
ファイル: sb_divrem_mn.c プロジェクト: mahdiz/mpclib
/* Divide {np, nn} by {dp, dn}, writing the nn-dn low quotient limbs to
   {qp, nn-dn} and returning the most significant quotient limb (0 or 1).
   np is updated in place by the subtractions below; after the loop the
   partial remainder occupies the low dn limbs of the original np.

   Preconditions (checked only by ASSERT): dn > 2, nn >= dn, the high bit
   of dp[dn-1] is set, dp overlaps neither np nor qp, and qp may overlap
   np only when qp+dn >= np.  */
mp_limb_t
mpn_sb_divrem_mn (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn)
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t qn = nn - dn;
  mp_size_t i;
  mp_limb_t dx, d1, n0;
  mp_limb_t dxinv;
  int use_preinv;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  /* Point np at the top dn limbs of the numerator.  The main loop steps
     np down by one limb per quotient limb, ending at the original np.  */
  np += qn;
  /* dx and d1 are the two most significant divisor limbs; n0 is the most
     significant numerator limb.  */
  dx = dp[dn - 1];
  d1 = dp[dn - 2];
  n0 = np[dn - 1];

  /* If the top dn numerator limbs are >= the divisor, subtract the divisor
     once and record a leading quotient limb of 1.  */
  if (n0 >= dx)
    {
      if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
	{
	  mpn_sub_n (np, np, dp, dn);
	  most_significant_q_limb = 1;
	}
    }

  /* use_preinv is possibly a constant, but it's left to the compiler to
     optimize away the unused code in that case.  */
  use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
  /* dxinv is only initialized, and only read, when use_preinv is set.  */
  if (use_preinv)
    invert_limb (dxinv, dx);

  /* Produce one quotient limb qp[i] per iteration, from high to low.  */
  for (i = qn - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t nx;
      mp_limb_t cy_limb;

      /* nx is the current top limb of the partial remainder; after the
	 decrement it sits at np[dn].  */
      nx = np[dn - 1];		/* FIXME: could get value from r1 */
      np--;

      if (nx == dx)
	{
	  /* This might over-estimate q, but it's probably not worth
	     the extra code here to find out.  */
	  q = GMP_NUMB_MASK;

	  /* The #else alternative is compiled out; note that it refers to
	     a variable `cy' that is not declared in this function.  */
#if 1
	  cy_limb = mpn_submul_1 (np, dp, dn, q);
#else
	  /* This should be faster on many machines */
	  cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
	  cy = mpn_add_n (np, np, dp, dn);
	  np[dn] += cy;
#endif

	  /* If the limb carried out of the subtraction differs from nx,
	     q was one too large: add the divisor back and decrement q.  */
	  if (nx != cy_limb)
	    {
	      mpn_add_n (np, np, dp, dn);
	      q--;
	    }

	  qp[i] = q;
	}
      else
	{
	  mp_limb_t rx, r1, r0, p1, p0;

	  /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
	     when np[dn-1] is used in an asm statement like umul_ppmm in
	     udiv_qrnnd_preinv.  The symptom is seg faults due to registers
	     being clobbered.  gcc 2.95 i386 doesn't have the problem. */
	  /* Estimate q and a remainder limb r1 from the two-limb value
	     (nx, np[dn-1]) and the top divisor limb dx.  The non-preinv
	     path shifts by GMP_NAIL_BITS around the call.  */
	  {
	    mp_limb_t  workaround = np[dn - 1];
	    if (use_preinv)
	      udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
	    else
	      {
		udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
			    dx << GMP_NAIL_BITS);
		r1 >>= GMP_NAIL_BITS;
	      }
	  }
	  /* (p1, p0) = d1 * q, the contribution of the second divisor limb.  */
	  umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
	  p0 >>= GMP_NAIL_BITS;

	  r0 = np[dn - 2];
	  rx = 0;
	  /* If (r1, r0) < (p1, p0) the estimate is too large: lower q by one
	     and compensate by removing d1 from the product and adding dx to
	     r1, recording the carry out of r1 in rx.  */
	  if (r1 < p1 || (r1 == p1 && r0 < p0))
	    {
	      p1 -= p0 < d1;
	      p0 = (p0 - d1) & GMP_NUMB_MASK;
	      q--;
	      r1 = (r1 + dx) & GMP_NUMB_MASK;
	      rx = r1 < dx;
	    }

	  /* Subtract (p1, p0) from (rx, r1, r0); see the diagram after the
	     loop.  */
	  p1 += r0 < p0;	/* cannot carry! */
	  rx -= r1 < p1;	/* may become 11..1 if q is still too large */
	  r1 = (r1 - p1) & GMP_NUMB_MASK;
	  r0 = (r0 - p0) & GMP_NUMB_MASK;

	  /* Subtract q times the remaining low dn-2 divisor limbs from np;
	     cy_limb is the limb carried out of that subtraction.  */
	  cy_limb = mpn_submul_1 (np, dp, dn - 2, q);

	  /* Check if we've over-estimated q, and adjust as needed.  */
	  {
	    mp_limb_t cy1, cy2;
	    cy1 = r0 < cy_limb;
	    r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
	    cy2 = r1 < cy1;
	    r1 -= cy1;
	    np[dn - 1] = r1;
	    np[dn - 2] = r0;
	    /* A final borrow that rx does not account for means q is still
	       one too large.  */
	    if (cy2 != rx)
	      {
		mpn_add_n (np, np, dp, dn);
		q--;
	      }
	  }
	  qp[i] = q;
	}
    }

  /* Layout of the three-limb subtraction performed in the else branch of
     the loop above:

     ______ ______ ______
    |__rx__|__r1__|__r0__|		partial remainder
	    ______ ______
	 - |__p1__|__p0__|		partial product to subtract
	    ______ ______
	 - |______|cylimb|

     rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
     carry out).  If rx=-1 then q is too large.  If rx=0, then q might
     be too large, but it is most likely correct.
  */

  return most_significant_q_limb;
}
コード例 #5
0
/* Divide {xp, xn} by the two-limb divisor {dp, 2}.

   Quotient limbs are written to qp[xn-3] down to qp[0]; the value returned
   (qf, 0 or 1) is the quotient limb above those.  The two-limb remainder is
   stored back into xp[1] (high) and xp[0] (low).

   Preconditions (checked only by ASSERT): xn >= 2 and the top bit of dp[1]
   is set. */
mp_limb_t mpn_divrem_euclidean_qr_2(mp_ptr qp, mp_ptr xp, mp_size_t xn, mp_srcptr dp)
{
   mp_size_t qn;
   /* Note: `i' holds the value produced by invert_limb for d1; it is not a
      loop counter. */
   mp_limb_t qf, t[2], t1[2], q, h, l, d1, d2, i;
   int c1, c3, c4;

   ASSERT(xn >= 2);
   ASSERT_MPN(dp, 2);
   ASSERT_MPN(xp, xn);
   ASSERT(dp[1] != 0);

   /* This value is only referenced by the commented-out assert below; qn is
      reassigned before its next use. */
   qn = xn - 1;

   /* ASSERT(!MPN_OVERLAP_P(qp, qn, xp, xn)); */ /* FIXME: correct this overlap requirement */
   ASSERT((dp[1]>>(GMP_NUMB_BITS - 1)) != 0);

   h = 0;
   d1 = dp[1];
   d2 = dp[0];
   
   invert_limb(i, d1);
   
   /* Handle the top two limbs of x first: (l, t[0]) = (xp[xn-1], xp[xn-2]). */
   l = xp[xn - 1];
   qn = xn - 2;
   t[0] = xp[qn];

   if (l < d1)
   { 
      /* Top limb is below d1: the top two limbs become the running
         remainder (h, l) unchanged and the leading quotient limb is 0. */
      h = t[1] = l;
      l = t[0] = xp[qn];
      qf = 0;
   }
   else
   {
      /* Tentatively take a leading quotient limb of 1 and subtract the
         divisor (d1 from the high limb, d2 via t1 from the pair). */
      qf = 1;
      t[1] = l - d1;
      t1[1] = 0;
      t1[0] = d2;
   
      /* A borrow means the divisor did not fit: undo the guess. */
      if (mpn_sub_n(t, t, t1, 2))
      {
         qf--;
         mpn_add_n(t, t, dp, 2);
      }
   
      h = t[1];
      l = t[0];
   }

   /* Main loop: (h, l) is the running two-limb remainder; each iteration
      brings in xp[qn] as the new low limb and produces qp[qn]. */
   for (qn = xn - 3; qn >= 0; qn--)
   {
      t[0] = xp[qn];
    
      if (h < d1)
      {
         /* Estimate q from (h, l) and d1, then subtract q * d2 from
            (t[1], t[0]).  Each borrow lowers q by one and adds the full
            divisor back; at most two corrections are applied. */
         udiv_qrnnd_preinv(q, t[1], h, l, d1, i);
         umul_ppmm(t1[1], t1[0], q, d2);
         if (mpn_sub_n(t, t, t1, 2))
         {
            q--;
            if (mpn_add_n(t, t, dp, 2) == 0)
            {
               q--;
               
               ASSERT_CARRY(mpn_add_n(t, t, dp, 2));
            }
         }
      }
      else
      {
         /* h == d1: start from q = all ones and adjust the remainder by
            adding the divisor and subtracting d2 from the high limb. */
         ASSERT(h == d1);
         q = -1;
         t[1] = l;
         c3 = mpn_add_n(t, t, dp, 2);
         c1 = mpn_sub_1(t + 1, t + 1, 1, d2);
         /* c4 is the net carry (carry from the add minus borrow from the
            subtract). */
         c4 = c3 - c1;
       
         if (l >= d1)
         {
            ASSERT(c3 != 0);
            ASSERT(c4 == 0);
         } /* our guess is B + 1, so q = B - 1 is correct */
         else
         {
            ASSERT(c4 <= 0); /* our guess is B so q = B - 1 or B - 2 */
            if (c4 != 0)
            {
               q--;
               mpn_add_n(t, t, dp, 2);
            }
         }       
      }
    
      h = t[1];
      l = t[0];
      qp[qn] = q;
   }

   /* Store the final two-limb remainder over the low limbs of x. */
   xp[1] = t[1];
   xp[0] = t[0];

   return qf;
}
コード例 #6
0
ファイル: div_qr_1n_pi1.c プロジェクト: AllardJ/Tomato
/* Divides (u1 B^n + {up, n}) by d, storing the n quotient limbs at {qp, n}
   and returning the remainder limb.

   Preconditions (checked only by ASSERT): d has its high bit set, n > 0 and
   u1 < d.  dinv is passed to udiv_qrnnd_preinv and used directly in the
   multiplications below; presumably it is the precomputed inverse of d as
   produced by invert_limb -- confirm against the caller.

   For n == 1 a single udiv_qrnnd_preinv is used.  For larger n the loop
   keeps a three-part partial remainder (u2, u1, u0), with u2 produced by
   add_mssaaaa and used as a mask, and folds the high part down using the
   constant B2 = -d*dinv (computed with unsigned wraparound).  Quotient
   limbs are stored as they are produced and later carries are propagated
   into the already-stored limbs with MPN_INCR_U.  */
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
		   mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t B2;
  mp_limb_t u0, u2;
  mp_limb_t q0, q1;
  mp_limb_t p0, p1;
  mp_limb_t t;
  mp_size_t j;

  ASSERT (d & GMP_LIMB_HIGHBIT);
  ASSERT (n > 0);
  ASSERT (u1 < d);

  /* Single-limb case: one division step gives both quotient and
     remainder.  */
  if (n == 1)
    {
      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
      return u1;
    }

  /* FIXME: Could be precomputed */
  /* NOTE(review): presumably B2 = B^2 - d*(B + dinv), i.e. B^2 reduced with
     respect to d -- this depends on how dinv is defined; confirm.  */
  B2 = -d*dinv;

  /* First step: quotient contribution (q1, q0) = u1 * dinv, plus u1 added
     to the high half, and remainder contribution (p1, p0) = u1 * B2.  */
  umul_ppmm (q1, q0, dinv, u1);
  umul_ppmm (p1, p0, B2, u1);
  q1 += u1;
  ASSERT (q1 >= u1);
  u0 = up[n-1];	/* Early read, to allow qp == up. */
  qp[n-1] = q1;

  /* (u2, u1, u0) = (u0, up[n-2]) + (p1, p0), with u2 receiving the carry in
     the form add_mssaaaa produces (it is used as a mask below).  */
  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);

  /* FIXME: Keep q1 in a variable between iterations, to reduce number
     of memory accesses. */
  /* j runs from n-3 down to 0; each pass stores qp[j+1] and consumes
     up[j].  */
  for (j = n-2; j-- > 0; )
    {
      mp_limb_t q2, cy;

      /* Additions for the q update:
       *	+-------+
       *        |u1 * v |
       *        +---+---+
       *        | u1|
       *    +---+---+
       *    | 1 | v |  (conditional on u2)
       *    +---+---+
       *        | 1 |  (conditional on u0 + u2 B2 carry)
       *        +---+
       * +      | q0|
       *   -+---+---+---+
       *    | q2| q1| q0|
       *    +---+---+---+
      */
      umul_ppmm (p1, t, u1, dinv);
      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
      q0 = t;

      /* Remainder update: add B2 to u0 when u2 is set, and on carry
	 subtract d from u0 (the carry is also added to the quotient
	 below).  */
      umul_ppmm (p1, p0, u1, B2);
      ADDC_LIMB (cy, u0, u0, u2 & B2);
      u0 -= (-cy) & d;

      /* Final q update */
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
      qp[j+1] = q1;
      /* Propagate q2 into the quotient limbs stored above position j+1.  */
      MPN_INCR_U (qp+j+2, n-j-2, q2);

      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
    }

  /* Reduce (u2, u1) so that u1 < d: subtract d once if u2 is nonzero and
     once more if u1 is still >= d, counting both in q1 (branch-free via
     the (-flag) & d masks).  */
  q1 = (u2 > 0);
  u1 -= (-q1) & d;

  t = (u1 >= d);
  q1 += t;
  u1 -= (-t) & d;

  /* Last division step on (u1, u0); its quotient t is added to the pending
     (q1, q0).  */
  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);

  /* Propagate q1 into the quotient limbs already stored from qp[1] up.  */
  MPN_INCR_U (qp+1, n-1, q1);

  qp[0] = q0;
  return u0;
}
コード例 #7
0
ファイル: div_qr_1n_pi2.c プロジェクト: bngabonziza/miktex
/* Divide {up, un} by the single limb pd->d, using the two limbs pd->dip[0]
   and pd->dip[1] from the precomp_div_1_pi2 structure (presumably a
   two-limb precomputed inverse of pd->d -- confirm against where the
   structure is filled in).

   All un quotient limbs are written to qp: qp[un-1] is the 0/1 leading
   limb, the main loop stores two limbs per iteration going downwards, and
   for even un one final limb is produced with udiv_qrnnd_preinv.  The
   remainder limb is returned.

   Preconditions (checked only by ASSERT): un >= 2 and pd->d has its high
   bit set; qp may overlap up only when qp+2 >= up.  */
mp_limb_t
mpn_div_qr_1n_pi2 (mp_ptr qp,
		   mp_srcptr up, mp_size_t un,
		   struct precomp_div_1_pi2 *pd)
{
  mp_limb_t most_significant_q_limb;
  mp_size_t i;
  mp_limb_t r, u2, u1, u0;
  mp_limb_t d0, di1, di0;
  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
  mp_limb_t cnd;

  ASSERT (un >= 2);
  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
  ASSERT_MPN (up, un);

  /* q3, q2 and q1 are aliases for the accumulators q3a, q2b and q1b for the
     duration of this function (undefined again at the end).  */
#define q3 q3a
#define q2 q2b
#define q1 q1b

  /* Position up so that up[2] is the most significant limb.
     NOTE(review): for un == 2 this forms a pointer one element before the
     start of the array (only up[2] and up[1] are then read) -- confirm
     this is acceptable on all supported targets.  */
  up += un - 3;
  r = up[2];
  d0 = pd->d;

  /* Leading quotient limb: 1 if the top limb is >= d0, in which case d0 is
     subtracted from it (branch-free via the mask).  */
  most_significant_q_limb = (r >= d0);
  r -= d0 & -most_significant_q_limb;

  qp += un - 3;
  qp[2] = most_significant_q_limb;

  di1 = pd->dip[1];
  di0 = pd->dip[0];

  /* Each iteration consumes two limbs (up[1], up[0]) together with the
     running remainder r and stores two quotient limbs (qp[1], qp[0]).  */
  for (i = un - 3; i >= 0; i -= 2)
    {
      u2 = r;
      u1 = up[1];
      u0 = up[0];

      /* Dividend in {r,u1,u0} */

      /* Accumulate the products of u1 and u2 with (di1, di0), plus the
	 dividend limbs, into the candidate quotient (q3, q2, q1); r is
	 used as the carry limb of the three-limb additions.  */
      umul_ppmm (q1d,q0d, u1, di0);
      umul_ppmm (q2b,q1b, u1, di1);
      q2b++;				/* cannot spill */
      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);

      umul_ppmm (q2c,q1c, u2,  di0);
      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
      umul_ppmm (q3a,q2a, u2, di1);

      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);

      q3 += r;

      /* Candidate remainder, computed with single-limb wraparound.  */
      r = u0 - q2 * d0;

      /* First adjustment: when r >= q1, add d0 back and decrement the
	 two-limb quotient (q3, q2).  */
      cnd = (r >= q1);
      r += d0 & -cnd;
      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);

      /* Second adjustment, marked as unlikely: r still >= d0.  */
      if (UNLIKELY (r >= d0))
	{
	  r -= d0;
	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
	}

      qp[0] = q2;
      qp[1] = q3;

      up -= 2;
      qp -= 2;
    }

  /* For even un one limb is left over after the two-at-a-time loop; handle
     it with an ordinary single-limb division step using di1.  */
  if ((un & 1) == 0)
    {
      u2 = r;
      u1 = up[1];

      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
      qp[1] = q3;
    }

  return r;

#undef q3
#undef q2
#undef q1
}