コード例 #1
0
ファイル: divrem.c プロジェクト: RodneyBates/M3Devel
mp_limb_t
mpn_divrem (mp_ptr qp, mp_size_t qxn,
	    mp_ptr np, mp_size_t nn,
	    mp_srcptr dp, mp_size_t dn)
{
  ASSERT (qxn >= 0);
  ASSERT (nn >= dn);
  ASSERT (dn >= 1);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  if (dn == 1)
    {
      mp_limb_t ret;
      mp_ptr q2p;
      mp_size_t qn;
      TMP_DECL;

      TMP_MARK;
      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);

      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
      qn = nn + qxn - 1;
      MPN_COPY (qp, q2p, qn);
      ret = q2p[qn];

      TMP_FREE;
      return ret;
    }
  else if (dn == 2)
    {
      return mpn_divrem_2 (qp, qxn, np, nn, dp);
    }
  else
    {
      mp_ptr rp, q2p;
      mp_limb_t qhl;
      mp_size_t qn;
      TMP_DECL;

      TMP_MARK;
      if (UNLIKELY (qxn != 0))
	{
	  mp_ptr n2p;
	  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
	  MPN_ZERO (n2p, qxn);
	  MPN_COPY (n2p + qxn, np, nn);
	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
	  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
	  MPN_COPY (np, rp, dn);
	  qn = nn - dn + qxn;
	  MPN_COPY (qp, q2p, qn);
	  qhl = q2p[qn];
	}
      else
	{
	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
	  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
	  MPN_COPY (np, rp, dn);	/* overwrite np area with remainder */
	  qn = nn - dn;
	  MPN_COPY (qp, q2p, qn);
	  qhl = q2p[qn];
	}
      TMP_FREE;
      return qhl;
    }
}
コード例 #2
0
ファイル: mode1o.c プロジェクト: STAR111/GCC_parser
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
                     mp_limb_t orig_c)
{
  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;
  mp_limb_t  c = orig_c;
  mp_size_t  i;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (c);

  if (size == 1)
    {
      s = src[0];
      if (s > c)
	{
	  l = s-c;
	  h = l % d;
	  if (h != 0)
	    h = d - h;
	}
      else
	{
	  l = c-s;
	  h = l % d;
	}
      return h;
    }


  modlimb_invert (inverse, d);
  dmul = d << GMP_NAIL_BITS;

  i = 0;
  do
    {
      s = src[i];
      SUBC_LIMB (c, l, s, c);
      l = (l * inverse) & GMP_NUMB_MASK;
      umul_ppmm (h, dummy, l, dmul);
      c += h;
    }
  while (++i < size-1);


  s = src[i];
  if (s <= d)
    {
      /* With high<=d the final step can be a subtract and addback.  If c==0
	 then the addback will restore to l>=0.  If c==d then will get l==d
	 if s==0, but that's ok per the function definition.  */

      l = c - s;
      if (c < s)
	l += d;

      ret = l;
    }
  else
    {
      /* Can't skip a divide, just do the loop code once more. */

      SUBC_LIMB (c, l, s, c);
      l = (l * inverse) & GMP_NUMB_MASK;
      umul_ppmm (h, dummy, l, dmul);
      c += h;
      ret = c;
    }

  ASSERT (orig_c < d ? ret < d : ret <= d);
  return ret;
}
コード例 #3
0
ファイル: bdivmod.c プロジェクト: RodneyBates/M3Devel
mp_limb_t
mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
	     mp_srcptr vp, mp_size_t vsize, unsigned long int d)
{
  mp_limb_t v_inv;

  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize * GMP_NUMB_BITS >= d);
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  /* 1/V mod 2^GMP_NUMB_BITS. */
  binvert_limb (v_inv, vp[0]);

  /* Fast code for two cases previously used by the accel part of mpn_gcd.
     (Could probably remove this now it's inlined there.) */
  if (usize == 2 && vsize == 2 &&
      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
    {
      mp_limb_t hi, lo;
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
      up[0] = 0;
      up[1] -= hi + q*vp[1];
      qp[0] = q;
      if (d == 2*GMP_NUMB_BITS)
        {
          q = (up[1] * v_inv) & GMP_NUMB_MASK;
          up[1] = 0;
          qp[1] = q;
        }
      return 0;
    }

  /* Main loop.  */
  while (d >= GMP_NUMB_BITS)
    {
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      d -= GMP_NUMB_BITS;
      up += 1, usize -= 1;
      *qp++ = q;
    }

  if (d)
    {
      mp_limb_t b;
      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
      if (q <= 1)
	{
	  if (q == 0)
	    return 0;
	  else
	    b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
	}
      else
	b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);

      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      return q;
    }

  return 0;
}
コード例 #4
0
ファイル: mode1o.c プロジェクト: AaronNGray/texlive-libs
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
{
  mp_limb_t  c = orig_c;
  mp_limb_t  s, l, q, h, inverse;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (c);

  /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
  if (size == 1)
    {
      s = src[0];
      if (s > c)
	{
	  l = s-c;
	  h = l % d;
	  if (h != 0)
	    h = d - h;
	}
      else
	{
	  l = c-s;
	  h = l % d;
	}
      return h;
    }

  binvert_limb (inverse, d);

  if (d <= 0xFFFFFFFF)
    {
      s = *src++;
      size--;
      do
        {
          SUBC_LIMB (c, l, s, c);
          s = *src++;
          q = l * inverse;
          umul_ppmm_half_lowequal (h, q, d, l);
          c += h;
          size--;
        }
      while (size != 0);

      if (s <= d)
        {
          /* With high s <= d the final step can be a subtract and addback.
             If c==0 then the addback will restore to l>=0.  If c==d then
             will get l==d if s==0, but that's ok per the function
             definition.  */

          l = c - s;
          l += (l > c ? d : 0);

          ASSERT_RETVAL (l);
          return l;
        }
      else
        {
          /* Can't skip a divide, just do the loop code once more. */
          SUBC_LIMB (c, l, s, c);
          q = l * inverse;
          umul_ppmm_half_lowequal (h, q, d, l);
          c += h;

          ASSERT_RETVAL (c);
          return c;
        }
    }
  else
    {
      mp_limb_t  dl = LOW32 (d);
      mp_limb_t  dh = HIGH32 (d);
      long i;

      s = *src++;
      size--;
      do
        {
          SUBC_LIMB (c, l, s, c);
          s = *src++;
          q = l * inverse;
          umul_ppmm_lowequal (h, q, d, dh, dl, l);
          c += h;
          size--;
        }
      while (size != 0);

      if (s <= d)
        {
          /* With high s <= d the final step can be a subtract and addback.
             If c==0 then the addback will restore to l>=0.  If c==d then
             will get l==d if s==0, but that's ok per the function
             definition.  */

          l = c - s;
          l += (l > c ? d : 0);

          ASSERT_RETVAL (l);
          return l;
        }
      else
        {
          /* Can't skip a divide, just do the loop code once more. */
          SUBC_LIMB (c, l, s, c);
          q = l * inverse;
          umul_ppmm_lowequal (h, q, d, dh, dl, l);
          c += h;

          ASSERT_RETVAL (c);
          return c;
        }
    }
}
コード例 #5
0
ファイル: mulhigh_n.c プロジェクト: HRF92/mpir
/* (rp, 2n) = (xp, n)*(yp, n) */
static void
mpn_mulshort_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t m;
  mp_limb_t t;
  mp_ptr rpn2;

  ASSERT(n >= 1);
  ASSERT_MPN(xp, n);
  ASSERT_MPN(yp, n);
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));
  
  if (BELOW_THRESHOLD(n, MULHIGH_BASECASE_THRESHOLD))
    {
      mpn_mul_basecase(rp, xp, n, yp, n);
      
      return;
    }

  if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))
    {
      mpn_mulshort_n_basecase(rp, xp, yp, n);
      
      return;
    }

  /* choose optimal m s.t. n + 2 <= 2m,  m < n */
  ASSERT (n >= 4);

  m = 87 * n / 128;
  
  if (2 * m < n + 2)
    m = (n + 1) / 2 + 1;
  
  if (m >= n)
    m = n - 1;
  
  ASSERT (n + 2 <= 2 * m);
  ASSERT (m < n);
  
  rpn2 = rp + n - 2;
  
  mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);
  mpn_mulshort_n (rp, xp, yp + m, n - m);

  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
  
  mpn_mulshort_n (rp, xp + m, yp, n - m);
  
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
  
  umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);
  rp[0] = t >> GMP_NAIL_BITS;
  
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
  
  umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);
  rp[0] = t >> GMP_NAIL_BITS;
  
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
  
  return;
}
コード例 #6
0
ファイル: subadd_n.c プロジェクト: BrianGladman/mpir
/* 
   t = x - y - z or t = x - (y + z) which explains the name 
*/
mp_limb_t mpn_subadd_n(mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
   mp_limb_t ret;

   ASSERT(n > 0);
   ASSERT_MPN(x, n);
   ASSERT_MPN(y, n);
   ASSERT_MPN(z, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(t, x, n));
   ASSERT(MPN_SAME_OR_SEPARATE_P(t, y, n));
   ASSERT(MPN_SAME_OR_SEPARATE_P(t, z, n));

   if (t == x && t == y && t == z)
      return mpn_neg(t,z,n);  

   if (t == x && t == y)
   {
      ret = mpn_sub_n(t, x, y, n);
      ret += mpn_sub_n(t, t, z, n);
      
      return ret;
   }

   if (t == x && t == z)
   {
      ret = mpn_sub_n(t, x, z, n);
      ret += mpn_sub_n(t, t, y, n);
   
      return ret;
   }

   if (t == y && t == z)
   {
      ret = mpn_add_n(t, y, z, n);
      ret += mpn_sub_n(t, x, t, n);
   
      return ret;
   }

   if (t == x)
   {
      ret = mpn_sub_n(t, x, y, n);
      ret += mpn_sub_n(t, t, z, n);
   
      return ret;
   }
 
   if (t == y)
   {
      ret = mpn_sub_n(t, x, y, n);
      ret += mpn_sub_n(t, t, z, n);
   
      return ret;
   }

   if (t == z)
   {
      ret = mpn_sub_n(t, x, z, n);
      ret += mpn_sub_n(t, t, y, n);
   
      return ret;
   }

   ret = mpn_sub_n(t, x, z, n);
   ret += mpn_sub_n(t, t, y, n);

   return ret;
}
コード例 #7
0
ファイル: mulhigh_n.c プロジェクト: BrianGladman/mpir
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */ 
inline static void
mpn_mulshort_n_basecase(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;

  ASSERT(n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN(xp, n);
  ASSERT_MPN(yp, n);
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */
  i = 0;

  /* Multiply w limbs from y + i to (2 + i + w - 1) limbs from x + (n - 2 - i - w + 1)
     and put it into r + (n - 2 - w + 1), "overflow" (i.e. last) limb into
     r + (n + w - 1) for i between 0 and n - 2.
     i == n - w needs special treatment. */

  /* We first multiply by the low order limb (or depending on optional function
     availability, limbs).  This result can be stored, not added, to rp.  We
     also avoid a loop for zeroing this way.  */

#if HAVE_NATIVE_mpn_mul_2
  rp[n + 1] = mpn_mul_2 (rp + k - 1, xp + k - 1, 2 + 1, yp);
  i += 2;
#else
  rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
  i += 1;
#endif

#if HAVE_NATIVE_mpn_addmul_6
  while (i < n - 6)
    {
      rp[n + i + 6 - 1] = mpn_addmul_6 (rp + k - 6 + 1, xp + k - i - 6 + 1, 2 + i + 6 - 1, yp + i);
      i += 6;
    }
  if (i == n - 6)
    {
      rp[n + n - 1] = mpn_addmul_6 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_5
  while (i < n - 5)
    {
      rp[n + i + 5 - 1] = mpn_addmul_5 (rp + k - 5 + 1, xp + k - i - 5 + 1, 2 + i + 5 - 1, yp + i)
      i += 5;
    }
  if (i == n - 5)
    {
      rp[n + n - 1] = mpn_addmul_5 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_4
  while (i < n - 4)
    {
      rp[n + i + 4 - 1] = mpn_addmul_4 (rp + k - 4 + 1, xp + k - i - 4 + 1, 2 + i + 4 - 1, yp + i);
      i += 4;
    }
  if (i == n - 4)
    {
      rp[n + n - 1] = mpn_addmul_4 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_3
  while (i < n - 3)
    {
      rp[n + i + 3 - 1] = mpn_addmul_3 (rp + k - 3 + 1, xp + k - i - 3 + 1, 2 + i + 3 - 1, yp + i);
      i += 3;
    }
  if (i == n - 3)
    {
      rp[n + n - 1] = mpn_addmul_3 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_2
  while (i < n - 2)
    {
      rp[n + i + 2 - 1] = mpn_addmul_2 (rp + k - 2 + 1, xp + k - i - 2 + 1, 2 + i + 2 - 1, yp + i);
      i += 2;
    }
  if (i == n - 2)
    {
      rp[n + n - 1] = mpn_addmul_2 (rp + i, xp, n, yp + i);
      return;
    }
#endif

  while (i < n - 1)
    {
      rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
      i += 1;
    }
  rp[n + n - 1] = mpn_addmul_1 (rp + i, xp, n, yp[i]);
  return;
}
コード例 #8
0
/*
   ret + (xp, n) = (yp, n)*(zp, n) % 2^b + 1  
   needs (tp, 2n) temp space, everything reduced mod 2^b 
   inputs, outputs are fully reduced
  
   N.B: 2n is not the same as 2b rounded up to nearest limb!
*/
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
			                                         mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;

  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b;

  TMP_DECL;

  ASSERT(b > 0);
  ASSERT(n > 0);
  ASSERT_MPN(yp, n);
  ASSERT_MPN(zp, n);
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT(!MPN_OVERLAP_P (xp, n, yp, n));
  ASSERT(!MPN_OVERLAP_P (xp, n, zp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n))
  {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;

      TMP_MARK;

      tx = TMP_BALLOC_LIMBS(3*n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;

      MPN_COPY(ty, yp, n);
      MPN_COPY(tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      while ((((mp_limb_t)1)<<depth) < b) depth++;
   
      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
      depth1 = depth/2 - off;
   
      w1 = b/(((mp_limb_t)1)<<(2*depth1));

      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);

      MPN_COPY(xp, tx, n);
      TMP_FREE;

	   return tx[n];
  }
#endif

  if (yp == zp)
     mpn_sqr(tp, yp, n);
  else
     mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      c = mpn_sub_n (xp, tp, tp + n, n);

      return mpn_add_1 (xp, xp, n, c);
    }

  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;

  return c;
}
コード例 #9
0
ファイル: div_qr_1n_pi2.c プロジェクト: bngabonziza/miktex
mp_limb_t
mpn_div_qr_1n_pi2 (mp_ptr qp,
		   mp_srcptr up, mp_size_t un,
		   struct precomp_div_1_pi2 *pd)
{
  mp_limb_t most_significant_q_limb;
  mp_size_t i;
  mp_limb_t r, u2, u1, u0;
  mp_limb_t d0, di1, di0;
  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
  mp_limb_t cnd;

  ASSERT (un >= 2);
  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
  ASSERT_MPN (up, un);

#define q3 q3a
#define q2 q2b
#define q1 q1b

  up += un - 3;
  r = up[2];
  d0 = pd->d;

  most_significant_q_limb = (r >= d0);
  r -= d0 & -most_significant_q_limb;

  qp += un - 3;
  qp[2] = most_significant_q_limb;

  di1 = pd->dip[1];
  di0 = pd->dip[0];

  for (i = un - 3; i >= 0; i -= 2)
    {
      u2 = r;
      u1 = up[1];
      u0 = up[0];

      /* Dividend in {r,u1,u0} */

      umul_ppmm (q1d,q0d, u1, di0);
      umul_ppmm (q2b,q1b, u1, di1);
      q2b++;				/* cannot spill */
      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);

      umul_ppmm (q2c,q1c, u2,  di0);
      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
      umul_ppmm (q3a,q2a, u2, di1);

      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);

      q3 += r;

      r = u0 - q2 * d0;

      cnd = (r >= q1);
      r += d0 & -cnd;
      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);

      if (UNLIKELY (r >= d0))
	{
	  r -= d0;
	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
	}

      qp[0] = q2;
      qp[1] = q3;

      up -= 2;
      qp -= 2;
    }

  if ((un & 1) == 0)
    {
      u2 = r;
      u1 = up[1];

      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
      qp[1] = q3;
    }

  return r;

#undef q3
#undef q2
#undef q1
}
コード例 #10
0
ファイル: sb_divrem_mn.c プロジェクト: angavrilov/ecl-sse
mp_limb_t
mpn_sb_divrem_mn (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn)
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t qn = nn - dn;
  mp_size_t i;
  mp_limb_t dx, d1, n0;
  mp_limb_t dxinv;
  int use_preinv;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  np += qn;
  dx = dp[dn - 1];
  d1 = dp[dn - 2];
  n0 = np[dn - 1];

  if (n0 >= dx)
    {
      if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
	{
	  mpn_sub_n (np, np, dp, dn);
	  most_significant_q_limb = 1;
	}
    }

  use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
  if (use_preinv)
    invert_limb (dxinv, dx);

  for (i = qn - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t nx;
      mp_limb_t cy_limb;

      nx = np[dn - 1];		/* FIXME: could get value from r1 */
      np--;

      if (nx == dx)
	{
	  /* This might over-estimate q, but it's probably not worth
	     the extra code here to find out.  */
	  q = GMP_NUMB_MASK;

#if 1
	  cy_limb = mpn_submul_1 (np, dp, dn, q);
#else
	  /* This should be faster on many machines */
	  cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
	  cy = mpn_add_n (np, np, dp, dn);
	  np[dn] += cy;
#endif

	  if (nx != cy_limb)
	    {
	      mpn_add_n (np, np, dp, dn);
	      q--;
	    }

	  qp[i] = q;
	}
      else
	{
	  mp_limb_t rx, r1, r0, p1, p0;

	  /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
	     when np[dn-1] is used in an asm statement like umul_ppmm in
	     udiv_qrnnd_preinv.  The symptom is seg faults due to registers
	     being clobbered.  gcc 2.95 i386 doesn't have the problem. */
	  {
	    mp_limb_t  workaround = np[dn - 1];
	    if (CACHED_ABOVE_THRESHOLD (use_preinv, DIV_SB_PREINV_THRESHOLD))
	      udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
	    else
	      {
		udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
			    dx << GMP_NAIL_BITS);
		r1 >>= GMP_NAIL_BITS;
	      }
	  }
	  umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
	  p0 >>= GMP_NAIL_BITS;

	  r0 = np[dn - 2];
	  rx = 0;
	  if (r1 < p1 || (r1 == p1 && r0 < p0))
	    {
	      p1 -= p0 < d1;
	      p0 = (p0 - d1) & GMP_NUMB_MASK;
	      q--;
	      r1 = (r1 + dx) & GMP_NUMB_MASK;
	      rx = r1 < dx;
	    }

	  p1 += r0 < p0;	/* cannot carry! */
	  rx -= r1 < p1;	/* may become 11..1 if q is still too large */
	  r1 = (r1 - p1) & GMP_NUMB_MASK;
	  r0 = (r0 - p0) & GMP_NUMB_MASK;

	  cy_limb = mpn_submul_1 (np, dp, dn - 2, q);

	  /* Check if we've over-estimated q, and adjust as needed.  */
	  {
	    mp_limb_t cy1, cy2;
	    cy1 = r0 < cy_limb;
	    r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
	    cy2 = r1 < cy1;
	    r1 -= cy1;
	    np[dn - 1] = r1;
	    np[dn - 2] = r0;
	    if (cy2 != rx)
	      {
		mpn_add_n (np, np, dp, dn);
		q--;
	      }
	  }
	  qp[i] = q;
	}
    }

  /* ______ ______ ______
    |__rx__|__r1__|__r0__|		partial remainder
	    ______ ______
	 - |__p1__|__p0__|		partial product to subtract
	    ______ ______
	 - |______|cylimb|

     rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
     carry out).  If rx=-1 then q is too large.  If rx=0, then q might
     be too large, but it is most likely correct.
  */

  return most_significant_q_limb;
}