/* 
   Computes the quotient and remainder of { np, 2*dn } by { dp, dn }.
   We require dp to be normalised and inv to be a precomputed inverse 
   of { dp, dn } given by mpn_invert.
*/
mp_limb_t 
mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, 
                           mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_size_t m, i;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }

   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy);

   /* 
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + n - 1, n + 1 }
	  N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the latter case
	  the left hand bound is either an integer or < 2/B below one.
   */
     
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }

   ret -= mpn_sub_1(qp, qp, dn, 1); 
   if (UNLIKELY(ret == ~CNST_LIMB(0))) 
      ret += mpn_add_1(qp, qp, dn, 1);
   /* ret is now guaranteed to be 0 or 1*/
   ASSERT(ret == 0);

   m = dn + 1;
   if ((dn <= MPN_FFT_MUL_N_MINSIZE) || (ret))
   {
      mpn_mul_n(tp, qp, dp, dn);
   } else
   {
      mp_limb_t cy, cy2;
      
      if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
         m = mpir_fft_adjust_limbs (m);
      cy = mpn_mulmod_Bexpp1_fft (tp, m, qp, dn, dp, dn);
      
      /* cy, {tp, m} = qp * dp mod (B^m+1) */ 
      cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);
      mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
          
      /* Make correction */    
      mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);
   }
   
   mpn_sub_n(np, np, tp, m);
   MPN_ZERO(np + m, 2*dn - m);
   while (np[dn] || mpn_cmp(np, dp, dn) >= 0)
   {
	   ret += mpn_add_1(qp, qp, dn, 1);
	   np[dn] -= mpn_sub_n(np, np, dp, dn);
   }
  
   /* Not possible for ret == 2 as we have qp*dp <= np */
   ASSERT(ret + ret2 < 2);

   TMP_FREE;

   return ret + ret2;
}
/* 
   Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is
   either correct or one too large. We require dp to be normalised and inv
   to be a precomputed inverse given by mpn_invert.
*/
mp_limb_t 
mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, 
                              mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);
   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }
   
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy + 1);

   /* 
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + n - 1, n + 1 }
	  N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <=  N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the latter case
	  the left hand bound is either an integer or < 2/B below one.
   */
    
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }
  
   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) 
   {
	   /* Special case, multiply out to get accurate quotient */
	   ret -= mpn_sub_1(qp, qp, dn, 1);
       if (UNLIKELY(ret == ~CNST_LIMB(0)))
          ret += mpn_add_1(qp, qp, dn, 1);
       /* ret is now guaranteed to be 0*/
       ASSERT(ret == 0);
       mpn_mul_n(tp, qp, dp, dn);
       mpn_sub_n(tp, np, tp, dn+1);
       while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)
	   {
		   ret += mpn_add_1(qp, qp, dn, 1);
		   tp[dn] -= mpn_sub_n(tp, tp, dp, dn);
	   }
       /* Not possible for ret == 2 as we have qp*dp <= np */
       ASSERT(ret + ret2 < 2);
   }

   TMP_FREE;

   return ret + ret2;
}
Beispiel #3
0
/* Input: A = {ap, n} with most significant bit set.
   Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.

   X is a lower approximation of B^(2n)/A with implicit msb.
   More precisely, one has:

              A*X < B^(2n) <= A*(X+1)

   or X = ceil(B^(2n)/A) - 1.
*/
void
mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
  if (n == 1)
    {
      /* invert_limb returns min(B-1, floor(B^2/ap[0])-B),
	 which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.
	 For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where
	 the equality holds only when A=B/2.

	 We thus have A*X < B^2 <= A*(X+1).
      */
      invert_limb (xp[0], ap[0]);
    }
  else if (n == 2)
    {
      mp_limb_t tp[4], up[2], sp[2], cy;

      tp[0] = ZERO;
      invert_limb (xp[1], ap[1]);
      tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);
      cy = mpn_add_n (tp + 2, tp + 2, ap, 2);
      while (cy) /* Xh is too large */
	{
	  xp[1] --;
	  cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);
	}
      /* tp[3] should be 111...111 */

      mpn_com_n (sp, tp + 1, 2);
      cy = mpn_add_1 (sp, sp, 2, ONE);
      /* cy should be 0 */

      up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);
      cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);
      /* cy should be 0 */
      xp[0] = up[1];

      /* update tp */
      cy = mpn_addmul_1 (tp, ap, 2, xp[0]);
      cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);
      do
	{
	  cy = mpn_add (tp, tp, 4, ap, 2);
	  if (cy == ZERO)
	    mpn_add_1 (xp, xp, 2, ONE);
	}
      while (cy == ZERO);

      /* now A*X < B^4 <= A*(X+1) */
    }
  else
    {
      mp_size_t l, h;
      mp_ptr tp, up;
      mp_limb_t cy, th;
      int special = 0;
      TMP_DECL;

      l = (n - 1) / 2;
      h = n - l;

      mpn_invert (xp + l, ap + l, h);

      TMP_MARK;
      tp = TMP_ALLOC_LIMBS (n + h);
      up = TMP_ALLOC_LIMBS (2 * h);

      if (n <= WRAP_AROUND_BOUND)
	{
          mpn_mul (tp, ap, n, xp + l, h);
          cy = mpn_add_n (tp + h, tp + h, ap, n);
        }
      else
	{
          mp_size_t m = n + 1;
          mpir_ui k;
          int cc;

          if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
             m = mpir_fft_adjust_limbs (m);
          /* we have m >= n + 1 by construction, thus m > h */
          ASSERT(m < n + h);
          cy = mpn_mulmod_Bexpp1_fft (tp, m, ap, n, xp + l, h);
          /* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */
          cy += mpn_add_n (tp + h, tp + h, ap, m - h);
          cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);
          cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);
          if (cc > cy) /* can only occur if cc=1 and cy=0 */
            cy = mpn_add_1 (tp, tp, m, ONE);
          else
            cy -= cc;
          /* cy, {tp, m} = A * Xh */

          /* add B^(n+h) + B^(n+h-m) */
          MPN_ZERO (tp + m, n + h - m);
          tp[m] = cy;
          /* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,
             the mpn_incr_u() below cannot produce a carry */
          mpn_incr_u (tp + n + h - m, ONE);
          cy = 1;
          do /* check if T >= B^(n+h) + 2*B^n */
            {
              mp_size_t i;

              if (cy == ZERO)
                break; /* surely T < B^(n+h) */
              if (cy == ONE)
                {
                  for (i = n + h - 1; tp[i] == ZERO && i > n; i--);
                  if (i == n && tp[i] < (mp_limb_t) 2)
                    break;
                }
              /* subtract B^m+1 */
              cy -= mpn_sub_1 (tp, tp, n + h, ONE);
              cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);
            }
          while (1);
        }

      while (cy)
	{
	  mpn_sub_1 (xp + l, xp + l, h, ONE);
	  cy -= mpn_sub (tp, tp, n + h, ap, n);
	}

      mpn_not (tp, n);
      th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE);
      mpn_mul_n (up, tp + l, xp + l, h);
      cy = mpn_add_n (up + h, up + h, tp + l, h);
      if (th != ZERO)
      {
         cy += ONE + mpn_add_n (up + h, up + h, xp + l, h);
      }
      if (up[2*h-l-1] + 4 <= CNST_LIMB(3)) special = 1;
      MPN_COPY (xp, up + 2 * h - l, l);
      mpn_add_1 (xp + l, xp + l, h, cy);
      TMP_FREE;
      if ((special) && !mpn_is_invert(xp, ap, n))
         mpn_add_1 (xp, xp, n, 1);
    }
}