예제 #1
0
/* Divide-and-conquer approximate quotient of {np, 2n} by {dp, n}.

   The n quotient limbs are written to {qp, n} and the extra high quotient
   limb is returned.  The work is split in two halves:

     1. The high ceil(n/2) quotient limbs are produced by a full
	quotient-and-remainder division of the top limbs of np by the top
	limbs of dp.
     2. The partial remainder in np is corrected for the low floor(n/2)
	divisor limbs that step 1 ignored, decrementing the high quotient
	part for as long as the remainder is negative.
     3. The low floor(n/2) quotient limbs are produced by an approximate
	division (schoolbook below the threshold, recursive otherwise).

   np is overwritten with intermediate remainder data.  tp is scratch space;
   this function itself stores the n-limb product of step 2 there and also
   hands tp to the recursive calls.  dip is passed through unchanged to the
   underlying division routines.  */
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
		    mp_srcptr dip, mp_ptr tp)
{
  mp_size_t low_n = n >> 1;		/* floor(n/2) */
  mp_size_t high_n = n - low_n;		/* ceil(n/2) */
  mp_ptr q_high = qp + low_n;		/* where the high quotient limbs go */
  mp_ptr n_mid = np + low_n;		/* partial remainder after step 1 */
  mp_limb_t borrow, q_top, q_low_top;
  mp_size_t k;

  /* Step 1: high half of the quotient, with an exact remainder.  */
  q_top = BELOW_THRESHOLD (high_n, DC_DIV_QR_THRESHOLD)
    ? mpn_sb_div_qr (q_high, np + 2 * low_n, 2 * high_n, dp + low_n, high_n, dip)
    : mpn_dc_div_qr_n (q_high, np + 2 * low_n, dp + low_n, high_n, dip, tp);

  /* Step 2: subtract (high quotient) * (low divisor limbs) from the
     partial remainder.  The returned limb q_top stands for one more
     multiple of the low divisor limbs, positioned n limbs up.  */
  mpn_mul (tp, q_high, high_n, dp, low_n);

  borrow = mpn_sub_n (n_mid, n_mid, tp, n);
  if (q_top != 0)
    borrow += mpn_sub_n (np + n, np + n, dp, low_n);

  /* Each outstanding borrow means the quotient estimate was one too large:
     decrement it and add the divisor back until the borrows are repaid.  */
  for (; borrow != 0; )
    {
      q_top -= mpn_sub_1 (q_high, q_high, high_n, 1);
      borrow -= mpn_add_n (n_mid, n_mid, dp, n);
    }

  /* Step 3: low half of the quotient, approximate only.  */
  q_low_top = BELOW_THRESHOLD (low_n, DC_DIVAPPR_Q_THRESHOLD)
    ? mpn_sb_divappr_q (qp, np + high_n, 2 * low_n, dp + high_n, low_n, dip)
    : mpn_dc_divappr_q_n (qp, np + high_n, dp + high_n, low_n, dip, tp);

  /* A non-zero return from the low-half division is not carried into the
     high half; instead the low limbs are saturated to all ones.  */
  if (UNLIKELY (q_low_top != 0))
    {
      for (k = low_n; k-- > 0; )
	qp[k] = GMP_NUMB_MASK;
    }

  return q_top;
}
예제 #2
0
/* Compute the quotient of {np, nn} divided by {dp, dn} and store it at
   {qp, nn - dn + 1}.  No remainder is produced.

   Preconditions (checked by the ASSERTs below): nn >= dn > 0, the high
   divisor limb is non-zero, and the quotient area overlaps neither input.
   np and dp are only read; a shifted/copied numerator is built in
   temporary storage.

   Two main cases are distinguished by comparing the quotient size qn with
   the divisor size dn:
     - qn + FUDGE >= dn: the whole numerator is divided by the whole
       (normalised) divisor.
     - otherwise: only the top 2*qn+1 numerator limbs and the top qn+1
       divisor limbs are used to form an approximate quotient in tp.  */
void
mpn_tdiv_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn)
{
  mp_ptr new_dp, new_np, tp, rp, scratch;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  mp_limb_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));

  ASSERT_ALWAYS (FUDGE >= 2);
  
  /* Single-limb divisor: delegate to the dedicated routine.
     NOTE(review): this returns after TMP_MARK without a TMP_FREE; nothing
     has been allocated yet, but confirm that is fine for every TMP
     allocation backend.  */
  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  /* Room for the numerator plus one limb of shift-out.  */
  scratch = TMP_ALLOC_LIMBS(nn + 1);
  
  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Divisor is not normalised: shift both operands left by the
	     number of leading zero bits of the high divisor limb.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  /* Pick the division algorithm from the operand sizes.  */
	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
          invert_1(dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sb_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
	    }
	  else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) || 
		   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD)) 
	    {
          invert_1(dinv, new_dp[dn - 1], new_dp[dn - 2]);
          qh = mpn_dc_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
	    }
	  else
	    {
           mp_ptr inv = TMP_ALLOC_LIMBS(dn);
           mpn_invert(inv, new_dp, dn);
           qh = mpn_inv_div_q (qp, new_np, new_nn, new_dp, dn, inv);
	    }
	  /* When the shift produced no extra numerator limb, qh is the top
	     quotient limb.  */
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  /* The division routines get a private copy of the numerator in
	     new_np; dp is used directly.  */
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
           invert_1(dinv, dh, dp[dn - 2]);
           qh = mpn_sb_div_q (qp, new_np, nn, dp, dn, dinv);
	    }
	  else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) || 
		   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD))
	    {
           invert_1(dinv, dh, dp[dn - 2]);
           qh = mpn_dc_div_q (qp, new_np, nn, dp, dn, dinv);
	    }
	  else
	    {
           mp_ptr inv = TMP_ALLOC_LIMBS(dn);
           mpn_invert(inv, dp, dn);
           qh = mpn_inv_div_q (qp, new_np, nn, dp, dn, inv);
	    }
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* |________________________|
                |_________________|  */
      /* The approximate quotient (qn limbs plus one extra) is built in tp
	 rather than directly in qp.  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      /* NOTE(review): new_np was just set to the freshly allocated scratch,
	 so this comparison looks always-false here; presumably left over
	 from a variant where the scratch area could alias np -- confirm.  */
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);


      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  /* Shift only the top new_nn numerator limbs.  */
	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  /* Shift the top qn+1 divisor limbs, then bring in the bits shifted
	     out of the next lower divisor limb.  */
	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))
	    {
          invert_1(dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
	    }
	  else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))
	    {
          invert_1(dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
	    }
	  else
	    {
           mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1);
           mpn_invert(inv, new_dp, qn + 1);
           qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv); 
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
예제 #3
0
/* Divide-and-conquer approximate quotient of {np, 2n} by {dp, n}, n >= 6.

   Writes n quotient limbs to {qp, n} and returns the high quotient limb.
   According to the comments in the body, the result may be up to 1 too
   large.

   np is overwritten.  tp is scratch: the first 2*n limbs receive a copy of
   the numerator, and the area from tp + 2*n upwards is handed to the
   recursive call for the high half.  dip and d1ip are passed unchanged to
   the underlying division routines (presumably precomputed inverses of the
   leading divisor limbs -- confirm against mpn_sb_divappr_q).  */
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, 
		    mp_limb_t dip, mp_limb_t d1ip, mp_ptr tp)
{
  mp_limb_t qh, cy;
  mp_ptr q_hi;
  mp_size_t m;
  mp_limb_t ret = 0;

  ASSERT (n >= 6);

  /* if the top n limbs of np are >= dp, high limb of quotient is 1 */
  if (mpn_cmp(np + n, dp, n) >= 0)
  {
     ret = 1;
     mpn_sub_n(np + n, np + n, dp, n);
  }

  /* top n limbs of np are now < dp */

  m = (n + 1) / 2;		/* ceil(n/2): size of the high quotient part */
  q_hi = qp + n - m;

  /* 
     FIXME: we could probably avoid this copy if we could guarantee 
     that sb_div_appr_q/dc_divappr_q_n did not destroy the "bottom 
     half" of N */
  MPN_COPY (tp, np, 2*n);

  /* estimate high m+1 limbs of quotient, using a 2*m by m division
     the quotient may be computed 1 too large as it is approximate, 
     moreover, even computed precisely it may be two too large due
     to the truncation we've done to a 2*m by m division... */
  if (m < DC_DIVAPPR_Q_N_THRESHOLD)
    qh = mpn_sb_divappr_q (q_hi, tp + 2*n - 2*m, 2*m,
			   dp + n - m, m, dip, d1ip);
  else
    qh = mpn_dc_divappr_q_n (q_hi, tp + 2*n - 2*m,
			     dp + n - m, m, dip, d1ip, tp + 2*n);

  /* we therefore decrease the estimate by 3... */
  qh -= mpn_sub_1 (q_hi, q_hi, m, (mp_limb_t) 3);
  
  /* ensuring it doesn't become negative */
  if (qh & GMP_NUMB_HIGHBIT)
    {
      MPN_ZERO (q_hi, m);
      qh = 0;
    }
  
  /* note qh is now always zero as the quotient we have is definitely
     correct or up to two too small, and we already normalised np */
  ASSERT (qh == 0);
  
  /* we know that {np+n-m, n+m} = q_hi * D + e0, where 0 <= e0 < C*B^n, 
     where C is a small positive constant. Estimate q_hi * D using 
     middle product, developing one additional limb, i.e. develop
     n - m + 3 limbs. The bottom limb is meaningless and the next limb
     may be too small by up to some small multiple of n, but recall 
     n << B. */
  mpn_mulmid (tp, dp, n, q_hi + 1, m - 2);

  /* do some parts of the middle product "manually": */
  tp[n - m + 2] += mpn_addmul_1 (tp, dp + m - 2, n - m + 2, q_hi[0]);
  mpn_addmul_1 (tp + 1, dp, n - m + 2, q_hi[m-1]);
  
  /* subtract that estimate from N. We note the limb at np + n - 2 
     is then meaningless, and the next limb might be too large by a 
     small amount, i.e. the bottom n limbs of np are now possibly
     too large by a quantity much less than dp */
  mpn_sub_n (np + n - 2, np + n - 2, tp, n - m + 3);

  /* recursively divide to obtain low half of quotient, developing
     one more limb than we would need if everything had been exact.
     As this extra limb is out by only a small amount, rounding the
     remaining limbs based on its value and discarding the extra limb
     results in a quotient which is at most 1 too large */
  if (n - m + 2 < DC_DIVAPPR_Q_N_THRESHOLD)
    cy = mpn_sb_divappr_q (tp, np + m - 3, 2*n - 2*m + 4,
			   dp + m - 2, n - m + 2, dip, d1ip);
  else
    cy = mpn_dc_divappr_q_n (tp, np + m - 3, dp + m - 2, n - m + 2,
			     dip, d1ip, tp + n - m + 2);

  /* FIXME: The only reason this copy happens is that we elected to 
     develop one extra quotient limb in the second recursive quotient. */
  /* tp[0] is the extra (guard) limb; tp[1 .. n-m] are the low quotient
     limbs proper.  */
  MPN_COPY (qp, tp + 1, n - m);

  /* Construct final quotient from low and hi parts... */
  /* The top limb of the low-part result and its returned carry overlap
     the high part, so they are added in at limb offsets n-m and n-m+1.  */
  ret += mpn_add_1 (qp + n - m, qp + n - m, m, tp[n-m+1]);
  ret += mpn_add_1 (qp + n - m + 1, qp + n - m + 1, m - 1, cy);
  if (tp[0] >= GMP_NUMB_HIGHBIT)
    ret += mpn_add_1 (qp, qp, n, 1);   /* ...rounding quotient up */

  /* As the final quotient may be 1 too large, we may have ret == 2 
     (it is very unlikely, but can be relatively easily triggered
     at random when dp = 0x80000...0000), then Q must be 2000.... 
     and we should return instead 1ffff.... */
  if (ret == 2)
    {
      ret -= mpn_sub_1 (qp, qp, n, 1);
      ASSERT (ret == 1);
    }

  return ret;
}
예제 #4
0
/* Approximate quotient of {np, nn} by {dp, dn}, with dip passed through
   unchanged to the underlying division routines.

   The quotient limbs are written to qp and the high quotient limb is
   returned.  np is overwritten with partial-remainder data.

   All three pointers are first advanced to just past the end of their
   operands and then walked downwards, so every "ptr - k" expression below
   addresses the k limbs that end at the current position.

   qn > dn:  the quotient is developed in blocks of at most dn limbs from
	     the most significant end, using exact quotient-and-remainder
	     divisions for all but the last block, which uses the
	     approximate routine.
   qn <= dn: a single approximate division is performed (or, for qn == 0,
	     a compare and conditional subtract).  */
mp_limb_t
mpn_preinv_dc_divappr_q (mp_ptr qp,
			 mp_ptr np, mp_size_t nn,
			 mp_srcptr dp, mp_size_t dn,
			 mp_srcptr dip)
{
  mp_size_t qn;
  mp_limb_t qh, cy, qsave;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  /* Scratch for the block products and for the recursive routines.  */
  tp = TMP_SALLOC_LIMBS (dn+1);

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn > dn)
    {
      qn++;			/* pretend we'll need an extra limb */
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      /* Perform the typically smaller block first.  */
      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
      else
	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);

      /* The division above used only the top qn divisor limbs; when the
	 divisor is longer, subtract quotient * (remaining low divisor
	 limbs) from the remainder and fix up the quotient on borrow.  */
      if (qn != dn)
	{
	  /* mpn_mul is called with the longer operand first.  */
	  if (qn > dn - qn)
	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	  else
	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	  if (qh != 0)
	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	  while (cy != 0)
	    {
	      qh -= mpn_sub_1 (qp, qp, qn, 1);
	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
	    }
	}

      /* Remaining quotient limbs (still counting the pretended extra one).
	 Full dn-limb blocks are divided exactly; their return values are
	 discarded.  */
      qn = nn - dn - qn + 1;
      while (qn > dn)
	{
	  qp -= dn;
	  np -= dn;
	  mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp);
	  qn -= dn;
	}

      /* Since we pretended we'd need an extra quotient limb before, we now
	 have made sure the code above left just dn-1=qn quotient limbs to
	 develop.  Develop that plus a guard limb. */
      qn--;
      qp -= qn;
      np -= dn;
      /* The approximate routine writes qn + 1 limbs starting at qp, which
	 would clobber the lowest limb of the previous block, so that limb
	 is saved and restored; the guard limb qp[0] is then shifted out.  */
      qsave = qp[qn];
      mpn_dc_divappr_q_n (qp, np - dn, dp - dn, dn, dip, tp);
      MPN_COPY_INCR (qp, qp + 1, qn);
      qp[qn] = qsave;
    }
  else
    {
      /* nn == dn: the quotient is just 0 or 1.  */
      if (qn == 0)
	{
	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
	  if (qh)
	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
	  TMP_FREE;
	  return qh;
	}

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
	 /* Full precision.  Optimal?  */
	/* np - dn is the start of the caller's numerator here, so the whole
	   nn-limb numerator and dn-limb divisor are used.  */
	qh = mpn_sb_divappr_q (qp, np - dn, nn, dp - dn, dn, dip);
      else
	{
	  /* Put quotient in tp, use qp as temporary, since qp lacks a limb.  */
	  /* Only the top qn + 1 divisor limbs and the top 2 * (qn + 1)
	     numerator limbs are used; tp[0] is the guard limb dropped by
	     the copy below.
	     NOTE(review): np - qn - 2 and dp - (qn + 1) stay inside the
	     caller's operands only while qn <= dn - 2, yet this branch is
	     reachable with qn up to dn -- confirm that callers or the
	     threshold exclude qn >= dn - 1.  */
	  qh = mpn_dc_divappr_q_n (tp, np - qn - 2, dp - (qn + 1), qn + 1, dip, qp);
	  MPN_COPY (qp, tp + 1, qn);
	}
    }

  TMP_FREE;
  return qh;
}
예제 #5
0
/* Divide-and-conquer approximate quotient of {np, nn} by {dp, dn}.

   Requires dn >= 6, nn >= dn + 3 and a normalised divisor (high bit of
   dp[dn-1] set), as asserted below.  dinv is passed unchanged to the
   underlying schoolbook/divide-and-conquer routines.

   Writes nn - dn quotient limbs at qp and returns the high quotient limb
   qh.  np is overwritten.

   Outline:
     - if the quotient is shorter than the divisor, truncate the divisor to
       its top qn + 1 limbs;
     - strip a possible leading quotient bit into qh;
     - use exact quotient-and-remainder divisions until at most dn - 1
       quotient limbs remain;
     - split the remaining quotient into a high part (sh limbs) and a low
       part (sl limbs), compute the high part, correct the remainder with
       a middle product, then compute the low part.
   __divappr_helper handles the cases where the truncated remainder is not
   smaller than the truncated divisor.  */
mp_limb_t
mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
  mp_size_t q_orig, qn, sh, sl, i;
  mp_limb_t qh, cy, cy2;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn >= 6);
  ASSERT (nn >= dn + 3);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;
  /* Only the top qn + 1 divisor limbs are used when the quotient is
     shorter than the divisor.  */
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }
  q_orig = qn;

  /* Leading quotient bit: set if the top dn numerator limbs are >= D.  */
  qh = mpn_cmp(np + nn - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);

  /* Drop the low numerator limbs that no longer take part after a
     divisor truncation.  */
  np += nn - dn - qn;
  nn = dn + qn;

  /* Reduce until dn - 1 >= qn */
  /* NOTE(review): cy2 is assigned here but never read afterwards.  */
  while (dn - 1 < qn)
  {
     sh = MIN(dn, qn - dn + 1);
     if (sh <= DC_DIV_QR_THRESHOLD) cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
     else cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
     qn -= sh; nn -= sh; 
  }

  cy = np[nn - 1];

  /* split into two parts */
  sh = qn/2; sl = qn - sh;

  /* Rare case where truncation ruins normalisation */
  if (cy > dp[dn - 1] || (cy == dp[dn - 1] 
     && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0))
     {
        __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn);
        return qh;
     }

  /* High part of the quotient: sh limbs stored at qp + sl.  */
  if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0)
     __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh);
  else
  {
     if (sh < SB_DIVAPPR_Q_CUTOFF)
        mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
     else
        mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
  }

  /* cy tracks the limb just above the sl + 2 remainder limbs that are
     corrected below.  */
  cy = np[nn - sh];

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(sl + 2);

  /* Subtract the middle product of the divisor and the high quotient part
     from the remainder.  */
  mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh);
  cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2);

  TMP_FREE;

  /* A negative cy means the high quotient part was too large.  */
  while ((mp_limb_signed_t) cy < 0)
  {
      
     qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */
     
     /*
        correct remainder, noting that "digits" of quotient aren't base B
        but in base varying with truncation, thus correction needs fixup
     */
     cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2); 

     for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++)
        cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 2, sl + 2, dp[dn - sl - 3 - i]);
  }
   
  /* Low part of the quotient: sl limbs stored at qp.  */
  if (cy != 0) /* special case: unable to canonicalise */
     __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl);
  else
  {
     if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0)
        __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl);
     else
     {
        if (sl < SB_DIVAPPR_Q_CUTOFF)
           mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv);
        else
           mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv);
     }

  }

  return qh;
}
예제 #6
0
/* Check the schoolboy approximate-quotient division routine.

   For ITERS random operand pairs (divisor of 3..MAX_LIMBS limbs with its
   high bit forced on, numerator up to MAX_LIMBS - 1 limbs longer), compute
   Q = mpn_sb_divappr_q (N, D) and verify that |N - Q*D| < D.  On failure
   the operands are printed and the program aborts.

   mpn_sb_divappr_q overwrites its numerator, so the pristine copy np2 is
   used for every check and for diagnostics.  */
void
check_sb_divappr_q (void)
{
   mp_limb_t np[2*MAX_LIMBS];
   mp_limb_t np2[2*MAX_LIMBS];   /* untouched copy of the numerator */
   mp_limb_t rp[2*MAX_LIMBS];    /* |N - Q*D| */
   mp_limb_t dp[MAX_LIMBS];
   mp_limb_t qp[2*MAX_LIMBS];
   mp_limb_t dip;

   mp_size_t nn, rn, dn, qn;

   gmp_randstate_t rands;

   int i, s;
   gmp_randinit_default(rands);
  
   for (i = 0; i < ITERS; i++)
   {
      dn = (random() % (MAX_LIMBS - 2)) + 3;
      nn = (random() % MAX_LIMBS) + dn;
      
      mpn_rrandom (np, rands, nn);
      mpn_rrandom (dp, rands, dn);
      dp[dn-1] |= GMP_LIMB_HIGHBIT;   /* the routine needs a normalised divisor */

      MPN_COPY(np2, np, nn);
      
      mpir_invert_pi2(dip, dp[dn - 1], dp[dn - 2]);
      
      qn = nn - dn + 1;
         
      /* The return value is the top quotient limb. */
      qp[qn - 1] = mpn_sb_divappr_q(qp, np, nn, dp, dn, dip);

      MPN_NORMALIZE(qp, qn);

      if (qn)
      {
         /* mpn_mul wants the longer operand first. */
         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);
         else mpn_mul(rp, dp, dn, qp, qn);

         rn = dn + qn;
         MPN_NORMALIZE(rp, rn);

         /* rp = |N - Q*D|; the approximate quotient may overshoot, so the
            product can be larger than N. */
         s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);
         if (s <= 0) 
         {
            mpn_sub(rp, np2, nn, rp, rn);
            rn = nn;
            MPN_NORMALIZE(rp, rn);
         } else 
         {
            mpn_sub(rp, rp, rn, np2, nn);
            MPN_NORMALIZE(rp, rn);
         }
      } else
      {
         /* Zero quotient: the difference is N itself.  Use the pristine
            copy, since np has been handed to the division routine, and
            normalise it like the other branches do. */
         rn = nn;
         MPN_COPY(rp, np2, nn);
         MPN_NORMALIZE(rp, rn);
      }
      
      s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);
      if (s >= 0)
      {
         printf ("failed:\n");
         /* mp_size_t is a signed type whose width varies by platform;
            convert explicitly so the arguments match the format. */
         printf ("nn = %ld, dn = %ld, qn = %ld, rn = %ld\n\n",
                 (long) nn, (long) dn, (long) qn, (long) rn);
         gmp_printf (" np: %Nx\n\n", np2, nn);
         gmp_printf (" dp: %Nx\n\n", dp, dn);
         gmp_printf (" qp: %Nx\n\n", qp, qn);
         gmp_printf (" rp: %Nx\n\n", rp, rn);
         abort ();
      }
   }

   gmp_randclear(rands);
}