/* Divide-and-conquer approximate quotient, balanced case.

   The n quotient limbs at qp are produced in two pieces:
     - the upper ceil(n/2) limbs come from a quotient-and-remainder division
       (mpn_sb_div_qr or mpn_dc_div_qr_n, chosen by DC_DIV_QR_THRESHOLD) of
       the top limbs of np by the top limbs of dp;
     - the lower floor(n/2) limbs come from an approximate-quotient division
       (mpn_sb_divappr_q or a recursive call, chosen by
       DC_DIVAPPR_Q_THRESHOLD).

   np is updated in place along the way.  dip is forwarded unchanged to the
   callees.  tp is scratch: it receives the product of the upper quotient
   block and the low limbs of dp, and is handed on to the recursive calls.

   Returns the high limb computed for the upper quotient block, after the
   correction loop has been applied.  */
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
                    mp_srcptr dip, mp_ptr tp)
{
  const mp_size_t low_n = n >> 1;       /* floor(n/2) */
  const mp_size_t high_n = n - low_n;   /* ceil(n/2) */
  mp_ptr q_high = qp + low_n;           /* upper quotient block */
  mp_limb_t q_top, q_low_top, borrow;

  /* Upper block: quotient and remainder.  */
  q_top = BELOW_THRESHOLD (high_n, DC_DIV_QR_THRESHOLD)
    ? mpn_sb_div_qr (q_high, np + 2 * low_n, 2 * high_n, dp + low_n, high_n, dip)
    : mpn_dc_div_qr_n (q_high, np + 2 * low_n, dp + low_n, high_n, dip, tp);

  /* Remove the contribution of the divisor limbs the division above
     ignored: (upper quotient block) * {dp, low_n}.  */
  mpn_mul (tp, q_high, high_n, dp, low_n);

  borrow = mpn_sub_n (np + low_n, np + low_n, tp, n);
  if (q_top != 0)
    borrow += mpn_sub_n (np + n, np + n, dp, low_n);

  /* While the subtraction borrowed, step the upper quotient block down by
     one and add the divisor back.  */
  while (borrow != 0)
    {
      q_top -= mpn_sub_1 (q_high, q_high, high_n, 1);
      borrow -= mpn_add_n (np + low_n, np + low_n, dp, n);
    }

  /* Lower block: approximate quotient only.  */
  q_low_top = BELOW_THRESHOLD (low_n, DC_DIVAPPR_Q_THRESHOLD)
    ? mpn_sb_divappr_q (qp, np + high_n, 2 * low_n, dp + high_n, low_n, dip)
    : mpn_dc_divappr_q_n (qp, np + high_n, dp + high_n, low_n, dip, tp);

  /* A non-zero high limb from the lower block is handled by setting every
     limb of that block to GMP_NUMB_MASK.  */
  if (UNLIKELY (q_low_top != 0))
    {
      mp_ptr fill = qp;
      mp_size_t left = low_n;

      while (left-- > 0)
        *fill++ = GMP_NUMB_MASK;
    }

  return q_top;
}
void mpn_tdiv_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) { mp_ptr new_dp, new_np, tp, rp, scratch; mp_limb_t cy, dh, qh; mp_size_t new_nn, qn; mp_limb_t dinv; int cnt; TMP_DECL; TMP_MARK; ASSERT (nn >= dn); ASSERT (dn > 0); ASSERT (dp[dn - 1] != 0); ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn)); ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn)); ASSERT_ALWAYS (FUDGE >= 2); if (dn == 1) { mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]); return; } scratch = TMP_ALLOC_LIMBS(nn + 1); qn = nn - dn + 1; /* Quotient size, high limb might be zero */ if (qn + FUDGE >= dn) { /* |________________________| |_______| */ new_np = scratch; dh = dp[dn - 1]; if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) { count_leading_zeros (cnt, dh); cy = mpn_lshift (new_np, np, nn, cnt); new_np[nn] = cy; new_nn = nn + (cy != 0); new_dp = TMP_ALLOC_LIMBS (dn); mpn_lshift (new_dp, dp, dn, cnt); if (dn == 2) { qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp); } else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD)) { invert_1(dinv, new_dp[dn - 1], new_dp[dn - 2]); qh = mpn_sb_div_q (qp, new_np, new_nn, new_dp, dn, dinv); } else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD)) { invert_1(dinv, new_dp[dn - 1], new_dp[dn - 2]); qh = mpn_dc_div_q (qp, new_np, new_nn, new_dp, dn, dinv); } else { mp_ptr inv = TMP_ALLOC_LIMBS(dn); mpn_invert(inv, new_dp, dn); qh = mpn_inv_div_q (qp, new_np, new_nn, new_dp, dn, inv); } if (cy == 0) qp[qn - 1] = qh; else if (UNLIKELY (qh != 0)) { /* This happens only when the quotient is close to B^n and mpn_*_divappr_q returned B^n. 
*/ mp_size_t i, n; n = new_nn - dn; for (i = 0; i < n; i++) qp[i] = GMP_NUMB_MAX; qh = 0; /* currently ignored */ } } else /* divisor is already normalised */ { if (new_np != np) MPN_COPY (new_np, np, nn); if (dn == 2) { qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp); } else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD)) { invert_1(dinv, dh, dp[dn - 2]); qh = mpn_sb_div_q (qp, new_np, nn, dp, dn, dinv); } else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD)) { invert_1(dinv, dh, dp[dn - 2]); qh = mpn_dc_div_q (qp, new_np, nn, dp, dn, dinv); } else { mp_ptr inv = TMP_ALLOC_LIMBS(dn); mpn_invert(inv, dp, dn); qh = mpn_inv_div_q (qp, new_np, nn, dp, dn, inv); } qp[nn - dn] = qh; } } else { /* |________________________| |_________________| */ tp = TMP_ALLOC_LIMBS (qn + 1); new_np = scratch; new_nn = 2 * qn + 1; if (new_np == np) /* We need {np,nn} to remain untouched until the final adjustment, so we need to allocate separate space for new_np. 
*/ new_np = TMP_ALLOC_LIMBS (new_nn + 1); dh = dp[dn - 1]; if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) { count_leading_zeros (cnt, dh); cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt); new_np[new_nn] = cy; new_nn += (cy != 0); new_dp = TMP_ALLOC_LIMBS (qn + 1); mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt); new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt); if (qn + 1 == 2) { qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp); } else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD)) { invert_1(dinv, new_dp[qn], new_dp[qn - 1]); qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv); } else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD)) { invert_1(dinv, new_dp[qn], new_dp[qn - 1]); qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv); } else { mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1); mpn_invert(inv, new_dp, qn + 1); qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv); } if (cy == 0) tp[qn] = qh; else if (UNLIKELY (qh != 0)) { /* This happens only when the quotient is close to B^n and mpn_*_divappr_q returned B^n. */ mp_size_t i, n; n = new_nn - (qn + 1); for (i = 0; i < n; i++) tp[i] = GMP_NUMB_MAX; qh = 0; /* currently ignored */ } } else /* divisor is already normalised */ {
/* Divide-and-conquer approximate quotient of the 2n limbs at np by the n
   limbs at dp, writing n quotient limbs to qp.

   Outline, as implemented below:
     1. If the top n limbs of np are >= dp, subtract dp from them and record
        a high quotient limb of 1.
     2. Copy np to tp and estimate the top m = ceil(n/2) quotient limbs from
        that copy, then lower the estimate by 3 (clamping at zero).
     3. Form an estimate of q_hi * D with a middle product (mpn_mulmid plus
        two mpn_addmul_1 calls) and subtract it from np.
     4. Compute the remaining quotient limbs, plus one extra guard limb, into
        tp and merge them into qp, rounding on the guard limb.

   np is modified in place.  tp is scratch: the first 2*n limbs hold the copy
   of np and tp + 2*n is handed to the recursive call, so it must be larger
   than 2*n limbs (the exact requirement is not stated in this function).
   dip and d1ip are passed through unchanged to mpn_sb_divappr_q and to the
   recursive calls; presumably they are precomputed inverses of the leading
   divisor limbs -- confirm against the callee's documentation.

   Returns the high limb of the quotient (ret); the final block below turns a
   value of 2 into 1 by decrementing the quotient.  Requires n >= 6
   (asserted).  */
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_limb_t dip, mp_limb_t d1ip, mp_ptr tp)
{
  mp_limb_t qh, cy;
  mp_ptr q_hi;
  mp_size_t m;
  mp_limb_t ret = 0;

  ASSERT (n >= 6);

  /* if the top n limbs of np are >= dp, high limb of quotient is 1 */
  if (mpn_cmp(np + n, dp, n) >= 0)
    {
      ret = 1;
      mpn_sub_n(np + n, np + n, dp, n);
    }

  /* top n limbs of np are now < dp */

  /* m = ceil(n/2) high quotient limbs; q_hi points at them inside qp */
  m = (n + 1) / 2;
  q_hi = qp + n - m;

  /* FIXME: we could probably avoid this copy if we could guarantee
     that sb_div_appr_q/dc_divappr_q_n did not destroy the "bottom half"
     of N */
  MPN_COPY (tp, np, 2*n);

  /* estimate high m+1 limbs of quotient, using a 2*m by m division
     the quotient may be computed 1 too large as it is approximate,
     moreover, even computed precisely it may be two too large due
     to the truncation we've done to a 2*m by m division... */
  if (m < DC_DIVAPPR_Q_N_THRESHOLD)
    qh = mpn_sb_divappr_q (q_hi, tp + 2*n - 2*m, 2*m, dp + n - m, m, dip, d1ip);
  else
    qh = mpn_dc_divappr_q_n (q_hi, tp + 2*n - 2*m, dp + n - m, m, dip, d1ip, tp + 2*n);

  /* we therefore decrease the estimate by 3... */
  qh -= mpn_sub_1 (q_hi, q_hi, m, (mp_limb_t) 3);

  /* ensuring it doesn't become negative */
  if (qh & GMP_NUMB_HIGHBIT)
    {
      MPN_ZERO (q_hi, m);
      qh = 0;
    }

  /* note qh is now always zero as the quotient we have is definitely
     correct or up to two too small, and we already normalised np */
  ASSERT (qh == 0);

  /* we know that {np+n-m, n+m} = q_hi * D + e0, where 0 <= e0 < C*B^n,
     where C is a small positive constant. Estimate q_hi * D using
     middle product, developing one additional limb, i.e. develop
     n - m + 3 limbs. The bottom limb is meaningless and the next limb
     may be too small by up to some small multiple of n, but recall
     n << B. */
  mpn_mulmid (tp, dp, n, q_hi + 1, m - 2);

  /* do some parts of the middle product "manually": */
  /* the terms for the lowest and highest limbs of q_hi (q_hi[0] and
     q_hi[m-1]) are added separately, since mpn_mulmid above was given only
     q_hi[1 .. m-2] */
  tp[n - m + 2] += mpn_addmul_1 (tp, dp + m - 2, n - m + 2, q_hi[0]);
  mpn_addmul_1 (tp + 1, dp, n - m + 2, q_hi[m-1]);

  /* subtract that estimate from N. We note the limb at np + n - 2 is then
     meaningless, and the next limb might be too large by a small amount, i.e.
     the bottom n limbs of np are now possibly too large by a quantity
     much less than dp */
  mpn_sub_n (np + n - 2, np + n - 2, tp, n - m + 3);

  /* recursively divide to obtain low half of quotient, developing
     one more limb than we would need if everything had been exact.
     As this extra limb is out by only a small amount, rounding the
     remaining limbs based on its value and discarding the extra limb
     results in a quotient which is at most 1 too large */
  if (n - m + 2 < DC_DIVAPPR_Q_N_THRESHOLD)
    cy = mpn_sb_divappr_q (tp, np + m - 3, 2*n - 2*m + 4, dp + m - 2, n - m + 2, dip, d1ip);
  else
    cy = mpn_dc_divappr_q_n (tp, np + m - 3, dp + m - 2, n - m + 2, dip, d1ip, tp + n - m + 2);

  /* FIXME: The only reason this copy happens is that we elected to
     develop one extra quotient limb in the second recursive quotient. */
  /* tp[0] is the guard limb; tp[1 .. n-m] are the low quotient limbs */
  MPN_COPY (qp, tp + 1, n - m);

  /* Construct final quotient from low and hi parts... */
  /* tp[n-m+1] and cy are the two limbs of the low quotient that overlap
     q_hi; add them in at their respective positions, collecting carries
     in ret */
  ret += mpn_add_1 (qp + n - m, qp + n - m, m, tp[n-m+1]);
  ret += mpn_add_1 (qp + n - m + 1, qp + n - m + 1, m - 1, cy);
  if (tp[0] >= GMP_NUMB_HIGHBIT)
    ret += mpn_add_1 (qp, qp, n, 1); /* ...rounding quotient up */

  /* As the final quotient may be 1 too large, we may have ret == 2
     (it is very unlikely, but can be relatively easily triggered at
     random when dp = 0x80000...0000), then Q must be 2000.... and
     we should return instead 1ffff.... */
  if (ret == 2)
    {
      ret -= mpn_sub_1 (qp, qp, n, 1);
      ASSERT (ret == 1);
    }

  return ret;
}
mp_limb_t mpn_preinv_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_srcptr dip) { mp_size_t qn; mp_limb_t qh, cy, qsave; mp_ptr tp; TMP_DECL; TMP_MARK; tp = TMP_SALLOC_LIMBS (dn+1); qn = nn - dn; qp += qn; np += nn; dp += dn; if (qn > dn) { qn++; /* pretend we'll need an extra limb */ /* Reduce qn mod dn without division, optimizing small operations. */ do qn -= dn; while (qn > dn); qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ /* Perform the typically smaller block first. */ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip); else qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp); if (qn != dn) { if (qn > dn - qn) mpn_mul (tp, qp, qn, dp - dn, dn - qn); else mpn_mul (tp, dp - dn, dn - qn, qp, qn); cy = mpn_sub_n (np - dn, np - dn, tp, dn); if (qh != 0) cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); while (cy != 0) { qh -= mpn_sub_1 (qp, qp, qn, 1); cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); } } qn = nn - dn - qn + 1; while (qn > dn) { qp -= dn; np -= dn; mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp); qn -= dn; } /* Since we pretended we'd need an extra quotient limb before, we now have made sure the code above left just dn-1=qn quotient limbs to develop. Develop that plus a guard limb. */ qn--; qp -= qn; np -= dn; qsave = qp[qn]; mpn_dc_divappr_q_n (qp, np - dn, dp - dn, dn, dip, tp); MPN_COPY_INCR (qp, qp + 1, qn); qp[qn] = qsave; } else { if (qn == 0) { qh = mpn_cmp (np - dn, dp - dn, dn) >= 0; if (qh) mpn_sub_n (np - dn, np - dn, dp - dn, dn); TMP_FREE; return qh; } qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD)) /* Full precision. Optimal? 
*/ qh = mpn_sb_divappr_q (qp, np - dn, nn, dp - dn, dn, dip); else { /* Put quotient in tp, use qp as temporary, since qp lacks a limb. */ qh = mpn_dc_divappr_q_n (tp, np - qn - 2, dp - (qn + 1), qn + 1, dip, qp); MPN_COPY (qp, tp + 1, qn); } } TMP_FREE; return qh; }
/* Divide-and-conquer approximate quotient of {np, nn} by {dp, dn}.

   Quotient limbs are written to qp and np is modified in place.  dinv is
   forwarded unchanged to the schoolbook and divide-and-conquer callees.

   Preconditions (asserted): dn >= 6, nn >= dn + 3, and the high bit of
   dp[dn-1] is set.

   Returns qh, the high limb of the quotient: it starts as 1 when the top dn
   limbs of np are >= the (possibly truncated) divisor, and may be lowered
   by the correction loop further down.

   NOTE(review): __divappr_helper is defined elsewhere; from its call sites
   it appears to produce quotient limbs for cases where the partial
   remainder is not smaller than the truncated divisor -- confirm against
   its definition.  */
mp_limb_t
mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
  mp_size_t q_orig, qn, sh, sl, i;
  mp_limb_t qh, cy, cy2;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn >= 6);
  ASSERT (nn >= dn + 3);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;

  /* when the quotient is shorter than the divisor, keep only the top
     qn + 1 divisor limbs */
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  q_orig = qn;

  /* high quotient limb: is the top of np >= the divisor? */
  qh = mpn_cmp(np + nn - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);

  /* drop the numerator limbs below the dn + qn that are still needed */
  np += nn - dn - qn;
  nn = dn + qn;

  /* Reduce until dn - 1 >= qn */
  while (dn - 1 < qn)
    {
      sh = MIN(dn, qn - dn + 1);
      /* NOTE: cy2 receives the return value of the block division but is
         never read afterwards */
      if (sh <= DC_DIV_QR_THRESHOLD)
        cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
      else
        cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
      qn -= sh;
      nn -= sh;
    }

  /* top limb of the current partial remainder */
  cy = np[nn - 1];

  /* split into two parts */
  sh = qn/2;
  sl = qn - sh;

  /* Rare case where truncation ruins normalisation */
  if (cy > dp[dn - 1] || (cy == dp[dn - 1] && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0))
    {
      __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn);
      return qh;
    }

  /* high sh quotient limbs: helper when the relevant numerator limbs are
     >= the matching divisor limbs, otherwise schoolbook or recursion
     depending on SB_DIVAPPR_Q_CUTOFF */
  if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0)
    __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh);
  else
    {
      if (sh < SB_DIVAPPR_Q_CUTOFF)
        mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
      else
        mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
    }

  cy = np[nn - sh];

  /* subtract a middle product of the divisor and the high quotient part
     from the remainder; cy tracks the limb above the subtracted range */
  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(sl + 2);

  mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh);
  cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2);
  TMP_FREE;

  /* cy negative (as a signed limb) means too much was subtracted */
  while ((mp_limb_signed_t) cy < 0)
    {
      qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */

      /* correct remainder, noting that "digits" of quotient aren't base B
         but in base varying with truncation, thus correction needs fixup */
      cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2);

      /* one extra divisor limb is added for each leading all-ones limb of
         the decremented quotient */
      for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++)
        cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 2, sl + 2, dp[dn - sl - 3 - i]);
    }

  /* low sl quotient limbs */
  if (cy != 0) /* special case: unable to canonicalise */
    __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl);
  else
    {
      if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0)
        __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl);
      else
        {
          if (sl < SB_DIVAPPR_Q_CUTOFF)
            mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv);
          else
            mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv);
        }
    }

  return qh;
}
/* Check schoolboy division routine. */ void check_sb_divappr_q (void) { mp_limb_t np[2*MAX_LIMBS]; mp_limb_t np2[2*MAX_LIMBS]; mp_limb_t rp[2*MAX_LIMBS]; mp_limb_t dp[MAX_LIMBS]; mp_limb_t qp[2*MAX_LIMBS]; mp_limb_t dip; mp_size_t nn, rn, dn, qn; gmp_randstate_t rands; int i, j, s; gmp_randinit_default(rands); for (i = 0; i < ITERS; i++) { dn = (random() % (MAX_LIMBS - 2)) + 3; nn = (random() % MAX_LIMBS) + dn; mpn_rrandom (np, rands, nn); mpn_rrandom (dp, rands, dn); dp[dn-1] |= GMP_LIMB_HIGHBIT; MPN_COPY(np2, np, nn); mpir_invert_pi2(dip, dp[dn - 1], dp[dn - 2]); qn = nn - dn + 1; qp[qn - 1] = mpn_sb_divappr_q(qp, np, nn, dp, dn, dip); MPN_NORMALIZE(qp, qn); if (qn) { if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn); else mpn_mul(rp, dp, dn, qp, qn); rn = dn + qn; MPN_NORMALIZE(rp, rn); s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn); if (s <= 0) { mpn_sub(rp, np2, nn, rp, rn); rn = nn; MPN_NORMALIZE(rp, rn); } else { mpn_sub(rp, rp, rn, np2, nn); MPN_NORMALIZE(rp, rn); } } else { rn = nn; MPN_COPY(rp, np, nn); } s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn); if (s >= 0) { printf ("failed:\n"); printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn); gmp_printf (" np: %Nx\n\n", np2, nn); gmp_printf (" dp: %Nx\n\n", dp, dn); gmp_printf (" qp: %Nx\n\n", qp, qn); gmp_printf (" rp: %Nx\n\n", rp, rn); abort (); } } gmp_randclear(rands); }