/*
 * mpn_dc_divappr_q_n -- balanced divide-and-conquer approximate quotient.
 *
 * Works on the 2n limbs at np and the n limbs at dp.  The upper ceil(n/2)
 * quotient limbs are developed with an exact quotient-and-remainder
 * division, the partial remainder in np is then corrected for the divisor
 * limbs that division did not see, and the lower floor(n/2) quotient limbs
 * are developed with an approximate division (basecase or recursive,
 * depending on DC_DIVAPPR_Q_THRESHOLD).
 *
 *   qp   destination for the n quotient limbs
 *   np   numerator / partial remainder, modified in place
 *   dp   divisor, n limbs
 *   n    divisor size in limbs
 *   dip  forwarded unchanged to the called division routines
 *        (presumably precomputed inverse data -- confirm against callers)
 *   tp   scratch; receives the n-limb product of the high quotient half
 *        and the low divisor half, and is handed on to the recursive calls
 *
 * Returns the high quotient limb reported by the upper-half division, less
 * any borrow produced while decrementing the quotient during correction.
 */
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
                    mp_srcptr dip, mp_ptr tp)
{
  const mp_size_t low_n = n >> 1;        /* floor(n/2) */
  const mp_size_t high_n = n - low_n;    /* ceil(n/2) */
  mp_ptr q_high = qp + low_n;            /* upper quotient half */
  mp_ptr rem = np + low_n;               /* n-limb window that gets corrected */
  mp_limb_t borrow, q_top, q_low_top;

  /* Upper half: exact division of the top 2*high_n numerator limbs by the
     top high_n divisor limbs.  */
  if (BELOW_THRESHOLD (high_n, DC_DIV_QR_THRESHOLD))
    q_top = mpn_sb_div_qr (q_high, np + 2 * low_n, 2 * high_n,
                           dp + low_n, high_n, dip);
  else
    q_top = mpn_dc_div_qr_n (q_high, np + 2 * low_n, dp + low_n, high_n,
                             dip, tp);

  /* Account for the low_n divisor limbs ignored above: subtract
     (upper quotient) * (low divisor part) from the partial remainder.  */
  mpn_mul (tp, q_high, high_n, dp, low_n);

  borrow = mpn_sub_n (rem, rem, tp, n);
  if (q_top != 0)
    borrow += mpn_sub_n (np + n, np + n, dp, low_n);

  /* While the remainder is negative, step the quotient down by one and add
     the divisor back.  */
  while (borrow != 0)
    {
      q_top -= mpn_sub_1 (q_high, q_high, high_n, 1);
      borrow -= mpn_add_n (rem, rem, dp, n);
    }

  /* Lower half: an approximate quotient is sufficient here.  */
  if (BELOW_THRESHOLD (low_n, DC_DIVAPPR_Q_THRESHOLD))
    q_low_top = mpn_sb_divappr_q (qp, np + high_n, 2 * low_n,
                                  dp + high_n, low_n, dip);
  else
    q_low_top = mpn_dc_divappr_q_n (qp, np + high_n, dp + high_n, low_n,
                                    dip, tp);

  /* If the lower division reported a high limb, saturate the low quotient
     limbs to the largest representable value.  */
  if (UNLIKELY (q_low_top != 0))
    {
      mp_size_t k = 0;

      while (k < low_n)
        qp[k++] = GMP_NUMB_MASK;
    }

  return q_top;
}
/*
 * mpn_dc_div_qr -- divide-and-conquer division with quotient and remainder.
 *
 * Divides the nn limbs at np by the dn limbs at dp.  Quotient limbs are
 * stored through qp (the code addresses nn - dn of them) and the partial
 * remainder is updated in place in np.  dinv is forwarded to the basecase /
 * recursive routines and to mpir_divrem32_preinv2 (presumably a precomputed
 * inverse of the leading divisor limbs -- confirm against callers).
 *
 * Asserted preconditions: dn >= 6, nn - dn >= 3, and the top bit of
 * dp[dn-1] is set.
 *
 * Returns qh, the high quotient limb produced while developing the first
 * (most significant) quotient block.
 */
mp_limb_t
mpn_dc_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6); /* to adhere to mpn_sb_div_qr's limits */
  ASSERT (nn - dn >= 3); /* to adhere to mpn_sb_div_qr's limits */
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  /* Scratch space shared by the multiplications and recursive calls below. */
  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));

  qn = nn - dn;
  /* From here on qp, np and dp point one past the END of their operands;
     every access below uses negative offsets from these pointers. */
  qp += qn;
  np += nn;
  dp += dn;

  if (qn > dn)
    {
      /* Reduce qn mod dn without division, optimizing small operations. */
      do
        qn -= dn;
      while (qn > dn);

      qp -= qn; /* point at low limb of next quotient block */
      np -= qn; /* point in the middle of partial remainder */

      /* Perform the typically smaller block first. */
      if (qn == 1)
        {
          mp_limb_t q, n2, n1, n0, d1, d0, d11, d01;

          /* Handle qh up front, for simplicity: qh is 1 exactly when the
             dn limbs starting at np - dn + 1 compare >= the divisor, in
             which case the divisor is subtracted once. */
          qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
          if (qh)
            ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

          /* A single iteration of schoolbook: One 3/2 division, followed by
             the bignum update and adjustment. */
          n2 = np[0];
          n1 = np[-1];
          n0 = np[-2];
          d1 = dp[-1];
          d0 = dp[-2];
          /* d11:d01 is the two-limb value d1:d0 plus one; (d01 < d0)
             detects wrap-around of the low limb. */
          d01 = d0 + 1;
          d11 = d1 + (d01 < d0);

          ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

          if (UNLIKELY (n2 == d1) && n1 == d0)
            {
              /* Top two numerator limbs equal the top two divisor limbs:
                 use the largest possible quotient limb directly. */
              q = GMP_NUMB_MASK;
              cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
              ASSERT (cy == n2);
            }
          else
            {
              /* Writes the quotient limb to q and the two-limb remainder
                 to n1, n0. */
              mpir_divrem32_preinv2 (q, n1, n0, n2, n1, n0, d11, d01, d1, d0, dinv);

              if (dn > 2)
                {
                  /* Note: this cy shadows the function-level cy. */
                  mp_limb_t cy, cy1;
                  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

                  /* Propagate the borrow from the low dn - 2 limbs
                     through n0 and n1. */
                  cy1 = n0 < cy;
                  n0 = (n0 - cy) & GMP_NUMB_MASK;
                  cy = n1 < cy1;
                  n1 = (n1 - cy1) & GMP_NUMB_MASK;
                  np[-2] = n0;

                  if (UNLIKELY (cy != 0))
                    {
                      /* q was one too large: add the divisor back and
                         decrement q, borrowing from qh if q was 0. */
                      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
                      qh -= (q == 0);
                      q = (q - 1) & GMP_NUMB_MASK;
                    }
                }
              else
                np[-2] = n0;

              np[-1] = n1;
            }
          qp[0] = q;
        }
      else
        {
          /* Do a 2qn / qn division */
          if (qn == 2)
            qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
          else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
            qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
          else
            qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

          if (qn != dn)
            {
              /* Only the top qn divisor limbs were used above.  Multiply
                 the new quotient block by the remaining dn - qn low divisor
                 limbs (larger operand passed first to mpn_mul) and subtract
                 the product from the partial remainder. */
              if (qn > dn - qn)
                mpn_mul (tp, qp, qn, dp - dn, dn - qn);
              else
                mpn_mul (tp, dp - dn, dn - qn, qp, qn);

              cy = mpn_sub_n (np - dn, np - dn, tp, dn);
              if (qh != 0)
                cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

              /* While the remainder is negative, decrement the quotient
                 block and add the divisor back. */
              while (cy != 0)
                {
                  qh -= mpn_sub_1 (qp, qp, qn, 1);
                  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
                }
            }
        }

      /* Remaining quotient limbs; developed dn at a time below. */
      qn = nn - dn - qn;
      do
        {
          qp -= dn;
          np -= dn;
          /* NOTE(review): the division itself is the argument of
             ASSERT_NOCARRY; this relies on that macro still evaluating its
             argument when assertions are disabled -- confirm against the
             macro definition. */
          ASSERT_NOCARRY(mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp));
          qn -= dn;
        }
      while (qn > 0);
    }
  else
    {
      /* qn <= dn: a single quotient block suffices. */
      qp -= qn; /* point at low limb of next quotient block */
      np -= qn; /* point in the middle of partial remainder */

      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
        qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
      else
        qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

      if (qn != dn)
        {
          /* Same correction as in the qn > dn branch: fold in the dn - qn
             low divisor limbs that the block division did not see. */
          if (qn > dn - qn)
            mpn_mul (tp, qp, qn, dp - dn, dn - qn);
          else
            mpn_mul (tp, dp - dn, dn - qn, qp, qn);

          cy = mpn_sub_n (np - dn, np - dn, tp, dn);
          if (qh != 0)
            cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

          while (cy != 0)
            {
              qh -= mpn_sub_1 (qp, qp, qn, 1);
              cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
            }
        }
    }

  TMP_FREE;
  return qh;
}
mp_limb_t mpn_preinv_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_srcptr dip) { mp_size_t qn; mp_limb_t qh, cy, qsave; mp_ptr tp; TMP_DECL; TMP_MARK; tp = TMP_SALLOC_LIMBS (dn+1); qn = nn - dn; qp += qn; np += nn; dp += dn; if (qn > dn) { qn++; /* pretend we'll need an extra limb */ /* Reduce qn mod dn without division, optimizing small operations. */ do qn -= dn; while (qn > dn); qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ /* Perform the typically smaller block first. */ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip); else qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp); if (qn != dn) { if (qn > dn - qn) mpn_mul (tp, qp, qn, dp - dn, dn - qn); else mpn_mul (tp, dp - dn, dn - qn, qp, qn); cy = mpn_sub_n (np - dn, np - dn, tp, dn); if (qh != 0) cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); while (cy != 0) { qh -= mpn_sub_1 (qp, qp, qn, 1); cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); } } qn = nn - dn - qn + 1; while (qn > dn) { qp -= dn; np -= dn; mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp); qn -= dn; } /* Since we pretended we'd need an extra quotient limb before, we now have made sure the code above left just dn-1=qn quotient limbs to develop. Develop that plus a guard limb. */ qn--; qp -= qn; np -= dn; qsave = qp[qn]; mpn_dc_divappr_q_n (qp, np - dn, dp - dn, dn, dip, tp); MPN_COPY_INCR (qp, qp + 1, qn); qp[qn] = qsave; } else { if (qn == 0) { qh = mpn_cmp (np - dn, dp - dn, dn) >= 0; if (qh) mpn_sub_n (np - dn, np - dn, dp - dn, dn); TMP_FREE; return qh; } qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD)) /* Full precision. Optimal? 
*/ qh = mpn_sb_divappr_q (qp, np - dn, nn, dp - dn, dn, dip); else { /* Put quotient in tp, use qp as temporary, since qp lacks a limb. */ qh = mpn_dc_divappr_q_n (tp, np - qn - 2, dp - (qn + 1), qn + 1, dip, qp); MPN_COPY (qp, tp + 1, qn); } } TMP_FREE; return qh; }
/*
 * Test / timing driver for the basecase division routines.
 *
 * Each of the 1000 outer iterations builds a random 2*limbs-limb numerator
 * (kept in two copies, a1 and a2) and a random limbs-limb divisor b whose
 * top bit is forced on.  Depending on the TEST, TIME, MPIR and THIS
 * preprocessor symbols it then:
 *   - TEST: runs mpn_sb_div_qr and div_basecase once each and compares the
 *     returned high limb, the low `limbs` quotient limbs and the low
 *     `limbs` limbs of the in-place remainder, aborting on any mismatch;
 *   - TIME: runs the selected routine 1000 times, restoring the numerator
 *     from the other copy after every call.
 *
 * Prints "PASS" and returns 0 if no mismatch aborted the run.
 */
int main(void)
{
   mp_limb_t r1, r2, dinv1, dinv2, a1[80], a2[80], b[40], q1[80], q2[80];
   mp_size_t limbs = 10;
   long i, j;

   for (i = 0; i < 1000; i++)
   {
      mpn_random(a1, 2*limbs);
      mpn_random(b, limbs);
      mpn_copyi(a2, a1, 2*limbs);

      b[limbs - 1] |= GMP_NUMB_HIGHBIT; /* normalise b */

      /* Precompute the inverse each routine expects. */
#if (TIME == MPIR) || TEST
      invert_1(dinv1, b[limbs - 1], b[limbs - 2]);
#endif

#if (TIME == THIS) || TEST
      dinv2 = div_preinv1(b[limbs - 1], b[limbs - 2]);
#endif

#if TEST
      r1 = mpn_sb_div_qr(q1, a1, 2*limbs, b, limbs, dinv1);
      r2 = div_basecase(q2, a2, 2*limbs, b, limbs, dinv2);
#endif

#if TIME
      for (j = 0; j < 1000; j++)
      {
#if TIME == MPIR
         r1 = mpn_sb_div_qr(q1, a1, 2*limbs, b, limbs, dinv1);
         mpn_copyi(a1, a2, 2*limbs); /* restore the clobbered numerator */
#endif

#if TIME == THIS
         r2 = div_basecase(q2, a2, 2*limbs, b, limbs, dinv2);
         mpn_copyi(a2, a1, 2*limbs); /* restore the clobbered numerator */
#endif
      }
#endif

#if TEST
      if (r1 != r2)
      {
         /* The message has no conversion specifier, so no extra argument
            is passed (j may not even be initialised at this point). */
         printf("Error in most significant limb\n");
         printf("%lu vs %lu\n", r1, r2);
         abort();
      }

      /* Compare quotient limbs, most significant first. */
      for (j = 0; j < limbs; j++)
      {
         if (q1[limbs - j - 1] != q2[limbs - j - 1])
         {
            printf("Error in limb %ld of quotient\n", limbs - j - 1);
            printf("%lu vs %lu\n", q1[limbs - j - 1], q2[limbs - j - 1]);
            abort();
         }
      }

      /* Compare the remainders left in the low limbs of a1 and a2. */
      for (j = 0; j < limbs; j++)
      {
         if (a1[limbs - j - 1] != a2[limbs - j - 1])
         {
            printf("Error in limb %ld of remainder\n", limbs - j - 1);
            printf("%lu vs %lu\n", a1[limbs - j - 1], a2[limbs - j - 1]);
            abort();
         }
      }
#endif
   }

   printf("PASS\n");

   return 0;
}
/*
 * mpn_dc_divappr_q -- divide-and-conquer approximate quotient of the nn
 * limbs at np by the dn limbs at dp.
 *
 * Quotient limbs are stored at qp; np is modified in place.  dinv is
 * forwarded to the basecase and recursive routines (presumably a
 * precomputed inverse of the leading divisor limbs -- confirm against
 * callers).
 *
 * Asserted preconditions: dn >= 6, nn >= dn + 3, and the top bit of
 * dp[dn-1] is set.
 *
 * Returns qh: 1 if the top dn numerator limbs were >= the (possibly
 * truncated) divisor on entry, else 0, less any borrow out of the quotient
 * produced by the correction loop.
 *
 * __divappr_helper is defined outside this block; it is called on the
 * paths where the partial remainder compares >= the truncated divisor or
 * cannot be brought back into range.
 */
mp_limb_t
mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
  mp_size_t q_orig, qn, sh, sl, i;
  mp_limb_t qh, cy, cy2;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn >= 6);
  ASSERT (nn >= dn + 3);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;

  /* If the divisor has more than qn + 1 limbs, keep only its top qn + 1
     limbs. */
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  /* Remember the full quotient length; qn shrinks below. */
  q_orig = qn;

  /* High quotient limb: compare the top dn numerator limbs with the
     divisor and subtract once if they are not smaller. */
  qh = mpn_cmp(np + nn - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);

  /* Drop the numerator limbs below the dn + qn that are used. */
  np += nn - dn - qn;
  nn = dn + qn;

  /* Reduce until dn - 1 >= qn */
  while (dn - 1 < qn)
    {
      /* Develop sh top quotient limbs with an exact division.
         NOTE(review): cy2 receives the return value but is never read. */
      sh = MIN(dn, qn - dn + 1);
      if (sh <= DC_DIV_QR_THRESHOLD)
        cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
      else
        cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
      qn -= sh;
      nn -= sh;
    }

  /* Top limb of the current partial remainder. */
  cy = np[nn - 1];

  /* split into two parts */
  sh = qn/2;
  sl = qn - sh;

  /* Rare case where truncation ruins normalisation */
  if (cy > dp[dn - 1] || (cy == dp[dn - 1] && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0))
    {
      /* Returns before TMP_MARK, so there is nothing to free here. */
      __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn);
      return qh;
    }

  /* High part: sh quotient limbs stored at qp + sl. */
  if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0)
    __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh);
  else
    {
      if (sh < SB_DIVAPPR_Q_CUTOFF)
        mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
      else
        mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
    }

  /* From here cy tracks the limb above the sl + 2 limb remainder window
     at np + nn - qn - 2, including borrows out of that window. */
  cy = np[nn - sh];

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(sl + 2);

  /* Subtract the sl + 2 limb result of mpn_mulmid (high quotient limbs
     against divisor limbs) from the remainder window. */
  mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh);
  cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2);

  TMP_FREE;

  /* A negative cy (as a signed limb) means the high quotient part was too
     large for the remainder. */
  while ((mp_limb_signed_t) cy < 0)
    {
      qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */

      /* correct remainder, noting that "digits" of quotient aren't base B
         but in base varying with truncation, thus correction needs fixup */
      cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2);

      /* For each leading all-ones limb among the low sh - 1 limbs of the
         high quotient part, add one further divisor limb into the
         window. */
      for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++)
        cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 2, sl + 2, dp[dn - sl - 3 - i]);
    }

  /* Low part: sl quotient limbs stored at qp. */
  if (cy != 0) /* special case: unable to canonicalise */
    __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl);
  else
    {
      if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0)
        __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl);
      else
        {
          if (sl < SB_DIVAPPR_Q_CUTOFF)
            mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv);
          else
            mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv);
        }
    }

  return qh;
}