mp_limb_t
mpn_divrem (mp_ptr qp, mp_size_t qxn,
            mp_ptr np, mp_size_t nn,
            mp_srcptr dp, mp_size_t dn)
{
  ASSERT (qxn >= 0);
  ASSERT (nn >= dn);
  ASSERT (dn >= 1);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  if (dn == 1)
    {
      mp_limb_t ret;
      mp_ptr q2p;
      mp_size_t qn;
      TMP_DECL;

      TMP_MARK;
      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);

      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
      qn = nn + qxn - 1;
      MPN_COPY (qp, q2p, qn);
      ret = q2p[qn];

      TMP_FREE;
      return ret;
    }
  else if (dn == 2)
    {
      return mpn_divrem_2 (qp, qxn, np, nn, dp);
    }
  else
    {
      mp_ptr rp, q2p;
      mp_limb_t qhl;
      mp_size_t qn;
      TMP_DECL;

      TMP_MARK;
      if (UNLIKELY (qxn != 0))
        {
          mp_ptr n2p;
          n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
          MPN_ZERO (n2p, qxn);
          MPN_COPY (n2p + qxn, np, nn);
          q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
          rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
          mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
          MPN_COPY (np, rp, dn);
          qn = nn - dn + qxn;
          MPN_COPY (qp, q2p, qn);
          qhl = q2p[qn];
        }
      else
        {
          q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
          rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
          mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
          MPN_COPY (np, rp, dn);        /* overwrite np area with remainder */
          qn = nn - dn;
          MPN_COPY (qp, q2p, qn);
          qhl = q2p[qn];
        }
      TMP_FREE;
      return qhl;
    }
}
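/* A minimal, hedged usage sketch (not part of the library): how a caller
   might invoke mpn_divrem given the preconditions asserted above.  The
   divisor must be normalised (high bit of dp[dn-1] set), qp must hold
   nn - dn + qxn limbs, and on return the low dn limbs of np hold the
   remainder.  The wrapper name example_divrem is hypothetical.  */
static mp_limb_t
example_divrem (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
  mp_limb_t qh;

  ASSERT (nn >= dn && dn >= 1);
  ASSERT (dp[dn - 1] & GMP_NUMB_HIGHBIT);   /* normalised divisor */

  /* qxn == 0: no extra fraction limbs.  The quotient is the returned high
     limb qh followed by the nn - dn limbs written to qp; the remainder is
     left in np[0 .. dn-1].  */
  qh = mpn_divrem (qp, 0, np, nn, dp, dn);
  return qh;
}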
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
{
  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;
  mp_limb_t  c = orig_c;
  mp_size_t  i;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (c);

  if (size == 1)
    {
      s = src[0];
      if (s > c)
        {
          l = s-c;
          h = l % d;
          if (h != 0)
            h = d - h;
        }
      else
        {
          l = c-s;
          h = l % d;
        }
      return h;
    }

  modlimb_invert (inverse, d);
  dmul = d << GMP_NAIL_BITS;

  i = 0;
  do
    {
      s = src[i];
      SUBC_LIMB (c, l, s, c);
      l = (l * inverse) & GMP_NUMB_MASK;
      umul_ppmm (h, dummy, l, dmul);
      c += h;
    }
  while (++i < size-1);

  s = src[i];
  if (s <= d)
    {
      /* With high<=d the final step can be a subtract and addback.  If c==0
         then the addback will restore to l>=0.  If c==d then will get l==d
         if s==0, but that's ok per the function definition.  */

      l = c - s;
      if (c < s)
        l += d;

      ret = l;
    }
  else
    {
      /* Can't skip a divide, just do the loop code once more. */

      SUBC_LIMB (c, l, s, c);
      l = (l * inverse) & GMP_NUMB_MASK;
      umul_ppmm (h, dummy, l, dmul);
      c += h;
      ret = c;
    }

  ASSERT (orig_c < d ? ret < d : ret <= d);
  return ret;
}
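/* A minimal sketch, assuming GMP_NAIL_BITS == 0, of how the modular inverse
   used by modlimb_invert above can be obtained: for odd d, Newton's
   iteration x <- x * (2 - d*x) doubles the number of correct low bits per
   step, and x = d is already correct to 3 bits since d*d == 1 (mod 8).
   Five steps therefore cover limbs of up to 96 bits.  The function name
   sketch_binvert_limb is hypothetical; the library uses its own
   modlimb_invert/binvert_limb macros.  */
static mp_limb_t
sketch_binvert_limb (mp_limb_t d)
{
  mp_limb_t x;

  ASSERT (d & 1);
  x = d;                /*  3 correct bits */
  x *= 2 - d * x;       /*  6 correct bits */
  x *= 2 - d * x;       /* 12 correct bits */
  x *= 2 - d * x;       /* 24 correct bits */
  x *= 2 - d * x;       /* 48 correct bits */
  x *= 2 - d * x;       /* 96 correct bits, enough for a 64-bit limb */
  ASSERT ((mp_limb_t) (d * x) == 1);
  return x;
}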
mp_limb_t
mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
             mp_srcptr vp, mp_size_t vsize, unsigned long int d)
{
  mp_limb_t v_inv;

  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize * GMP_NUMB_BITS >= d);
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  /* 1/V mod 2^GMP_NUMB_BITS. */
  binvert_limb (v_inv, vp[0]);

  /* Fast code for two cases previously used by the accel part of mpn_gcd.
     (Could probably remove this now it's inlined there.) */
  if (usize == 2 && vsize == 2
      && (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
    {
      mp_limb_t hi, lo;
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
      up[0] = 0;
      up[1] -= hi + q*vp[1];
      qp[0] = q;
      if (d == 2*GMP_NUMB_BITS)
        {
          q = (up[1] * v_inv) & GMP_NUMB_MASK;
          up[1] = 0;
          qp[1] = q;
        }
      return 0;
    }

  /* Main loop.  */
  while (d >= GMP_NUMB_BITS)
    {
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
      if (usize > vsize)
        mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      d -= GMP_NUMB_BITS;
      up += 1, usize -= 1;
      *qp++ = q;
    }

  if (d)
    {
      mp_limb_t b;
      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t) 1 << d) - 1);
      if (q <= 1)
        {
          if (q == 0)
            return 0;
          else
            b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
        }
      else
        b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);

      if (usize > vsize)
        mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      return q;
    }

  return 0;
}
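/* An illustrative sketch, assuming GMP_NAIL_BITS == 0, of the single-limb
   Hensel (2-adic) division step that the main loop above performs: with v0
   odd and v_inv = 1/v0 mod B, the quotient limb q = u0 * v_inv makes the
   low limb of u - q*v exactly zero, so the next iteration can advance by
   one limb.  The name sketch_bdiv_step is hypothetical.  */
static mp_limb_t
sketch_bdiv_step (mp_limb_t u0, mp_limb_t v0, mp_limb_t v_inv)
{
  mp_limb_t q = u0 * v_inv;                   /* quotient limb mod B */
  ASSERT ((mp_limb_t) (u0 - q * v0) == 0);    /* low limb cancels exactly */
  return q;
}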
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
{
  mp_limb_t  c = orig_c;
  mp_limb_t  s, l, q, h, inverse;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (c);

  /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
  if (size == 1)
    {
      s = src[0];
      if (s > c)
        {
          l = s-c;
          h = l % d;
          if (h != 0)
            h = d - h;
        }
      else
        {
          l = c-s;
          h = l % d;
        }
      return h;
    }

  binvert_limb (inverse, d);

  if (d <= 0xFFFFFFFF)
    {
      s = *src++;
      size--;
      do
        {
          SUBC_LIMB (c, l, s, c);
          s = *src++;
          q = l * inverse;
          umul_ppmm_half_lowequal (h, q, d, l);
          c += h;
          size--;
        }
      while (size != 0);

      if (s <= d)
        {
          /* With high s <= d the final step can be a subtract and addback.
             If c==0 then the addback will restore to l>=0.  If c==d then
             will get l==d if s==0, but that's ok per the function
             definition.  */

          l = c - s;
          l += (l > c ? d : 0);

          ASSERT_RETVAL (l);
          return l;
        }
      else
        {
          /* Can't skip a divide, just do the loop code once more. */
          SUBC_LIMB (c, l, s, c);
          q = l * inverse;
          umul_ppmm_half_lowequal (h, q, d, l);
          c += h;

          ASSERT_RETVAL (c);
          return c;
        }
    }
  else
    {
      mp_limb_t  dl = LOW32 (d);
      mp_limb_t  dh = HIGH32 (d);
      long  i;

      s = *src++;
      size--;
      do
        {
          SUBC_LIMB (c, l, s, c);
          s = *src++;
          q = l * inverse;
          umul_ppmm_lowequal (h, q, d, dh, dl, l);
          c += h;
          size--;
        }
      while (size != 0);

      if (s <= d)
        {
          /* With high s <= d the final step can be a subtract and addback.
             If c==0 then the addback will restore to l>=0.  If c==d then
             will get l==d if s==0, but that's ok per the function
             definition.  */

          l = c - s;
          l += (l > c ? d : 0);

          ASSERT_RETVAL (l);
          return l;
        }
      else
        {
          /* Can't skip a divide, just do the loop code once more. */
          SUBC_LIMB (c, l, s, c);
          q = l * inverse;
          umul_ppmm_lowequal (h, q, d, dh, dl, l);
          c += h;

          ASSERT_RETVAL (c);
          return c;
        }
    }
}
/* (rp, 2n) = (xp, n)*(yp, n) */
static void
mpn_mulshort_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t m;
  mp_limb_t t;
  mp_ptr rpn2;

  ASSERT (n >= 1);
  ASSERT_MPN (xp, n);
  ASSERT_MPN (yp, n);
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD))
    {
      mpn_mul_basecase (rp, xp, n, yp, n);
      return;
    }

  if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))
    {
      mpn_mulshort_n_basecase (rp, xp, yp, n);
      return;
    }

  /* choose optimal m s.t. n + 2 <= 2m, m < n */
  ASSERT (n >= 4);
  m = 87 * n / 128;
  if (2 * m < n + 2)
    m = (n + 1) / 2 + 1;
  if (m >= n)
    m = n - 1;
  ASSERT (n + 2 <= 2 * m);
  ASSERT (m < n);

  rpn2 = rp + n - 2;

  mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);

  mpn_mulshort_n (rp, xp, yp + m, n - m);
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));

  mpn_mulshort_n (rp, xp + m, yp, n - m);
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));

  umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);
  rp[0] = t >> GMP_NAIL_BITS;
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));

  umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);
  rp[0] = t >> GMP_NAIL_BITS;
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));

  return;
}
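/* A sketch of the recursion above, for orientation (not authoritative):
   with B the limb base and the split point m chosen so that
   n + 2 <= 2m < 2n, the short product is assembled from

     - a full m x m product of the top m limbs of x and y, placed at
       weight B^(2(n-m)),
     - two recursive short products of the low n-m limbs of one operand
       against the high n-m limbs of the other, whose top n-m+2 limbs are
       accumulated in at weight B^(n-2),
     - the two single cross terms x[m-1]*y[n-m-1] and x[n-m-1]*y[m-1] at
       weight B^(n-2), which fall outside all three blocks and are patched
       in with umul_ppmm.  */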
/* t = x - y - z, or t = x - (y + z), which explains the name */
mp_limb_t
mpn_subadd_n (mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
  mp_limb_t ret;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  ASSERT_MPN (z, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, z, n));

  if (t == x && t == y && t == z)
    return mpn_neg (t, z, n);

  if (t == x && t == y)
    {
      ret = mpn_sub_n (t, x, y, n);
      ret += mpn_sub_n (t, t, z, n);
      return ret;
    }

  if (t == x && t == z)
    {
      ret = mpn_sub_n (t, x, z, n);
      ret += mpn_sub_n (t, t, y, n);
      return ret;
    }

  if (t == y && t == z)
    {
      ret = mpn_add_n (t, y, z, n);
      ret += mpn_sub_n (t, x, t, n);
      return ret;
    }

  if (t == x)
    {
      ret = mpn_sub_n (t, x, y, n);
      ret += mpn_sub_n (t, t, z, n);
      return ret;
    }

  if (t == y)
    {
      ret = mpn_sub_n (t, x, y, n);
      ret += mpn_sub_n (t, t, z, n);
      return ret;
    }

  if (t == z)
    {
      ret = mpn_sub_n (t, x, z, n);
      ret += mpn_sub_n (t, t, y, n);
      return ret;
    }

  ret = mpn_sub_n (t, x, z, n);
  ret += mpn_sub_n (t, t, y, n);
  return ret;
}
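/* A hedged usage sketch (hypothetical wrapper, not part of the library):
   the return value of mpn_subadd_n is the total borrow out of the top limb,
   which is at most 2 because each of the two chained subtractions can
   borrow at most 1.  */
static void
example_subadd (mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
  mp_limb_t borrow = mpn_subadd_n (t, x, y, z, n);
  ASSERT (borrow <= 2);
  (void) borrow;   /* a real caller would handle underflow of x - y - z here */
}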
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */
inline static void
mpn_mulshort_n_basecase (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;

  ASSERT (n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN (xp, n);
  ASSERT_MPN (yp, n);
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2;  /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */
  i = 0;

  /* Multiply w limbs from y + i by (2 + i + w - 1) limbs from
     x + (n - 2 - i - w + 1) and put the result at r + (n - 2 - w + 1), with
     the "overflow" (i.e. last) limb going to r + (n + i + w - 1), for i
     between 0 and n - 2.  i == n - w needs special treatment.  */

  /* We first multiply by the low order limb (or, depending on optional
     function availability, limbs).  This result can be stored, not added,
     to rp.  We also avoid a loop for zeroing this way.  */

#if HAVE_NATIVE_mpn_mul_2
  rp[n + 1] = mpn_mul_2 (rp + k - 1, xp + k - 1, 2 + 1, yp);
  i += 2;
#else
  rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
  i += 1;
#endif

#if HAVE_NATIVE_mpn_addmul_6
  while (i < n - 6)
    {
      rp[n + i + 6 - 1] = mpn_addmul_6 (rp + k - 6 + 1, xp + k - i - 6 + 1,
                                        2 + i + 6 - 1, yp + i);
      i += 6;
    }
  if (i == n - 6)
    {
      rp[n + n - 1] = mpn_addmul_6 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_5
  while (i < n - 5)
    {
      rp[n + i + 5 - 1] = mpn_addmul_5 (rp + k - 5 + 1, xp + k - i - 5 + 1,
                                        2 + i + 5 - 1, yp + i);
      i += 5;
    }
  if (i == n - 5)
    {
      rp[n + n - 1] = mpn_addmul_5 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_4
  while (i < n - 4)
    {
      rp[n + i + 4 - 1] = mpn_addmul_4 (rp + k - 4 + 1, xp + k - i - 4 + 1,
                                        2 + i + 4 - 1, yp + i);
      i += 4;
    }
  if (i == n - 4)
    {
      rp[n + n - 1] = mpn_addmul_4 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_3
  while (i < n - 3)
    {
      rp[n + i + 3 - 1] = mpn_addmul_3 (rp + k - 3 + 1, xp + k - i - 3 + 1,
                                        2 + i + 3 - 1, yp + i);
      i += 3;
    }
  if (i == n - 3)
    {
      rp[n + n - 1] = mpn_addmul_3 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_2
  while (i < n - 2)
    {
      rp[n + i + 2 - 1] = mpn_addmul_2 (rp + k - 2 + 1, xp + k - i - 2 + 1,
                                        2 + i + 2 - 1, yp + i);
      i += 2;
    }
  if (i == n - 2)
    {
      rp[n + n - 1] = mpn_addmul_2 (rp + i, xp, n, yp + i);
      return;
    }
#endif

  while (i < n - 1)
    {
      rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
      i += 1;
    }
  rp[n + n - 1] = mpn_addmul_1 (rp + i, xp, n, yp[i]);
  return;
}
/* ret + (xp, n) = (yp, n)*(zp, n) mod (2^b + 1)

   needs (tp, 2n) temp space, everything reduced mod 2^b
   inputs, outputs are fully reduced

   N.B: 2n is not the same as 2b rounded up to the nearest limb!  */
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
                            mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;
  TMP_DECL;

  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b;

  ASSERT (b > 0);
  ASSERT (n > 0);
  ASSERT_MPN (yp, n);
  ASSERT_MPN (zp, n);
  ASSERT (!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT (!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT (!MPN_OVERLAP_P (xp, n, yp, n));
  ASSERT (!MPN_OVERLAP_P (xp, n, zp, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT (k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT (k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs (n))
    {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;

      TMP_MARK;

      tx = TMP_BALLOC_LIMBS (3 * n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;

      MPN_COPY (ty, yp, n);
      MPN_COPY (tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      while ((((mp_limb_t) 1) << depth) < b)
        depth++;

      if (depth < 12)
        off = mulmod_2expp1_table_n[0];
      else
        off = mulmod_2expp1_table_n[MIN (depth, FFT_N_NUM + 11) - 12];

      depth1 = depth / 2 - off;
      w1 = b / (((mp_limb_t) 1) << (2 * depth1));

      mpir_fft_mulmod_2expp1 (tx, ty, tz, n, depth1, w1);

      MPN_COPY (xp, tx, n);
      c = tx[n];        /* save the carry limb before the temp space is freed */

      TMP_FREE;

      return c;
    }
#endif

  if (yp == zp)
    mpn_sqr (tp, yp, n);
  else
    mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      c = mpn_sub_n (xp, tp, tp + n, n);
      return mpn_add_1 (xp, xp, n, c);
    }

  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;

  return c;
}
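/* A worked sketch of the wrap-around reduction used above, not authoritative
   for edge cases: writing the double-length product as lo + hi*B^n with
   B^n = 2^b (the k == 0 case), we have B^n == -1 (mod 2^b + 1), so the
   residue is simply lo - hi (mod 2^b + 1); the borrow from mpn_sub_n is
   folded back in with mpn_add_1.  A small example with b = 8: for lo = 200,
   hi = 45 the product is 200 + 256*45 = 11720, and 11720 mod 257 = 155 =
   200 - 45.  The general k != 0 case first shifts the high part so both
   halves are aligned to bit b.  */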
mp_limb_t
mpn_div_qr_1n_pi2 (mp_ptr qp, mp_srcptr up, mp_size_t un,
                   struct precomp_div_1_pi2 *pd)
{
  mp_limb_t most_significant_q_limb;
  mp_size_t i;
  mp_limb_t r, u2, u1, u0;
  mp_limb_t d0, di1, di0;
  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
  mp_limb_t cnd;

  ASSERT (un >= 2);
  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
  ASSERT_MPN (up, un);

#define q3 q3a
#define q2 q2b
#define q1 q1b

  up += un - 3;
  r = up[2];
  d0 = pd->d;

  most_significant_q_limb = (r >= d0);
  r -= d0 & -most_significant_q_limb;

  qp += un - 3;
  qp[2] = most_significant_q_limb;

  di1 = pd->dip[1];
  di0 = pd->dip[0];

  for (i = un - 3; i >= 0; i -= 2)
    {
      u2 = r;
      u1 = up[1];
      u0 = up[0];

      /* Dividend in {r,u1,u0} */

      umul_ppmm (q1d, q0d, u1, di0);
      umul_ppmm (q2b, q1b, u1, di1);
      q2b++;                            /* cannot spill */
      add_sssaaaa (r, q2b, q1b, q2b, q1b, u1, u0);

      umul_ppmm (q2c, q1c, u2, di0);
      add_sssaaaa (r, q2b, q1b, q2b, q1b, q2c, q1c);
      umul_ppmm (q3a, q2a, u2, di1);

      add_sssaaaa (r, q2b, q1b, q2b, q1b, q2a, q1d);

      q3 += r;

      r = u0 - q2 * d0;

      cnd = (r >= q1);
      r += d0 & -cnd;
      sub_ddmmss (q3, q2, q3, q2, 0, cnd);

      if (UNLIKELY (r >= d0))
        {
          r -= d0;
          add_ssaaaa (q3, q2, q3, q2, 0, 1);
        }

      qp[0] = q2;
      qp[1] = q3;

      up -= 2;
      qp -= 2;
    }

  if ((un & 1) == 0)
    {
      u2 = r;
      u1 = up[1];

      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);

      qp[1] = q3;
    }

  return r;

#undef q3
#undef q2
#undef q1
}
mp_limb_t
mpn_sb_divrem_mn (mp_ptr qp,
                  mp_ptr np, mp_size_t nn,
                  mp_srcptr dp, mp_size_t dn)
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t qn = nn - dn;
  mp_size_t i;
  mp_limb_t dx, d1, n0;
  mp_limb_t dxinv;
  int use_preinv;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  np += qn;
  dx = dp[dn - 1];
  d1 = dp[dn - 2];
  n0 = np[dn - 1];

  if (n0 >= dx)
    {
      if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
        {
          mpn_sub_n (np, np, dp, dn);
          most_significant_q_limb = 1;
        }
    }

  use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
  if (use_preinv)
    invert_limb (dxinv, dx);

  for (i = qn - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t nx;
      mp_limb_t cy_limb;

      nx = np[dn - 1];          /* FIXME: could get value from r1 */
      np--;

      if (nx == dx)
        {
          /* This might over-estimate q, but it's probably not worth
             the extra code here to find out.  */
          q = GMP_NUMB_MASK;

#if 1
          cy_limb = mpn_submul_1 (np, dp, dn, q);
#else
          /* This should be faster on many machines */
          cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
          cy = mpn_add_n (np, np, dp, dn);
          np[dn] += cy;
#endif

          if (nx != cy_limb)
            {
              mpn_add_n (np, np, dp, dn);
              q--;
            }

          qp[i] = q;
        }
      else
        {
          mp_limb_t rx, r1, r0, p1, p0;

          /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
             usage when np[dn-1] is used in an asm statement like umul_ppmm
             in udiv_qrnnd_preinv.  The symptom is seg faults due to
             registers being clobbered.  gcc 2.95 i386 doesn't have the
             problem. */
          {
            mp_limb_t workaround = np[dn - 1];
            if (CACHED_ABOVE_THRESHOLD (use_preinv, DIV_SB_PREINV_THRESHOLD))
              udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
            else
              {
                udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
                            dx << GMP_NAIL_BITS);
                r1 >>= GMP_NAIL_BITS;
              }
          }
          umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
          p0 >>= GMP_NAIL_BITS;

          r0 = np[dn - 2];
          rx = 0;
          if (r1 < p1 || (r1 == p1 && r0 < p0))
            {
              p1 -= p0 < d1;
              p0 = (p0 - d1) & GMP_NUMB_MASK;
              q--;
              r1 = (r1 + dx) & GMP_NUMB_MASK;
              rx = r1 < dx;
            }

          p1 += r0 < p0;        /* cannot carry! */
          rx -= r1 < p1;        /* may become 11..1 if q is still too large */
          r1 = (r1 - p1) & GMP_NUMB_MASK;
          r0 = (r0 - p0) & GMP_NUMB_MASK;

          cy_limb = mpn_submul_1 (np, dp, dn - 2, q);

          /* Check if we've over-estimated q, and adjust as needed.  */
          {
            mp_limb_t cy1, cy2;
            cy1 = r0 < cy_limb;
            r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
            cy2 = r1 < cy1;
            r1 -= cy1;
            np[dn - 1] = r1;
            np[dn - 2] = r0;
            if (cy2 != rx)
              {
                mpn_add_n (np, np, dp, dn);
                q--;
              }
          }

          qp[i] = q;
        }
    }

  /*  ______ ______ ______
     |__rx__|__r1__|__r0__|          partial remainder
             ______ ______
         -  |__p1__|__p0__|          partial product to subtract
             ______ ______
         -  |______|cylimb|

      rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
      carry out).  If rx=-1 then q is too large.  If rx=0, then q
      might be too large, but it is most likely correct.  */

  return most_significant_q_limb;
}