/*
   Computes the quotient and remainder of { np, 2*dn } by { dp, dn }.

   We require dp to be normalised and inv to be a precomputed inverse
   of { dp, dn } given by mpn_invert.

   qp  : receives the dn low limbs of the quotient.
   np  : the 2*dn limb numerator; it is modified in place. The final
         correction loop leaves np[dn] == 0 and { np, dn } < { dp, dn },
         i.e. the remainder is left in the low dn limbs of np.
   dp  : the dn limb divisor.
   dn  : the size of the divisor in limbs.
   inv : the dn limb precomputed inverse (checked with mpn_is_invert
         in ASSERT builds).

   Returns ret + ret2, the part of the quotient that does not fit in
   { qp, dn }; the final ASSERT states that this is 0 or 1.
*/
mp_limb_t
mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
                 mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_size_t m;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(mpn_is_invert(inv, dp, dn));

   /* If the high half of N is >= D, subtract D from it once and
      remember that in ret2. */
   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }

   /* Quotient estimate: multiply the top dn + 1 limbs of N by inv and
      add the high half of N (the implicit B^dn term of X, see below). */
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy);

   /*
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + dn - 1, dn + 1 }
      N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the
      latter case the left hand bound is either an integer or < 2/B below one.
   */

   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }

   /* Step the estimate down by one; if that borrows out of { qp, dn }
      (ret wraps to all ones) undo it again. */
   ret -= mpn_sub_1(qp, qp, dn, 1);
   if (UNLIKELY(ret == ~CNST_LIMB(0)))
      ret += mpn_add_1(qp, qp, dn, 1);
   /* ret is now guaranteed to be 0 */
   ASSERT(ret == 0);

   /* Form (the low m limbs of) qp * dp in tp so that it can be
      subtracted from N. */
   m = dn + 1;
   if ((dn <= MPN_FFT_MUL_N_MINSIZE) || (ret))
   {
      mpn_mul_n(tp, qp, dp, dn);
   }
   else
   {
      mp_limb_t cy2;

      if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
         m = mpir_fft_adjust_limbs (m);

      /* {tp, m} = qp * dp mod (B^m+1). The value returned by the call
         is not used by this function. */
      mpn_mulmod_Bexpp1_fft (tp, m, qp, dn, dp, dn);

      /* Add the limbs of N from position m upwards, propagating the
         carry through the rest of {tp, m}. */
      cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);
      mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);

      /* Make correction: dp[0]*qp[0] is the true low limb of the
         product, so subtract whatever tp[0] is off by. */
      mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);
   }

   mpn_sub_n(np, np, tp, m);
   MPN_ZERO(np + m, 2*dn - m);

   /* Final adjustment: while the partial remainder is still >= D,
      bump the quotient and subtract D. */
   while (np[dn] || mpn_cmp(np, dp, dn) >= 0)
   {
      ret += mpn_add_1(qp, qp, dn, 1);
      np[dn] -= mpn_sub_n(np, np, dp, dn);
   }

   /* Not possible for ret == 2 as we have qp*dp <= np */
   ASSERT(ret + ret2 < 2);

   TMP_FREE;

   return ret + ret2;
}
/* Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is either correct or one too large. We require dp to be normalised and inv to be a precomputed inverse given by mpn_invert. */ mp_limb_t mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t dn, mp_srcptr inv) { mp_limb_t cy, lo, ret = 0, ret2 = 0; mp_ptr tp; TMP_DECL; TMP_MARK; ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT); ASSERT(mpn_is_invert(inv, dp, dn)); if (mpn_cmp(np + dn, dp, dn) >= 0) { ret2 = 1; mpn_sub_n(np + dn, np + dn, dp, dn); } tp = TMP_ALLOC_LIMBS(2*dn + 1); mpn_mul(tp, np + dn - 1, dn + 1, inv, dn); add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]); ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn); ret += mpn_add_1(qp, qp, dn, cy + 1); /* Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then DX < B^{2*dn} <= D(X+1), thus Let N' = { np + n - 1, n + 1 } N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1 N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B There is either one integer in this range, or two. However, in the latter case the left hand bound is either an integer or < 2/B below one. */ if (UNLIKELY(ret == 1)) { ret -= mpn_sub_1(qp, qp, dn, 1); ASSERT(ret == 0); } if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) { /* Special case, multiply out to get accurate quotient */ ret -= mpn_sub_1(qp, qp, dn, 1); if (UNLIKELY(ret == ~CNST_LIMB(0))) ret += mpn_add_1(qp, qp, dn, 1); /* ret is now guaranteed to be 0*/ ASSERT(ret == 0); mpn_mul_n(tp, qp, dp, dn); mpn_sub_n(tp, np, tp, dn+1); while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0) { ret += mpn_add_1(qp, qp, dn, 1); tp[dn] -= mpn_sub_n(tp, tp, dp, dn); } /* Not possible for ret == 2 as we have qp*dp <= np */ ASSERT(ret + ret2 < 2); } TMP_FREE; return ret + ret2; }
/*
   Input: A = {ap, n} with most significant bit set.
   Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.

   X is a lower approximation of B^(2n)/A with implicit msb.
   More precisely, one has:

              A*X < B^(2n) <= A*(X+1)

   or X = ceil(B^(2n)/A) - 1.

   xp : receives the n low limbs of X (the leading B^n is implicit).
   ap : the n limb operand A.
   n  : size in limbs. n == 1 and n == 2 are handled directly; larger n
        splits into l = (n - 1) / 2 low limbs and h = n - l high limbs,
        recursively inverts the high h limbs of A into {xp + l, h}, and
        then refines the result to n limbs.
*/
void
mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
  if (n == 1)
    {
      /* invert_limb returns min(B-1, floor(B^2/ap[0])-B),
         which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.
         For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where
         the equality holds only when A=B/2.
         We thus have A*X < B^2 <= A*(X+1).
      */
      invert_limb (xp[0], ap[0]);
    }
  else if (n == 2)
    {
      mp_limb_t tp[4], up[2], sp[2], cy;

      /* High limb first: Xh = B + xp[1] from the top limb of A, then
         {tp + 1, 3} = A * Xh (the add_n supplies the implicit B). */
      tp[0] = ZERO;
      invert_limb (xp[1], ap[1]);
      tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);
      cy = mpn_add_n (tp + 2, tp + 2, ap, 2);
      while (cy) /* Xh is too large */
        {
          xp[1] --;
          cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);
        }
      /* tp[3] should be 111...111 */

      /* {sp, 2} = two's complement of {tp + 1, 2} */
      mpn_com_n (sp, tp + 1, 2);
      cy = mpn_add_1 (sp, sp, 2, ONE);
      /* cy should be 0 */

      /* Low limb estimate: high limb of sp[1] * Xh. */
      up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);
      cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);
      /* cy should be 0 */
      xp[0] = up[1];

      /* update tp */
      cy = mpn_addmul_1 (tp, ap, 2, xp[0]);
      cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);

      /* Keep adding A (and incrementing X) until the 4-limb sum
         carries out. */
      do
        {
          cy = mpn_add (tp, tp, 4, ap, 2);
          if (cy == ZERO)
            mpn_add_1 (xp, xp, 2, ONE);
        }
      while (cy == ZERO);

      /* now A*X < B^4 <= A*(X+1) */
    }
  else
    {
      mp_size_t l, h;
      mp_ptr tp, up;
      mp_limb_t cy, th;
      int special = 0;
      TMP_DECL;

      l = (n - 1) / 2;
      h = n - l;

      /* Xh: inverse of the high h limbs of A, stored in {xp + l, h}. */
      mpn_invert (xp + l, ap + l, h);

      TMP_MARK;
      tp = TMP_ALLOC_LIMBS (n + h);
      up = TMP_ALLOC_LIMBS (2 * h);

      if (n <= WRAP_AROUND_BOUND)
        {
          /* Full product A * {xp + l, h}, plus A * B^h for the
             implicit leading limb of Xh. */
          mpn_mul (tp, ap, n, xp + l, h);
          cy = mpn_add_n (tp + h, tp + h, ap, n);
        }
      else
        {
          mp_size_t m = n + 1;
          int cc;

          if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
             m = mpir_fft_adjust_limbs (m);
          /* we have m >= n + 1 by construction, thus m > h */
          ASSERT(m < n + h);
          cy = mpn_mulmod_Bexpp1_fft (tp, m, ap, n, xp + l, h);
          /* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */
          cy += mpn_add_n (tp + h, tp + h, ap, m - h);
          cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);
          cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);
          if (cc > cy) /* can only occur if cc=1 and cy=0 */
            cy = mpn_add_1 (tp, tp, m, ONE);
          else
            cy -= cc;
          /* cy, {tp, m} = A * Xh */

          /* add B^(n+h) + B^(n+h-m) */
          MPN_ZERO (tp + m, n + h - m);
          tp[m] = cy;
          /* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,
             the mpn_incr_u() below cannot produce a carry */
          mpn_incr_u (tp + n + h - m, ONE);
          cy = 1;
          do /* check if T >= B^(n+h) + 2*B^n */
            {
              mp_size_t i;

              if (cy == ZERO)
                break; /* surely T < B^(n+h) */
              if (cy == ONE)
                {
                  /* scan down past zero limbs above position n */
                  for (i = n + h - 1; tp[i] == ZERO && i > n; i--);
                  if (i == n && tp[i] < (mp_limb_t) 2)
                    break;
                }
              /* subtract B^m+1 */
              cy -= mpn_sub_1 (tp, tp, n + h, ONE);
              cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);
            }
          while (1);
        }

      /* While the product carries out, Xh is too large: decrement it
         and take A off the product. */
      while (cy)
        {
          mpn_sub_1 (xp + l, xp + l, h, ONE);
          cy -= mpn_sub (tp, tp, n + h, ap, n);
        }

      /* Negate {tp, n}; th combines the complemented limb tp[n] with
         the carry out of the + 1. */
      mpn_not (tp, n);
      th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE);

      /* {up, 2h} = {tp + l, h} * Xh, with Xh's implicit leading limb
         handled by the add_n into the upper half. */
      mpn_mul_n (up, tp + l, xp + l, h);
      cy = mpn_add_n (up + h, up + h, tp + l, h);
      if (th != ZERO)
        {
          cy += ONE + mpn_add_n (up + h, up + h, xp + l, h);
        }

      /* With wrapping limb arithmetic this test holds exactly when the
         limb just below the part copied into xp is one of the four
         largest limb values; in that case the result is verified below. */
      if (up[2*h-l-1] + 4 <= CNST_LIMB(3)) special = 1;

      MPN_COPY (xp, up + 2 * h - l, l);
      mpn_add_1 (xp + l, xp + l, h, cy);
      TMP_FREE;

      /* Borderline case: if X fails the inverse check, bump it by one. */
      if ((special) && !mpn_is_invert(xp, ap, n))
         mpn_add_1 (xp, xp, n, 1);
    }
}