/* Multiply (yp, n) by (zp, n) modulo 2^b + 1, with n = BITS_TO_LIMBS (b).

   In the notation used by this file:
       ret + (xp, n) = (yp, n) * (zp, n) % 2^b + 1
   i.e. n limbs are stored at xp and the function returns the carry limb.

   Inputs must be reduced mod 2^b (the ASSERTs check that the unused high
   bits of the top limb of each operand are zero); outputs are fully reduced.

   (tp, 2n) is scratch space that receives the double-length product.  It
   must not overlap yp or zp; xp may coincide with tp or tp + n.
   N.B. 2n limbs is not the same as 2b bits rounded up to the nearest limb!  */
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
                            mpir_ui b, mp_ptr tp)
{
  mp_size_t limbs, pad;
  mp_limb_t carry, top_limb, keep_mask;

  TMP_DECL;

  limbs = BITS_TO_LIMBS (b);
  /* Number of unused high bits in the most significant limb.  */
  pad = GMP_NUMB_BITS * limbs - b;

  ASSERT (b > 0);
  ASSERT (limbs > 0);
  ASSERT_MPN (yp, limbs);
  ASSERT_MPN (zp, limbs);
  ASSERT (!MPN_OVERLAP_P (tp, 2 * limbs, yp, limbs));
  ASSERT (!MPN_OVERLAP_P (tp, 2 * limbs, zp, limbs));
  ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp, limbs));
  ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp + limbs, limbs));
  ASSERT (pad == 0 || yp[limbs - 1] >> (GMP_NUMB_BITS - pad) == 0);
  ASSERT (pad == 0 || zp[limbs - 1] >> (GMP_NUMB_BITS - pad) == 0);

#ifndef TUNE_PROGRAM_BUILD
  /* FFT path: only for whole-limb b, above the cutoff, and when the limb
     count is already one that mpir_fft_adjust_limbs leaves unchanged.  */
  if (pad == 0 && limbs > FFT_MULMOD_2EXPP1_CUTOFF
      && limbs == mpir_fft_adjust_limbs (limbs))
    {
      mp_bitcnt_t lg, fft_depth;
      mp_size_t adj, fft_w;
      mp_ptr prod, op1, op2;
      mp_limb_t top;

      TMP_MARK;

      /* A single allocation carved into three (n + 1)-limb buffers.  */
      prod = TMP_BALLOC_LIMBS (3 * limbs + 3);
      op1 = prod + limbs + 1;
      op2 = op1 + limbs + 1;

      /* Copy the operands and clear the extra top limb of each copy.  */
      MPN_COPY (op1, yp, limbs);
      op1[limbs] = 0;
      MPN_COPY (op2, zp, limbs);
      op2[limbs] = 0;

      /* Smallest lg >= 1 such that 2^lg >= b.  */
      for (lg = 1; (((mp_limb_t) 1) << lg) < b; lg++)
        ;

      /* Table-driven adjustment; entries start at lg == 12 and the index is
         clamped at both ends.  */
      adj = mulmod_2expp1_table_n[lg < 12
                                  ? 0
                                  : (MIN (lg, FFT_N_NUM + 11) - 12)];

      fft_depth = lg / 2 - adj;
      fft_w = b / (((mp_limb_t) 1) << (2 * fft_depth));

      mpir_fft_mulmod_2expp1 (prod, op1, op2, limbs, fft_depth, fft_w);

      /* Low n limbs are the result; the extra limb is returned.  */
      MPN_COPY (xp, prod, limbs);
      top = prod[limbs];

      TMP_FREE;

      return top;
    }
#endif

  /* Full 2n-limb product into tp; use the squaring routine when both
     operands are the same pointer.  */
  if (yp != zp)
    mpn_mul_n (tp, yp, zp, limbs);
  else
    mpn_sqr (tp, yp, limbs);

  if (pad == 0)
    {
      /* b is a whole number of limbs: subtract the high half from the low
         half and add the borrow back in.  */
      carry = mpn_sub_n (xp, tp, tp + limbs, limbs);

      return mpn_add_1 (xp, xp, limbs, carry);
    }

  /* b is not limb aligned: the split between low and high parts falls inside
     limb n - 1.  Remember that limb, then trim the low part to b bits.  */
  keep_mask = GMP_NUMB_MASK >> pad;
  top_limb = tp[limbs - 1];
  tp[limbs - 1] &= keep_mask;

#if HAVE_NATIVE_mpn_sublsh_nc
  carry = mpn_sublsh_nc (xp, tp, tp + limbs, limbs, pad, top_limb);
#else
  {
    mp_limb_t shifted_out;

    /* Shift the upper half left by pad bits and bring in the bits of the
       saved limb that lie above the mask, then subtract from the low part.  */
    shifted_out = mpn_lshift (tp + limbs, tp + limbs, limbs, pad);
    tp[limbs] |= top_limb >> (GMP_NUMB_BITS - pad);
    carry = mpn_sub_n (xp, tp, tp + limbs, limbs) + shifted_out;
  }
#endif

  carry = mpn_add_1 (xp, xp, limbs, carry);
  xp[limbs - 1] &= keep_mask;

  return carry;
}
/* Divide {np, nn} by {dp, dn}, storing qn = nn - dn quotient limbs at qp and
   returning qh.

   An approximate quotient with one extra low limb is obtained from
   mpn_mu_divappr_q.  If that low limb exceeds the error bound, the upper qn
   limbs are taken as the quotient.  Otherwise the candidate quotient is
   multiplied back by the divisor and compared with the numerator, and
   decremented by one if it turned out too large.

   scratch is passed through unchanged to mpn_mu_divappr_q.  */
mp_limb_t
mpn_mu_div_q (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn,
              mp_ptr scratch)
{
  mp_ptr q_appr;                /* qn + 1 limbs: quotient plus low guard limb */
  mp_ptr work;                  /* numerator copy, later the check product */
  mp_size_t qn;
  mp_limb_t qh, err_bound;
  int long_quotient;

  TMP_DECL;

  TMP_MARK;

  qn = nn - dn;
  long_quotient = qn >= dn;     /* nn >= 2*dn + 1 */

  q_appr = TMP_BALLOC_LIMBS (qn + 1);

  if (long_quotient)
    {
      /* |_______________________|   dividend
                        |________|   divisor  */
      mp_ptr hi;
      mp_limb_t appr_cy;

      /* Work on a copy of the numerator extended by one zero low limb.  */
      work = TMP_BALLOC_LIMBS (nn + 1);
      work[0] = 0;
      MPN_COPY (work + 1, np, nn);

      /* If the top dn limbs are >= the divisor, set qh and subtract the
         divisor from them.  */
      hi = work + 1 + nn - dn;
      qh = mpn_cmp (hi, dp, dn) >= 0;
      if (qh != 0)
        mpn_sub_n (hi, hi, dp, dn);

      appr_cy = mpn_mu_divappr_q (q_appr, work, nn + 1, dp, dn, scratch);

      if (UNLIKELY (appr_cy != 0))
        {
          /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
             canonically reduced, replace the returned value of
             B^(qn-dn)+eps by the largest possible value.  */
          mp_size_t i;
          for (i = qn; i >= 0; i--)
            q_appr[i] = GMP_NUMB_MAX;
        }

      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb
         is not larger than that, the quotient cannot be trusted.  */
      err_bound = 4;
    }
  else
    {
      /* |_______________________|   dividend
                |________________|   divisor  */

      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size
         passed here becomes 2dn, i.e., more than nn.  This shouldn't hurt,
         since only the most significant dn-1 limbs will actually be read,
         but it is not pretty.  */
      qh = mpn_mu_divappr_q (q_appr,
                             np + nn - (2 * qn + 2), 2 * qn + 2,
                             dp + dn - (qn + 1), qn + 1,
                             scratch);

      /* The max error of mpn_mu_divappr_q is +4, but there is an additional
         error from the divisor truncation.  */
      err_bound = 6;
    }

  if (q_appr[0] > err_bound)
    {
      /* Guard limb is large enough: drop it and keep the upper qn limbs.  */
      MPN_COPY (qp, q_appr + 1, qn);
    }
  else
    {
      mp_limb_t over;

      /* Multiply the candidate quotient back by the divisor.  mpn_mul takes
         the longer operand first, hence the two call forms.  */
      if (long_quotient)
        {
          /* Reuse the numerator copy as product space.  */
          mpn_mul (work, q_appr + 1, qn, dp, dn);
        }
      else
        {
          /* FIXME: a shorter product should be enough; we may use already
             allocated space...  */
          work = TMP_BALLOC_LIMBS (nn);
          mpn_mul (work, dp, dn, q_appr + 1, qn);
        }

      /* Account for qh by adding the divisor at limb offset qn.  */
      over = (qh != 0) ? mpn_add_n (work + qn, work + qn, dp, dn) : 0;

      /* The candidate is wrong by at most one, so a single decrement is
         enough (no loop needed).  */
      if (over || mpn_cmp (work, np, nn) > 0)
        qh -= mpn_sub_1 (qp, q_appr + 1, qn, 1);
      else
        MPN_COPY (qp, q_appr + 1, qn);
    }

  TMP_FREE;
  return qh;
}