/* Size helper that accompanies mpn_binvert for an operand of n limbs.

   NOTE(review): presumably this is the number of scratch limbs the caller
   must supply to mpn_binvert -- confirm against the callers.

   When FFT support is compiled in and n is above twice
   MUL_FFT_MODF_THRESHOLD, the result is the FFT size chosen by
   mpn_fft_next_size for n.  Otherwise it is three times n/2 rounded up.  */
mp_size_t
mpn_binvert_itch (mp_size_t n)
{
  mp_size_t upper_half;

#if WANT_FFT
  if (ABOVE_THRESHOLD (n, 2 * MUL_FFT_MODF_THRESHOLD))
    return mpn_fft_next_size (n, mpn_fft_best_k (n, 0));
#endif

  /* n - floor(n/2), i.e. n/2 rounded up for non-negative n.  */
  upper_half = n - (n >> 1);
  return 3 * upper_half;
}
/* Write n limbs to rp, derived from the n-limb operand at up, using scratch
   as temporary space.

   NOTE(review): judging by the name and the structure (a base-case
   division of 1 by {up,...} followed by Newton refinement), the result is
   presumably the inverse of {up,n} modulo B^n, B being the limb base, and
   up[0] presumably has to be odd -- confirm against the library docs.

   Outline:
     1. Build the list of working precisions by repeatedly halving n
        (rounding up) while the precision is above BINV_NEWTON_THRESHOLD.
     2. Compute a base value at the smallest precision with a bdiv_q call.
     3. Walk the list back up, each Newton step extending the result from
        the current precision to the next larger one.  */
void
mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_size_t precs[NPOWS];       /* precisions still to be reached */
  mp_size_t *top = precs;       /* one past the last stored precision */
  mp_size_t prec = n;           /* current working precision */
  mp_ptr tp = scratch;
  mp_limb_t inv0;

  /* Record the precisions from the highest to the lowest; on exit 'prec'
     holds the base case size.  */
  while (ABOVE_THRESHOLD (prec, BINV_NEWTON_THRESHOLD))
    {
      *top++ = prec;
      prec = (prec + 1) >> 1;
    }

  /* Base value: divide the constant 1 (held in tp) by {up,prec} with a
     low-overhead O(n^2) routine, or the divide-and-conquer variant once
     prec reaches DC_BDIV_Q_THRESHOLD.
     FIXME: We should call some divide-and-conquer lsb division function
     here for an operand subrange.  */
  MPN_ZERO (tp, prec);
  tp[0] = 1;
  binvert_limb (inv0, up[0]);
  if (BELOW_THRESHOLD (prec, DC_BDIV_Q_THRESHOLD))
    mpn_sb_bdiv_q (rp, tp, prec, up, prec, -inv0);
  else
    mpn_dc_bdiv_q (rp, tp, prec, up, prec, -inv0);

  /* Newton iterations: each pass fills in limbs prec .. next_prec-1 of rp.  */
  while (prec < n)
    {
      mp_size_t next_prec = *--top;
      mp_size_t extra = next_prec - prec;   /* limbs gained by this step */
      mp_ptr r_high = rp + prec;
      mp_ptr t_high = tp + prec;

#if WANT_FFT
      if (ABOVE_THRESHOLD (next_prec, 2 * MUL_FFT_MODF_THRESHOLD))
        {
          int k = mpn_fft_best_k (next_prec, 0);
          mp_size_t m = mpn_fft_next_size (next_prec, k);
          mp_size_t i;
          int fixup = 0;

          mpn_mul_fft (tp, m, up, next_prec, rp, prec, k);

          /* Scan the low 'prec' limbs of the product from the top down.
             If limb 0 exceeds 1, or any other of those limbs is nonzero,
             add 1 to the part above them.
             NOTE(review): presumably this compensates for the FFT product
             being reduced rather than computed in full -- confirm.  */
          for (i = prec; i-- > 0; )
            if (tp[i] > (i == 0))
              {
                fixup = 1;
                break;
              }
          if (fixup)
            mpn_add_1 (t_high, t_high, extra, 1);
        }
      else
#endif
        {
          mpn_mul (tp, up, next_prec, rp, prec);
        }

      /* New high limbs = -(rp * high part of the product), truncated to
         'extra' limbs.  */
      mpn_mullow_n (r_high, rp, t_high, extra);
      mpn_neg_n (r_high, r_high, extra);

      prec = next_prec;
    }
}
/* Map n to a size that is never smaller than n, chosen by size class:

     - below MULMOD_BNM1_THRESHOLD:            n itself;
     - below 4*(MULMOD_BNM1_THRESHOLD-1)+1:    n rounded up to a multiple of 2;
     - below 8*(MULMOD_BNM1_THRESHOLD-1)+1:    n rounded up to a multiple of 4;
     - otherwise, with nh = n/2 rounded up:
         n rounded up to a multiple of 8 if nh is below
         MUL_FFT_MODF_THRESHOLD, else twice the FFT size that
         mpn_fft_next_size selects for nh.

   NOTE(review): presumably these are the operand sizes the mulmod_bnm1
   code handles most efficiently -- confirm against its callers.  */
mp_size_t
mpn_mulmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t align;      /* power of two that n is rounded up to */

  if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
    return n;

  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    align = 2;
  else if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    align = 4;
  else
    {
      mp_size_t nh = (n + 1) >> 1;

      if (!BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
        return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));

      align = 8;
    }

  /* Round up to the next multiple of 'align' (a power of two).  */
  return (n + (align - 1)) & -align;
}