/* Approximate the n most significant limbs of the square of {np, n},
   storing them in rp[n..2n-1].  The error is less than n ulps of rp[n].
   rp must provide room for 2n limbs.  */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t split;

  MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* guarantees split < n below */

  /* Choose the split point: tabulated for small n, (n+4)/2 otherwise,
     which ensures split >= (n+3)/2.  */
  if (MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE))
    split = sqrhigh_ktab[n];
  else
    split = (n + 4) / 2;
  MPFR_ASSERTD (split == -1 || split == 0
                || (split >= (n + 4) / 2 && split < n));

  if (split < 0)
    {
      /* Compute the full square.  mpn_sqr_basecase cannot be used here:
         it requires n <= SQR_KARATSUBA_THRESHOLD, and GMP does not
         export that constant.  */
      mpn_sqr_n (rp, np, n);
    }
  else if (split == 0)
    {
      /* Basecase short product of np with itself.  */
      mpfr_mulhigh_n_basecase (rp, np, np, n);
    }
  else
    {
      mp_size_t lo = n - split;   /* limbs below the split point */
      mp_limb_t carry;

      /* Exact square of the high `split' limbs: fills rp[2*lo..2n-1].  */
      mpn_sqr_n (rp + 2 * lo, np + lo, split);
      /* Approximate high/low cross product: fills rp[lo-1..2*lo-1].  */
      mpfr_mulhigh_n (rp, np, np + split, lo);
      /* Fold in twice the cross product:
         {rp+n-1, lo+1} += 2 * {rp+lo-1, lo+1}.  */
      carry = mpn_lshift (rp + lo - 1, rp + lo - 1, lo + 1, 1);
      carry += mpn_add_n (rp + n - 1, rp + n - 1, rp + lo - 1, lo + 1);
      mpn_add_1 (rp + n + lo, rp + n + lo, split, carry); /* propagate carry */
    }
}
/* Set r = b^el mod m, where el is an unsigned long exponent.
   m must be nonzero (DIVIDE_BY_ZERO otherwise).  Computation is done on
   the absolute values of b and m; the sign of b is folded back in at the
   end when the exponent is odd.  Uses plain square-and-multiply, scanning
   the exponent bits from the most significant downward.  */
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  mp_ptr xp, tp, qp, mp, bp;
  mp_size_t xn, tn, mn, bn;
  int m_zero_cnt;
  int c;
  mp_limb_t e;
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ(m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  if (el == 0)
    {
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending on if
	 MOD equals 1.  */
      SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
      PTR(r)[0] = 1;
      return;
    }

  TMP_MARK;

  /* Normalize m (i.e. make its most significant bit set) as required by
     division functions below.  */
  count_leading_zeros (m_zero_cnt, mp[mn - 1]);
  m_zero_cnt -= GMP_NAIL_BITS;
  if (m_zero_cnt != 0)
    {
      mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
      mpn_lshift (new_mp, mp, mn, m_zero_cnt);
      mp = new_mp;
    }

  bn = ABSIZ(b);
  bp = PTR(b);
  if (bn > mn)
    {
      /* Reduce possibly huge base.  Use a function call to reduce, since we
	 don't want the quotient allocation to live until function return.  */
      mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
      reduce (new_bp, bp, bn, mp, mn);
      bp = new_bp;
      bn = mn;
      /* Canonicalize the base, since we are potentially going to multiply with
	 it quite a few times.  */
      MPN_NORMALIZE (bp, bn);
    }

  /* Base reduced to zero: result is zero.  */
  if (bn == 0)
    {
      SIZ(r) = 0;
      TMP_FREE;
      return;
    }

  /* tp: double-size product buffer; xp: running residue; qp: scratch
     quotient for mpn_tdiv_qr (discarded).  */
  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
  xp = TMP_ALLOC_LIMBS (mn);
  qp = TMP_ALLOC_LIMBS (mn + 1);

  MPN_COPY (xp, bp, bn);
  xn = bn;

  /* Left-justify the exponent in e and drop its leading 1 bit (that bit is
     accounted for by starting with x = b).  c counts the remaining bits.  */
  e = el;
  count_leading_zeros (c, e);
  e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
  c = BITS_PER_MP_LIMB - 1 - c;

  /* Main loop. */

  /* If m is already normalized (high bit of high limb set), and b is the
     same size, but a bigger value, and e==1, then there's no modular
     reductions done and we can end up with a result out of range at the
     end. */
  if (c == 0)
    {
      if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
        mpn_sub_n (xp, xp, mp, mn);
      goto finishup;
    }

  while (c != 0)
    {
      /* Square the running residue, reducing mod m only when the product
	 reaches mn limbs.  */
      mpn_sqr_n (tp, xp, xn);
      tn = 2 * xn; tn -= tp[tn - 1] == 0;
      if (tn < mn)
	{
	  MPN_COPY (xp, tp, tn); xn = tn;
	}
      else
	{
	  mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn); xn = mn;
	}

      /* Current exponent bit set (tested via the sign bit of e): multiply
	 by the base.  */
      if ((mp_limb_signed_t) e < 0)
	{
	  mpn_mul (tp, xp, xn, bp, bn);
	  tn = xn + bn; tn -= tp[tn - 1] == 0;
	  if (tn < mn)
	    {
	      MPN_COPY (xp, tp, tn); xn = tn;
	    }
	  else
	    {
	      mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn); xn = mn;
	    }
	}
      e <<= 1;
      c--;
    }

 finishup:
  /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
     with the original MOD: shift x up, reduce by the shifted m, shift
     back down.  */
  if (m_zero_cnt != 0)
    {
      mp_limb_t cy;
      cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
      tp[xn] = cy; xn += cy != 0;

      if (xn < mn)
	{
	  MPN_COPY (xp, tp, xn);
	}
      else
	{
	  mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
	  xn = mn;
	}
      mpn_rshift (xp, xp, xn, m_zero_cnt);
    }
  MPN_NORMALIZE (xp, xn);

  /* Negative base with odd exponent: result is m - |x| (nonzero case).  */
  if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
    {
      mp = PTR(m);			/* want original, unnormalized m */
      mpn_sub (xp, mp, mn, xp, xn);
      xn = mn;
      MPN_NORMALIZE (xp, xn);
    }
  MPZ_REALLOC (r, xn);
  SIZ (r) = xn;
  MPN_COPY (PTR(r), xp, xn);

  TMP_FREE;
}
/* Set a = b^2 rounded in direction rnd_mode.  Returns the ternary
   inexact value (0 iff exact).  The result is always nonnegative;
   overflow/underflow are delegated to mpfr_overflow/mpfr_underflow.  */
int
mpfr_sqr (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
{
  int cc, inexact;
  mpfr_exp_t ax;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mpfr_prec_t bq;
  mp_size_t bn, tn;
  MPFR_TMP_DECL(marker);

  /* NOTE(review): `inexact' is read by the exit log before being set on the
     special-case paths — confirm MPFR_LOG_FUNC evaluates its output args
     only at function exit after assignment.  */
  MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", b, b, rnd_mode),
                 ("y[%#R]=%R inexact=%d", a, a, inexact));

  /* deal with special cases: NaN, infinity, zero */
  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(b)))
    {
      if (MPFR_IS_NAN(b))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      MPFR_SET_POS (a);     /* a square is never negative */
      if (MPFR_IS_INF(b))
        MPFR_SET_INF(a);
      else
        ( MPFR_ASSERTD(MPFR_IS_ZERO(b)), MPFR_SET_ZERO(a) );
      MPFR_RET(0);
    }
  ax = 2 * MPFR_GET_EXP (b);  /* tentative exponent of b^2 */
  bq = MPFR_PREC(b);

  MPFR_ASSERTD (2 * bq > bq); /* PREC_MAX is /2 so no integer overflow */

  bn = MPFR_LIMB_SIZE(b); /* number of limbs of b */
  /* number of limbs of square, 2*bn or 2*bn-1 */
  tn = 1 + (2 * bq - 1) / GMP_NUMB_BITS;
  MPFR_TMP_MARK(marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) 2 * bn * BYTES_PER_MP_LIMB);

  /* Squares the mantissa in temporary allocated space */
  mpn_sqr_n (tmp, MPFR_MANT(b), bn);
  b1 = tmp[2 * bn - 1];
  /* now tmp[0]..tmp[2*bn-1] contains the square of the mantissa, with
     tmp[2*bn-1] >= 2^(GMP_NUMB_BITS-2) */
  b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */

  /* if the mantissa of b is uniformly distributed in ]1/2, 1], then its
     square is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386 and in
     [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
  tmp += 2 * bn - tn; /* +0 or +1 */
  if (MPFR_UNLIKELY(b1 == 0))
    /* product in ]1/4,1/2[: renormalize.  The tn limbs at tmp lie inside
       the 2*bn-limb buffer, so the in-place shift is safe.  */
    mpn_lshift (tmp, tmp, tn, 1);
  cc = mpfr_round_raw (MPFR_MANT (a), tmp, 2 * bq, 0, MPFR_PREC (a), rnd_mode, &inexact);
  /* cc = 1 ==> result is a power of two: set mantissa to 1/2 and bump
     the exponent below via ax2 */
  if (MPFR_UNLIKELY(cc))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);
  {
    /* final exponent: ax, minus 1 if the product was < 1/2, plus 1 on
       rounding carry-out */
    mpfr_exp_t ax2 = ax + (mpfr_exp_t) (b1 - 1 + cc);
    if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
      return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);
    if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
      {
        /* In the rounding to the nearest mode, if the exponent of the exact
           result (i.e. before rounding, i.e. without taking cc into account)
           is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
           the argument is a power of 2), then round to zero.  */
        if (rnd_mode == MPFR_RNDN
            && (ax + (mpfr_exp_t) b1 < __gmpfr_emin || mpfr_powerof2_raw (b)))
          rnd_mode = MPFR_RNDZ;
        return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
      }
    MPFR_SET_EXP (a, ax2);
    MPFR_SET_POS (a);
  }
  MPFR_RET (inexact);
}
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
   Requires that mp[n-1..0] is odd.
   Requires that ep[en-1..0] is > 1.
   Uses scratch space tp[3n..0], i.e., 3n+1 words.

   Fixed-window exponentiation in Montgomery (REDC) form.  The whole table
   of window powers is precomputed; with WANT_CACHE_SECURITY the table
   entry is fetched with mpn_tabselect so the access pattern does not
   depend on exponent bits.  */
void
mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
	      mp_srcptr ep, mp_size_t en,
	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t mip[2];          /* 1- or 2-limb inverse of m for REDC */
  int cnt;
  long ebi;                  /* remaining significant exponent bits */
  int windowsize, this_windowsize;
  mp_limb_t expbits;         /* current window's exponent bits */
  mp_ptr pp, this_pp, last_pp;
  long i;
  int redc_x;                /* which REDC flavour MPN_REDC_X dispatches on */
  TMP_DECL;

  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

  TMP_MARK;

  count_leading_zeros (cnt, ep[en - 1]);
  ebi = en * GMP_LIMB_BITS - cnt;   /* bit length of the exponent */

  windowsize = win_size (ebi);

  /* Select the REDC variant and compute the matching inverse of m.
     NOTE(review): when HAVE_NATIVE_mpn_addmul_2 is not defined and
     n >= REDC_2_THRESHOLD, redc_x and mip are left uninitialized here —
     confirm the thresholds make that configuration unreachable.  */
  if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
    {
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
      redc_x = 1;
    }
#if defined (HAVE_NATIVE_mpn_addmul_2)
  else
    {
      mpn_binvert (mip, mp, 2, tp);
      mip[0] = -mip[0]; mip[1] = ~mip[1];
      redc_x = 2;
    }
#endif
#if 0
  mpn_binvert (mip, mp, n, tp);
  redc_x = 0;
#endif

  /* Table of 2^windowsize powers, n limbs each.  */
  pp = TMP_ALLOC_LIMBS (n << windowsize);

  /* pp[0..n-1] = redcified 1, i.e. R mod m (the 1-limb operand lives
     temporarily at pp+n).  */
  this_pp = pp;
  this_pp[n] = 1;
  redcify (this_pp, this_pp + n, 1, mp, n);
  this_pp += n;
  /* pp[n..2n-1] = b in Montgomery form.  */
  redcify (this_pp, bp, bn, mp, n);

  /* Precompute powers of b and put them in the temporary area at pp:
     each entry is the previous one times b (pp + n), REDC-reduced.  */
  for (i = (1 << windowsize) - 2; i > 0; i--)
    {
      last_pp = this_pp;
      this_pp += n;
      mpn_mul_n (tp, last_pp, pp + n, n);
      MPN_REDC_X (this_pp, tp, mp, n, mip);
    }

  /* Initialize the accumulator from the top window of the exponent.  */
  expbits = getbits (ep, ebi, windowsize);
  ebi -= windowsize;
  if (ebi < 0)
    ebi = 0;

  MPN_COPY (rp, pp + n * expbits, n);

  while (ebi != 0)
    {
      expbits = getbits (ep, ebi, windowsize);
      ebi -= windowsize;
      this_windowsize = windowsize;
      if (ebi < 0)
	{
	  /* Last window is short: square fewer times.  */
	  this_windowsize += ebi;
	  ebi = 0;
	}

      /* Square once per window bit.  */
      do
	{
	  mpn_sqr_n (tp, rp, n);
	  MPN_REDC_X (rp, tp, mp, n, mip);
	  this_windowsize--;
	}
      while (this_windowsize != 0);

      /* Multiply in the table entry for this window.  */
#if WANT_CACHE_SECURITY
      /* Oblivious table read: touches all entries regardless of expbits.  */
      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
      mpn_mul_n (tp, rp, tp + 2*n, n);
#else
      mpn_mul_n (tp, rp, pp + n * expbits, n);
#endif
      MPN_REDC_X (rp, tp, mp, n, mip);
    }

  /* Convert out of Montgomery form: one final REDC of rp padded with
     zeros, then a conditional subtraction to get a canonical residue.  */
  MPN_COPY (tp, rp, n);
  MPN_ZERO (tp + n, n);
  MPN_REDC_X (rp, tp, mp, n, mip);
  if (mpn_cmp (rp, mp, n) >= 0)
    mpn_sub_n (rp, rp, mp, n);
  TMP_FREE;
}
/* Compute {rp, rn} = {bp, bn} ^ exp, returning the result size rn.
   tp is scratch space; rp and tp are ping-ponged between squarings so
   that the final result magically lands in the caller's rp.  Uses
   left-to-right binary exponentiation.  */
mp_size_t
mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
{
  mp_limb_t x;
  int cnt, i;
  mp_size_t rn;
  int par;    /* parity of the exponent's popcount */

  /* Trivial exponents: b^0 = 1, b^1 = b.  */
  if (exp <= 1)
    {
      if (exp == 0)
	{
	  rp[0] = 1;
	  return 1;
	}
      else
	{
	  MPN_COPY (rp, bp, bn);
	  return bn;
	}
    }

  /* Count number of bits in exp, and compute where to put initial square in
     order to magically get results in the entry rp.  Use simple code,
     optimized for small exp.  For large exp, the bignum operations will take
     so much time that the slowness of this code will be negligible.  */
  par = 0;
  cnt = GMP_LIMB_BITS;   /* becomes the number of leading zero bits */
  for (x = exp; x != 0; x >>= 1)
    {
      par ^= x & 1;      /* accumulate popcount parity */
      cnt--;
    }
  exp <<= cnt;           /* left-justify exponent bits in the limb */

  if (bn == 1)
    {
      /* Single-limb base: use mpn_mul_1 for the multiply steps.  The number
	 of buffer swaps equals the number of squarings, so swap up front when
	 that count is odd to end in rp.  */
      mp_limb_t bl = bp[0];

      if ((cnt & 1) != 0)
	MP_PTR_SWAP (rp, tp);

      mpn_sqr_n (rp, bp, bn);
      rn = 2 * bn; rn -= rp[rn - 1] == 0;   /* normalize top limb */

      for (i = GMP_LIMB_BITS - cnt - 1;;)
	{
	  exp <<= 1;
	  /* Current exponent bit set: multiply by the base in place.  */
	  if ((exp & GMP_LIMB_HIGHBIT) != 0)
	    {
	      rp[rn] = mpn_mul_1 (rp, rp, rn, bl);
	      rn += rp[rn] != 0;
	    }

	  if (--i == 0)
	    break;

	  /* Square into the other buffer and swap.  */
	  mpn_sqr_n (tp, rp, rn);
	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
	  MP_PTR_SWAP (rp, tp);
	}
    }
  else
    {
      /* Multi-limb base: each set exponent bit costs an extra mpn_mul and
	 swap, so the up-front swap condition also folds in the popcount
	 parity (par).  */
      if (((par ^ cnt) & 1) == 0)
	MP_PTR_SWAP (rp, tp);

      mpn_sqr_n (rp, bp, bn);
      rn = 2 * bn; rn -= rp[rn - 1] == 0;

      for (i = GMP_LIMB_BITS - cnt - 1;;)
	{
	  exp <<= 1;
	  if ((exp & GMP_LIMB_HIGHBIT) != 0)
	    {
	      /* Multiply into tp; mpn_mul returns the high limb, so shrink
		 rn by one when that limb is zero.  */
	      rn = rn + bn - (mpn_mul (tp, rp, rn, bp, bn) == 0);
	      MP_PTR_SWAP (rp, tp);
	    }

	  if (--i == 0)
	    break;

	  mpn_sqr_n (tp, rp, rn);
	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
	  MP_PTR_SWAP (rp, tp);
	}
    }
  return rn;
}
/* For now, also disable REDC when MOD is even, as the inverse can't handle that. At some point, we might want to make the code faster for that case, perhaps using CRR. */ #ifndef POWM_THRESHOLD #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) #endif #define HANDLE_NEGATIVE_EXPONENT 1 #undef REDUCE_EXPONENT void #ifndef BERKELEY_MP mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) #else /* BERKELEY_MP */ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) #endif /* BERKELEY_MP */ { mp_ptr xp, tp, qp, gp, this_gp; mp_srcptr bp, ep, mp; mp_size_t bn, es, en, mn, xn; mp_limb_t invm, c; unsigned long int enb; mp_size_t i, K, j, l, k; int m_zero_cnt, e_zero_cnt; int sh; int use_redc; #if HANDLE_NEGATIVE_EXPONENT mpz_t new_b; #endif #if REDUCE_EXPONENT mpz_t new_e; #endif TMP_DECL (marker); mp = PTR(m); mn = ABSIZ (m); if (mn == 0) DIVIDE_BY_ZERO; TMP_MARK (marker); es = SIZ (e); if (es <= 0) { if (es == 0) { /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if m equals 1. */ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; PTR(r)[0] = 1; TMP_FREE (marker); /* we haven't really allocated anything here */ return; } #if HANDLE_NEGATIVE_EXPONENT MPZ_TMP_INIT (new_b, mn + 1); if (! mpz_invert (new_b, b, m)) DIVIDE_BY_ZERO; b = new_b; es = -es; #else DIVIDE_BY_ZERO; #endif } en = es; #if REDUCE_EXPONENT /* Reduce exponent by dividing it by phi(m) when m small. */ if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) { MPZ_TMP_INIT (new_e, 2); mpz_mod_ui (new_e, e, phi (mp[0])); e = new_e; } #endif use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; if (use_redc) { /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ modlimb_invert (invm, mp[0]); invm = -invm; } else { /* Normalize m (i.e. make its most significant bit set) as required by division functions below. 
*/ count_leading_zeros (m_zero_cnt, mp[mn - 1]); m_zero_cnt -= GMP_NAIL_BITS; if (m_zero_cnt != 0) { mp_ptr new_mp; new_mp = TMP_ALLOC_LIMBS (mn); mpn_lshift (new_mp, mp, mn, m_zero_cnt); mp = new_mp; } } /* Determine optimal value of k, the number of exponent bits we look at at a time. */ count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); e_zero_cnt -= GMP_NAIL_BITS; enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ k = 1; K = 2; while (2 * enb > K * (2 + k * (3 + k))) { k++; K *= 2; } tp = TMP_ALLOC_LIMBS (2 * mn + 1); qp = TMP_ALLOC_LIMBS (mn + 1); gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ bn = ABSIZ (b); bp = PTR(b); /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary for speed or correctness to do this when b and m have the same number of limbs, perhaps remove mpn_cmp call. */ if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) { /* Reduce possibly huge base while moving it to gp[0]. Use a function call to reduce, since we don't want the quotient allocation to live until function return. */ if (use_redc) { reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ MPN_ZERO (tp, mn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ } else { reduce (gp, bp, bn, mp, mn); } } else { /* |b| < m. We pad out operands to become mn limbs, which simplifies the rest of the function, but slows things down when the |b| << m. */ if (use_redc) { MPN_ZERO (tp, mn); MPN_COPY (tp + mn, bp, bn); MPN_ZERO (tp + mn + bn, mn - bn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); } else { MPN_COPY (gp, bp, bn); MPN_ZERO (gp + bn, mn - bn); } } /* Compute xx^i for odd g < 2^i. 
*/ xp = TMP_ALLOC_LIMBS (mn); mpn_sqr_n (tp, gp, mn); if (use_redc) redc (xp, mp, mn, invm, tp); /* xx = x^2*R^n */ else mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); this_gp = gp; for (i = 1; i < K / 2; i++) { mpn_mul_n (tp, this_gp, xp, mn); this_gp += mn; if (use_redc) redc (this_gp, mp, mn, invm, tp); /* g[i] = x^(2i+1)*R^n */ else mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); } /* Start the real stuff. */ ep = PTR (e); i = en - 1; /* current index */ c = ep[i]; /* current limb */ sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ sh -= k; /* index of lower bit of ep[i] to take into account */ if (sh < 0) { /* k-sh extra bits are needed */ if (i > 0) { i--; c <<= (-sh); sh += GMP_NUMB_BITS; c |= ep[i] >> sh; } }