/* Set ROP to a uniformly distributed random value using the precision of
   ROP and the GMP random state RSTATE.
   Returns 0 on success; returns 1 (and sets ROP to NaN) when the exponent
   of the generated value does not fit in the current exponent range.  */
int mpfr_urandomb (mpfr_ptr rop, gmp_randstate_t rstate)
{
  mpfr_limb_ptr rp;
  mpfr_prec_t nbits;
  mp_size_t nlimbs;
  mp_size_t k; /* number of high zero limbs */
  mpfr_exp_t exp;
  int cnt;

  rp = MPFR_MANT (rop);
  nbits = MPFR_PREC (rop);
  nlimbs = MPFR_LIMB_SIZE (rop);
  MPFR_SET_POS (rop);
  /* number of unused low bits in the least significant limb */
  cnt = nlimbs * GMP_NUMB_BITS - nbits;

  /* Uniform non-normalized significand */
  /* generate exactly nbits so that the random generator stays in the same
     state, independent of the machine word size GMP_NUMB_BITS */
  mpfr_rand_raw (rp, rstate, nbits);
  if (MPFR_LIKELY (cnt != 0)) /* this will put the low bits to zero */
    mpn_lshift (rp, rp, nlimbs, cnt);

  /* Count the null significant limbs and remaining limbs */
  exp = 0;
  k = 0;
  while (nlimbs != 0 && rp[nlimbs - 1] == 0)
    {
      k ++;
      nlimbs --;
      exp -= GMP_NUMB_BITS;
    }

  if (MPFR_LIKELY (nlimbs != 0)) /* otherwise value is zero */
    {
      count_leading_zeros (cnt, rp[nlimbs - 1]);
      /* Normalization */
      if (mpfr_set_exp (rop, exp - cnt))
        {
          /* If the exponent is not in the current exponent range, we choose
             to return a NaN as this is probably a user error. Indeed this
             can happen only if the exponent range has been reduced to a
             very small interval and/or the precision is huge (very
             unlikely). */
          MPFR_SET_NAN (rop);
          __gmpfr_flags |= MPFR_FLAGS_NAN; /* Can't use MPFR_RET_NAN */
          return 1;
        }
      /* shift the significand left so its top bit is set, moving it to
         the top of the limb array */
      if (cnt != 0)
        mpn_lshift (rp + k, rp, nlimbs, cnt);
      if (k != 0)
        MPN_ZERO (rp, k);
    }
  else
    MPFR_SET_ZERO (rop);

  return 0;
}

/* Set ln = L[n] and lnsub1 = L[n-1], consecutive Lucas numbers, derived
   from the Fibonacci pair F[n], F[n-1] via the identities used below:
     L[n]   = F[n] + 2*F[n-1]
     L[n-1] = 2*F[n] - F[n-1]
   ln and lnsub1 must be distinct mpz variables.  */
void
mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
{
  mp_ptr lp, l1p, f1p;
  mp_size_t size;
  mp_limb_t c;
  TMP_DECL;

  ASSERT (ln != lnsub1);

  /* handle small n quickly, and hide the special case for L[-1]=-1 */
  if (n <= FIB_TABLE_LUCNUM_LIMIT)
    {
      mp_limb_t f = FIB_TABLE (n);
      mp_limb_t f1 = FIB_TABLE ((int) n - 1);

      /* L[n] = F[n] + 2F[n-1] */
      PTR(ln)[0] = f + 2*f1;
      SIZ(ln) = 1;

      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);
      SIZ(lnsub1) = (n == 0 ? -1 : 1);
      return;
    }

  TMP_MARK;
  size = MPN_FIB2_SIZE (n);
  f1p = TMP_ALLOC_LIMBS (size);

  /* one extra limb for the possible carry out of the additions below */
  MPZ_REALLOC (ln, size+1);
  MPZ_REALLOC (lnsub1, size+1);
  lp = PTR(ln);
  l1p = PTR(lnsub1);

  /* compute the Fibonacci pair; size becomes the actual limb count */
  size = mpn_fib2_ui (l1p, f1p, n);

  /* L[n] = F[n] + 2F[n-1] */
#if HAVE_NATIVE_mpn_addlsh1_n
  c = mpn_addlsh1_n (lp, l1p, f1p, size);
#else
  c = mpn_lshift (lp, f1p, size, 1);
  c += mpn_add_n (lp, lp, l1p, size);
#endif
  lp[size] = c;
  SIZ(ln) = size + (c != 0);

  /* L[n-1] = 2F[n] - F[n-1] */
  c = mpn_lshift (l1p, l1p, size, 1);
  c -= mpn_sub_n (l1p, l1p, f1p, size);
  ASSERT ((mp_limb_signed_t) c >= 0); /* no net borrow expected here */
  l1p[size] = c;
  SIZ(lnsub1) = size + (c != 0);

  TMP_FREE;
}

/* c = a + a in the prime field: shift left one bit and reduce mod p.
   A zero input (flag == 0) yields a zero output. */
static void fp_double(element_ptr c, element_ptr a) {
  eptr src = (eptr)a->data;
  eptr dst = (eptr)c->data;

  if (!src->flag) {
    /* doubling zero gives zero */
    dst->flag = 0;
    return;
  }

  fptr fld = (fptr)c->field->data;
  const size_t n = fld->limbs;
  const mp_limb_t spill = mpn_lshift(dst->d, src->d, n, 1);

  if (spill) {
    /* 2a overflowed n limbs, hence 2a > p: one subtraction reduces it. */
    dst->flag = 2;
    // Again, assumes result is not zero.
    mpn_sub_n(dst->d, dst->d, fld->primelimbs, n);
    return;
  }

  const int cmp = mpn_cmp(dst->d, fld->primelimbs, n);
  if (cmp == 0) {
    /* 2a == p, i.e. the result is zero mod p */
    dst->flag = 0;
  } else {
    dst->flag = 2;
    if (cmp > 0)
      mpn_sub_n(dst->d, dst->d, fld->primelimbs, n);
  }
}

/*
 * Set f to z, choosing the smallest precision for f
 * so that z = f*(2^BPML)*zs*2^(RetVal)
 *
 * f is initialized here (mpfr_init2) and owned by the caller.
 * *zs receives the limb count of |z| before trailing zero limbs are
 * stripped.  The return value is -c, where c is the number of leading
 * zero bits of the top nonzero limb (so the result is <= 0).
 */
static int
set_z (mpfr_ptr f, mpz_srcptr z, mp_size_t *zs)
{
  mp_limb_t *p;
  mp_size_t s;
  int c;
  mpfr_prec_t pf;

  MPFR_ASSERTD (mpz_sgn (z) != 0);

  /* Remove useless ending 0 */
  for (p = PTR (z), s = *zs = ABS (SIZ (z)) ; *p == 0; p++, s--)
    MPFR_ASSERTD (s >= 0);

  /* Get working precision */
  count_leading_zeros (c, p[s-1]);
  pf = s * GMP_NUMB_BITS - c;
  if (pf < MPFR_PREC_MIN)
    pf = MPFR_PREC_MIN;
  mpfr_init2 (f, pf);

  /* Copy Mantissa */
  if (MPFR_LIKELY (c))
    /* normalize: shift so the top bit of the significand is set */
    mpn_lshift (MPFR_MANT (f), p, s, c);
  else
    MPN_COPY (MPFR_MANT (f), p, s);

  MPFR_SET_SIGN (f, mpz_sgn (z));
  MPFR_SET_EXP (f, 0);

  return -c;
}

/* Put in rp[n..2n-1] an approximation of the n high limbs
   of {np, n}^2. The error is less than n ulps of rp[n]. */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t k;

  MPFR_STAT_STATIC_ASSERT (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
  /* k = -1: do a full square; k = 0: basecase short product;
     otherwise split the operand at index k (see the else branch) */
  k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
    : (n+4)/2; /* ensures that k >= (n+3)/2 */
  MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  if (k < 0)
    /* we can't use mpn_sqr_basecase here, since it requires
       n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
       is not exported by GMP */
    mpn_sqr_n (rp, np, n);
  else if (k == 0)
    mpfr_mulhigh_n_basecase (rp, np, np, n);
  else
    {
      mp_size_t l = n - k;
      mp_limb_t cy;

      mpn_sqr_n (rp + 2 * l, np + l, k);   /* fills rp[2l..2n-1] */
      mpfr_mulhigh_n (rp, np, np + k, l);  /* fills rp[l-1..2l-1] */
      /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
      cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
      cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
      mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
    }
}

/* truncates, returns inexact */
/* Write to y the integer part of the mantissa {x, xn} whose top bit sits
   exp bits above the radix point.  Returns 0 when the conversion is exact
   and nonzero when it is (or may be) inexact.  */
int
_arf_get_integer_mpn(mp_ptr y, mp_srcptr x, mp_size_t xn, slong exp)
{
    /* position of the bottom of x relative to the radix point */
    slong bot_exp = exp - xn * FLINT_BITS;

    if (bot_exp >= 0)
    {
        /* all of x lies at or above the radix point: shift up, zero-fill */
        mp_size_t bot_limbs;
        mp_bitcnt_t bot_bits;

        bot_limbs = bot_exp / FLINT_BITS;
        bot_bits = bot_exp % FLINT_BITS;

        flint_mpn_zero(y, bot_limbs);

        if (bot_bits == 0)
            flint_mpn_copyi(y + bot_limbs, x, xn);
        else
            y[bot_limbs + xn] = mpn_lshift(y + bot_limbs, x, xn, bot_bits);

        /* exact */
        return 0;
    }
    else if (exp <= 0)
    {
        /* value is purely fractional: nothing written to y */
        /* inexact */
        return 1;
    }
    else
    {
        /* only the top exp bits of x lie above the radix point */
        mp_size_t top_limbs;
        mp_bitcnt_t top_bits;
        mp_limb_t cy;

        top_limbs = exp / FLINT_BITS;
        top_bits = exp % FLINT_BITS;

        if (top_bits == 0)
        {
            flint_mpn_copyi(y, x + xn - top_limbs, top_limbs);
            /* inexact */
            return 1;
        }
        else
        {
            /* can be inexact */
            cy = mpn_rshift(y, x + xn - top_limbs - 1, top_limbs + 1,
                FLINT_BITS - top_bits);

            /* inexact iff bits were shifted out (cy) or limbs dropped */
            return (cy != 0) || (top_limbs + 1 != xn);
        }
    }
}

/* Shift the signed-size mpn {xp, |xn|} left by bits (0 < bits < limb
   width), writing the result to rp and its signed size to *rn.  The size
   grows by one limb (keeping the sign of xn) when a nonzero limb is
   shifted out of the top. */
void tc4_lshift(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn, mp_size_t bits)
{
   if (xn == 0)
   {
      *rn = 0;
      return;
   }

   mp_size_t len = ABS(xn);
   mp_limb_t top = mpn_lshift(rp, xp, len, bits);

   if (top == 0)
   {
      /* no spill: the magnitude still fits in len limbs */
      *rn = xn;
      return;
   }

   rp[len] = top;
   *rn = (xn >= 0) ? xn + 1 : xn - 1;
}

/* Perform left-shift operation on MPN
 *
 * pre-conditions:
 *  - 0 < count
 *  - rn = sn + ceil(count / GMP_NUMB_BITS)
 *  - sn > 0
 *
 * return value: most-significant limb stored in {rp,rn} result
 */
mp_limb_t
integer_gmp_mpn_lshift (mp_limb_t rp[], const mp_limb_t sp[],
                        const mp_size_t sn, const mp_bitcnt_t count)
{
  const mp_size_t    whole_limbs = count / GMP_NUMB_BITS;
  const unsigned int rem_bits    = count % GMP_NUMB_BITS;
  const mp_size_t    top_idx     = sn + whole_limbs;

  /* clear the low limbs vacated by the shift */
  memset(rp, 0, whole_limbs * sizeof(mp_limb_t));

  if (rem_bits == 0) {
    /* limb-aligned shift: a plain copy suffices */
    memcpy(&rp[whole_limbs], sp, sn * sizeof(mp_limb_t));
    return rp[top_idx - 1];
  }

  {
    /* sub-limb shift: mpn_lshift returns the bits spilled out the top */
    const mp_limb_t spill = mpn_lshift(&rp[whole_limbs], sp, sn, rem_bits);
    rp[top_idx] = spill;
    return spill;
  }
}

/* Full-product multiplication a = b * c, rounded per rnd_mode.
   Returns the ternary inexact flag (0 exact, nonzero otherwise),
   or the result of mpfr_overflow/mpfr_underflow on range failure.  */
static int
mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  /* Old implementation */
  int sign_product, cc, inexact;
  mpfr_exp_t ax;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mpfr_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL(marker);

  /* deal with special cases */
  if (MPFR_ARE_SINGULAR(b,c))
    {
      if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
      if (MPFR_IS_INF(b))
        {
          if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
            {
              MPFR_SET_SIGN(a,sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else /* Inf * 0 is NaN */
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF(c))
        {
          if (MPFR_NOTZERO(b))
            {
              MPFR_SET_SIGN(a, sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else /* 0 * Inf is NaN */
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN(a, sign_product);
          MPFR_SET_ZERO(a);
          MPFR_RET(0); /* 0 * 0 is exact */
        }
    }
  sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );

  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);

  bq = MPFR_PREC(b);
  cq = MPFR_PREC(c);

  MPFR_ASSERTD(bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */

  bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */
  cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */
  k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
  /* <= k, thus no int overflow */
  MPFR_ASSERTD(tn <= k);

  /* Check for no size_t overflow*/
  MPFR_ASSERTD((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK(marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) k * BYTES_PER_MP_LIMB);

  /* multiplies two mantissa in temporary allocated space */
  /* mpn_mul requires its first operand to have at least as many limbs as
     the second, hence the swap */
  b1 = (MPFR_LIKELY(bn >= cn)) ?
    mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
    : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);

  /* now tmp[0]..tmp[k-1] contains the product of both mantissa,
     with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
  b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */

  /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
     then their product is in ]1/4, 1/2] with probability 2*ln(2)-1
     ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
  tmp += k - tn;
  if (MPFR_UNLIKELY(b1 == 0))
    mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */

  cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
                       MPFR_IS_NEG_SIGN(sign_product),
                       MPFR_PREC (a), rnd_mode, &inexact);

  /* cc = 1 ==> result is a power of two */
  if (MPFR_UNLIKELY(cc))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);

  {
    /* final exponent: rounding may have bumped it via cc, and b1 tells
       whether the raw product needed renormalization */
    mpfr_exp_t ax2 = ax + (mpfr_exp_t) (b1 - 1 + cc);
    if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
      return mpfr_overflow (a, rnd_mode, sign_product);
    if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
      {
        /* In the rounding to the nearest mode, if the exponent of the exact
           result (i.e. before rounding, i.e. without taking cc into
           account) is < __gmpfr_emin - 1 or the exact result is a power of
           2 (i.e. if both arguments are powers of 2), then round to zero.
           */
        if (rnd_mode == MPFR_RNDN
            && (ax + (mpfr_exp_t) b1 < __gmpfr_emin
                || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
          rnd_mode = MPFR_RNDZ;
        return mpfr_underflow (a, rnd_mode, sign_product);
      }
    MPFR_SET_EXP (a, ax2);
    MPFR_SET_SIGN(a, sign_product);
  }
  MPFR_RET (inexact);
}

/* Interpolation step shared by the Toom-Cook variants that evaluate at
   7 points.  On entry the w* buffers hold the evaluated products; on exit
   rp holds the recombined full product.  flags records which of w1/w3
   were computed with a negated sign. */
void
mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
                           mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
                           mp_size_t w6n, mp_ptr tp)
{
  mp_size_t m;
  mp_limb_t cy;

  m = 2*n + 1;
#define w0 rp
#define w2 (rp + 2*n)
#define w6 (rp + 6*n)

  ASSERT (w6n > 0);
  ASSERT (w6n <= 2*n);

  /* Using formulas similar to Marco Bodrato's

     W5 = W5 + W4
     W1 =(W4 - W1)/2
     W4 = W4 - W0
     W4 =(W4 - W1)/4 - W6*16
     W3 =(W2 - W3)/2
     W2 = W2 - W3

     W5 = W5 - W2*65      May be negative.
     W2 = W2 - W6 - W0
     W5 =(W5 + W2*45)/2   Now >= 0 again.
     W4 =(W4 - W2)/3
     W2 = W2 - W4

     W1 = W5 - W1         May be negative.
     W5 =(W5 - W3*8)/9
     W3 = W3 - W5
     W1 =(W1/15 + W5)/2   Now >= 0 again.
     W5 = W5 - W1

     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
           W4 = f(2), W5 = f(1/2), W6 = f(oo),

     Note that most intermediate results are positive; the ones that
     may be negative are represented in two's complement.  We must
     never shift right a value that may be negative, since that would
     invalidate the sign bit.  On the other hand, divexact by odd
     numbers work fine with two's complement.
  */

  mpn_add_n (w5, w5, w4, m);
  /* W1 = (W4 - W1)/2, honouring the recorded sign of W1 */
  if (flags & toom7_w1_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w1, w1, w4, m);
#else
      mpn_add_n (w1, w1, w4, m);
      ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w1, w4, w1, m);
#else
      mpn_sub_n (w1, w4, w1, m);
      ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  mpn_sub (w4, w4, m, w0, 2*n);
  mpn_sub_n (w4, w4, w1, m);
  ASSERT (!(w4[0] & 3));
  mpn_rshift (w4, w4, m, 2); /* w4>=0 */

  /* W4 -= W6*16, computed via tp = W6 << 4 */
  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
  mpn_sub (w4, w4, m, tp, w6n+1);

  /* W3 = (W2 - W3)/2, honouring the recorded sign of W3 */
  if (flags & toom7_w3_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w3, w3, w2, m);
#else
      mpn_add_n (w3, w3, w2, m);
      ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w3, w2, w3, m);
#else
      mpn_sub_n (w3, w2, w3, m);
      ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }
  mpn_sub_n (w2, w2, w3, m);

  mpn_submul_1 (w5, w2, m, 65);
  mpn_sub (w2, w2, m, w6, w6n);
  mpn_sub (w2, w2, m, w0, 2*n);

  mpn_addmul_1 (w5, w2, m, 45);
  ASSERT (!(w5[0] & 1));
  mpn_rshift (w5, w5, m, 1);
  mpn_sub_n (w4, w4, w2, m);

  mpn_divexact_by3 (w4, w4, m);
  mpn_sub_n (w2, w2, w4, m);

  mpn_sub_n (w1, w5, w1, m);
  /* W5 = (W5 - W3*8)/9, with tp = W3 << 3 */
  mpn_lshift (tp, w3, m, 3);
  mpn_sub_n (w5, w5, tp, m);
  mpn_divexact_by9 (w5, w5, m);
  mpn_sub_n (w3, w3, w5, m);

  mpn_divexact_by15 (w1, w1, m);
  mpn_add_n (w1, w1, w5, m);
  ASSERT (!(w1[0] & 1));
  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
  mpn_sub_n (w5, w5, w1, m);

  /* These bounds are valid for the 4x4 polynomial product of toom44,
   * and they are conservative for toom53 and toom62. */
  ASSERT (w1[2*n] < 2);
  ASSERT (w2[2*n] < 3);
  ASSERT (w3[2*n] < 4);
  ASSERT (w4[2*n] < 3);
  ASSERT (w5[2*n] < 2);

  /* Addition chain. Note carries and the 2n'th limbs that need to be
   * added in.
   *
   * Special care is needed for w2[2n] and the corresponding carry,
   * since the "simple" way of adding it all together would overwrite
   * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
   * the high half of w3 and the low half of w4.
   *
   *         7    6    5    4    3    2    1    0
   *    |    |    |    |    |    |    |    |    |
   *                   ||w3 (2n+1)|
   *              ||w4 (2n+1)|
   *         ||w5 (2n+1)|         ||w1 (2n+1)|
   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |   (share storage with r)
   *  -----------------------------------------------
   *  r |    |    |    |    |    |    |    |    |
   *         c7   c6   c5   c4   c3       Carries to propagate
   */

  cy = mpn_add_n (rp + n, rp + n, w1, m);
  MPN_INCR_U (w2 + n + 1, n , cy);
  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
  if (w6n > n + 1)
    ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
  else
    {
      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
#if WANT_ASSERT
      {
        mp_size_t i;
        for (i = w6n; i <= n; i++)
          ASSERT (w5[n + i] == 0);
      }
#endif
    }
}

void _gst_mpz_gcd (gst_mpz *g, const gst_mpz *u, const gst_mpz *v) { int g_zero_bits, u_zero_bits, v_zero_bits; mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs; mp_ptr tp; mp_ptr up = u->d; mp_size_t usize = ABS (u->size); mp_ptr vp = v->d; mp_size_t vsize = ABS (v->size); mp_size_t gsize; /* GCD(0, V) == GCD (U, 1) == V. */ if (usize == 0 || (vsize == 1 && vp[0] == 1)) { gst_mpz_copy_abs (g, v); return; } /* GCD(U, 0) == GCD (1, V) == U. */ if (vsize == 0 || (usize == 1 && up[0] == 1)) { gst_mpz_copy_abs (g, u); return; } if (usize == 1) { gst_mpz_realloc (g, 1); g->size = 1; g->d[0] = mpn_gcd_1 (vp, vsize, up[0]); return; } if (vsize == 1) { gst_mpz_realloc (g, 1); g->size = 1; g->d[0] = mpn_gcd_1 (up, usize, vp[0]); return; } /* Eliminate low zero bits from U and V and move to temporary storage. */ u_zero_bits = mpn_scan1 (up, 0); u_zero_limbs = u_zero_bits / BITS_PER_MP_LIMB; u_zero_bits &= BITS_PER_MP_LIMB - 1; up += u_zero_limbs; usize -= u_zero_limbs; /* Operands could be destroyed for big-endian case, but let's be tidy. */ tp = up; up = (mp_ptr) alloca (usize * SIZEOF_MP_LIMB_T); if (u_zero_bits != 0) { mpn_rshift (up, tp, usize, u_zero_bits); usize -= up[usize - 1] == 0; } else MPN_COPY (up, tp, usize); v_zero_bits = mpn_scan1 (vp, 0); v_zero_limbs = v_zero_bits / BITS_PER_MP_LIMB; v_zero_bits &= BITS_PER_MP_LIMB - 1; vp += v_zero_limbs; vsize -= v_zero_limbs; /* Operands could be destroyed for big-endian case, but let's be tidy. */ tp = vp; vp = (mp_ptr) alloca (vsize * SIZEOF_MP_LIMB_T); if (v_zero_bits != 0) { mpn_rshift (vp, tp, vsize, v_zero_bits); vsize -= vp[vsize - 1] == 0; } else MPN_COPY (vp, tp, vsize); if (u_zero_limbs > v_zero_limbs) { g_zero_limbs = v_zero_limbs; g_zero_bits = v_zero_bits; } else if (u_zero_limbs < v_zero_limbs) { g_zero_limbs = u_zero_limbs; g_zero_bits = u_zero_bits; } else /* Equal. */ { g_zero_limbs = u_zero_limbs; g_zero_bits = MIN (u_zero_bits, v_zero_bits); } /* Call mpn_gcd. 
The 2nd argument must not have more bits than the 1st. */ vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1])) ? mpn_gcd (vp, vp, vsize, up, usize) : mpn_gcd (vp, up, usize, vp, vsize); /* Here G <-- V << (g_zero_limbs*BITS_PER_MP_LIMB + g_zero_bits). */ gsize = vsize + g_zero_limbs; if (g_zero_bits != 0) { mp_limb_t cy_limb; gsize += (vp[vsize - 1] >> (BITS_PER_MP_LIMB - g_zero_bits)) != 0; if (g->alloc < gsize) gst_mpz_realloc (g, gsize); MPN_ZERO (g->d, g_zero_limbs); tp = g->d + g_zero_limbs; cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits); if (cy_limb != 0) tp[vsize] = cy_limb; }

int mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) { int sign, inexact; mpfr_exp_t ax, ax2; mp_limb_t *tmp; mp_limb_t b1; mpfr_prec_t bq, cq; mp_size_t bn, cn, tn, k; MPFR_TMP_DECL (marker); MPFR_LOG_FUNC (("b[%#R]=%R c[%#R]=%R rnd=%d", b, b, c, c, rnd_mode), ("a[%#R]=%R inexact=%d", a, a, inexact)); /* deal with special cases */ if (MPFR_ARE_SINGULAR (b, c)) { if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c)) { MPFR_SET_NAN (a); MPFR_RET_NAN; } sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c)); if (MPFR_IS_INF (b)) { if (!MPFR_IS_ZERO (c)) { MPFR_SET_SIGN (a, sign); MPFR_SET_INF (a); MPFR_RET (0); } else { MPFR_SET_NAN (a); MPFR_RET_NAN; } } else if (MPFR_IS_INF (c)) { if (!MPFR_IS_ZERO (b)) { MPFR_SET_SIGN (a, sign); MPFR_SET_INF (a); MPFR_RET(0); } else { MPFR_SET_NAN (a); MPFR_RET_NAN; } } else { MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c)); MPFR_SET_SIGN (a, sign); MPFR_SET_ZERO (a); MPFR_RET (0); } } sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c)); ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c); /* Note: the exponent of the exact result will be e = bx + cx + ec with ec in {-1,0,1} and the following assumes that e is representable. */ /* FIXME: Useful since we do an exponent check after ? * It is useful iff the precision is big, there is an overflow * and we are doing further mults...*/ #ifdef HUGE if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1)) return mpfr_overflow (a, rnd_mode, sign); if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2)) return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? 
MPFR_RNDZ : rnd_mode, sign); #endif bq = MPFR_PREC (b); cq = MPFR_PREC (c); MPFR_ASSERTD (bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */ bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */ cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */ k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */ tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS; MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */ /* Check for no size_t overflow*/ MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB); MPFR_TMP_MARK (marker); tmp = (mp_limb_t *) MPFR_TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB); /* multiplies two mantissa in temporary allocated space */ if (MPFR_UNLIKELY (bn < cn)) { mpfr_srcptr z = b; mp_size_t zn = bn; b = c; bn = cn; c = z; cn = zn; } MPFR_ASSERTD (bn >= cn); if (MPFR_LIKELY (bn <= 2)) { if (bn == 1) { /* 1 limb * 1 limb */ umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]); b1 = tmp[1]; } else if (MPFR_UNLIKELY (cn == 1)) { /* 2 limbs * 1 limb */ mp_limb_t t; umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]); umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]); add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t); b1 = tmp[2]; } else { /* 2 limbs * 2 limbs */ mp_limb_t t1, t2, t3; /* First 2 limbs * 1 limb */ umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]); umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]); add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1); /* Second, the other 2 limbs * 1 limb product */ umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]); umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]); add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3); /* Sum those two partial products */ add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2); tmp[3] += (tmp[2] < t1); b1 = tmp[3]; } b1 >>= (GMP_NUMB_BITS - 1); tmp += k - tn; if (MPFR_UNLIKELY (b1 == 0)) mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */ } else /* 
Mulders' mulhigh. Disable if squaring, since it is not tuned for such a case */ if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c))

/* Set r = b^el mod m, using binary (square-and-multiply) exponentiation.
   Divides by zero when m == 0.  The modulus is temporarily normalized
   (shifted so its top bit is set) for the division routines, and the
   result is adjusted back at the end. */
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
{
  mp_ptr xp, tp, qp, mp, bp;
  mp_size_t xn, tn, mn, bn;
  int m_zero_cnt;
  int c;
  mp_limb_t e;
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ(m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  if (el == 0)
    {
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending on
         if MOD equals 1.  */
      SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
      PTR(r)[0] = 1;
      return;
    }

  TMP_MARK;

  /* Normalize m (i.e. make its most significant bit set) as required by
     division functions below.  */
  count_leading_zeros (m_zero_cnt, mp[mn - 1]);
  m_zero_cnt -= GMP_NAIL_BITS;
  if (m_zero_cnt != 0)
    {
      mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
      mpn_lshift (new_mp, mp, mn, m_zero_cnt);
      mp = new_mp;
    }

  bn = ABSIZ(b);
  bp = PTR(b);
  if (bn > mn)
    {
      /* Reduce possibly huge base.  Use a function call to reduce, since
         we don't want the quotient allocation to live until function
         return.  */
      mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
      reduce (new_bp, bp, bn, mp, mn);
      bp = new_bp;
      bn = mn;
      /* Canonicalize the base, since we are potentially going to multiply
         with it quite a few times.  */
      MPN_NORMALIZE (bp, bn);
    }

  if (bn == 0)
    {
      /* base reduced to zero: result is zero */
      SIZ(r) = 0;
      TMP_FREE;
      return;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
  xp = TMP_ALLOC_LIMBS (mn);
  qp = TMP_ALLOC_LIMBS (mn + 1);

  MPN_COPY (xp, bp, bn);
  xn = bn;

  e = el;
  count_leading_zeros (c, e);
  e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
  c = BITS_PER_MP_LIMB - 1 - c; /* number of exponent bits left to scan */

  /* Main loop. */

  /* If m is already normalized (high bit of high limb set), and b is the
     same size, but a bigger value, and e==1, then there's no modular
     reductions done and we can end up with a result out of range at the
     end. */
  if (c == 0)
    {
      if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
        mpn_sub_n (xp, xp, mp, mn);
      goto finishup;
    }

  while (c != 0)
    {
      /* square step: x = x^2 mod m */
      mpn_sqr (tp, xp, xn);
      tn = 2 * xn;
      tn -= tp[tn - 1] == 0;
      if (tn < mn)
        {
          MPN_COPY (xp, tp, tn);
          xn = tn;
        }
      else
        {
          mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
          xn = mn;
        }

      /* multiply step when the current (top) exponent bit is set */
      if ((mp_limb_signed_t) e < 0)
        {
          mpn_mul (tp, xp, xn, bp, bn);
          tn = xn + bn;
          tn -= tp[tn - 1] == 0;
          if (tn < mn)
            {
              MPN_COPY (xp, tp, tn);
              xn = tn;
            }
          else
            {
              mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
              xn = mn;
            }
        }
      e <<= 1;
      c--;
    }

 finishup:

  /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
     with the original MOD.  */
  if (m_zero_cnt != 0)
    {
      mp_limb_t cy;
      cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
      tp[xn] = cy;
      xn += cy != 0;

      if (xn < mn)
        {
          MPN_COPY (xp, tp, xn);
        }
      else
        {
          mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
          xn = mn;
        }
      mpn_rshift (xp, xp, xn, m_zero_cnt);
    }
  MPN_NORMALIZE (xp, xn);

  /* odd exponent of a negative base gives a negative power; fold it back
     into the canonical non-negative residue */
  if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
    {
      mp = PTR(m); /* want original, unnormalized m */
      mpn_sub (xp, mp, mn, xp, xn);
      xn = mn;
      MPN_NORMALIZE (xp, xn);
    }
  MPZ_REALLOC (r, xn);
  SIZ (r) = xn;
  MPN_COPY (PTR(r), xp, xn);

  TMP_FREE;
}

// Cast num/den to an int, rounding towards nearest. All inputs are destroyed. Take a sqrt if desired. // The values array must consist of r numerators followed by one denominator. void snap_divs(RawArray<Quantized> result, RawArray<mp_limb_t,2> values, const bool take_sqrt) { assert(result.size()+1==values.m); // For division, we seek x s.t. // x-1/2 <= num/den <= x+1/2 // 2x-1 <= 2num/den <= 2x+1 // 2x-1 <= floor(2num/den) <= 2x+1 // 2x <= 1+floor(2num/den) <= 2x+2 // x <= (1+floor(2num/den))//2 <= x+1 // x = (1+floor(2num/den))//2 // In the sqrt case, we seek a nonnegative integer x s.t. // x-1/2 <= sqrt(num/den) < x+1/2 // 2x-1 <= sqrt(4num/den) < 2x+1 // Now the leftmost and rightmost expressions are integral, so we can take floors to get // 2x-1 <= floor(sqrt(4num/den)) < 2x+1 // Since sqrt is monotonic and maps integers to integers, floor(sqrt(floor(x))) = floor(sqrt(x)), so // 2x-1 <= floor(sqrt(floor(4num/den))) < 2x+1 // 2x <= 1+floor(sqrt(floor(4num/den))) < 2x+2 // x <= (1+floor(sqrt(floor(4num/den))))//2 < x+1 // x = (1+floor(sqrt(floor(4num/den))))//2 // Thus, both cases look like // x = (1+f(2**k*num/den))//2 // where k = 1 or 2 and f is some truncating integer op (division or division+sqrt). 
// Adjust denominator to be positive const auto raw_den = values[result.size()]; const bool den_negative = mp_limb_signed_t(raw_den.back())<0; if (den_negative) mpn_neg(raw_den.data(),raw_den.data(),raw_den.size()); const auto den = trim(raw_den); assert(den.size()); // Zero should be prevented by the caller // Prepare for divisions const auto q = GEODE_RAW_ALLOCA(values.n-den.size()+1,mp_limb_t), r = GEODE_RAW_ALLOCA(den.size(),mp_limb_t); // Compute each component of the result for (int i=0;i<result.size();i++) { // Adjust numerator to be positive const auto num = values[i]; const bool num_negative = mp_limb_signed_t(num.back())<0; if (take_sqrt && num_negative!=den_negative && !num.contains_only(0)) throw RuntimeError("perturbed_ratio: negative value in square root"); if (num_negative) mpn_neg(num.data(),num.data(),num.size()); // Add enough bits to allow round-to-nearest computation after performing truncating operations mpn_lshift(num.data(),num.data(),num.size(),take_sqrt?2:1); // Perform division mpn_tdiv_qr(q.data(),r.data(),0,num.data(),num.size(),den.data(),den.size()); const auto trim_q = trim(q); if (!trim_q.size()) { result[i] = 0; continue; } // Take sqrt if desired, reusing the num buffer const auto s = take_sqrt ? sqrt_helper(num,trim_q) : trim_q; // Verify that result lies in [-exact::bound,exact::bound]; const int ratio = sizeof(ExactInt)/sizeof(mp_limb_t); static_assert(ratio<=2,""); if (s.size() > ratio) goto overflow; const auto nn = ratio==2 && s.size()==2 ? s[0]|ExactInt(s[1])<<8*sizeof(mp_limb_t) : s[0], n = (1+nn)/2; if (uint64_t(n) > uint64_t(exact::bound)) goto overflow; // Done! result[i] = (num_negative==den_negative?1:-1)*Quantized(n); } return; overflow: throw OverflowError("perturbed_ratio: overflow in l'Hopital expansion"); }

/* returns 0 if result exact, non-zero otherwise */ int mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mpfr_rnd_t rnd_mode) { long i; int sh; mp_size_t xn, yn, dif; mp_limb_t *xp, *yp, *tmp, c, d; mpfr_exp_t exp; int inexact, middle = 1, nexttoinf; MPFR_TMP_DECL(marker); MPFR_LOG_FUNC (("x[%Pu]=%.*Rg u=%lu rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, u, rnd_mode), ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y, inexact)); if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) { if (MPFR_IS_NAN (x)) { MPFR_SET_NAN (y); MPFR_RET_NAN; } else if (MPFR_IS_INF (x)) { MPFR_SET_INF (y); MPFR_SET_SAME_SIGN (y, x); MPFR_RET (0); } else { MPFR_ASSERTD (MPFR_IS_ZERO(x)); if (u == 0) /* 0/0 is NaN */ { MPFR_SET_NAN(y); MPFR_RET_NAN; } else { MPFR_SET_ZERO(y); MPFR_SET_SAME_SIGN (y, x); MPFR_RET(0); } } } else if (MPFR_UNLIKELY (u <= 1)) { if (u < 1) { /* x/0 is Inf since x != 0*/ MPFR_SET_INF (y); MPFR_SET_SAME_SIGN (y, x); mpfr_set_divby0 (); MPFR_RET (0); } else /* y = x/1 = x */ return mpfr_set (y, x, rnd_mode); } else if (MPFR_UNLIKELY (IS_POW2 (u))) return mpfr_div_2si (y, x, MPFR_INT_CEIL_LOG2 (u), rnd_mode); MPFR_SET_SAME_SIGN (y, x); MPFR_TMP_MARK (marker); xn = MPFR_LIMB_SIZE (x); yn = MPFR_LIMB_SIZE (y); xp = MPFR_MANT (x); yp = MPFR_MANT (y); exp = MPFR_GET_EXP (x); dif = yn + 1 - xn; /* we need to store yn+1 = xn + dif limbs of the quotient */ /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */ tmp = MPFR_TMP_LIMBS_ALLOC (yn + 1); c = (mp_limb_t) u; MPFR_ASSERTN (u == c); if (dif >= 0) c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */ else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */ c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c); inexact = (c != 0); /* First pass in estimating next bit of the quotient, in case of RNDN * * In case we just have the right number of bits (postpone this ?), * * we need to check whether the remainder is more or less than half * * the divisor. 
The test must be performed with a subtraction, so as * * to prevent carries. */ if (MPFR_LIKELY (rnd_mode == MPFR_RNDN)) { if (c < (mp_limb_t) u - c) /* We have u > c */ middle = -1; else if (c > (mp_limb_t) u - c) middle = 1; else middle = 0; /* exactly in the middle */ } /* If we believe that we are right in the middle or exact, we should check that we did not neglect any word of x (division large / 1 -> small). */ for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++) if (xp[i]) inexact = middle = 1; /* larger than middle */ /* If the high limb of the result is 0 (xp[xn-1] < u), remove it. Otherwise, compute the left shift to be performed to normalize. In the latter case, we discard some low bits computed. They contain information useful for the rounding, hence the updating of middle and inexact. */ if (tmp[yn] == 0) { MPN_COPY(yp, tmp, yn); exp -= GMP_NUMB_BITS; } else { int shlz; count_leading_zeros (shlz, tmp[yn]); /* shift left to normalize */ if (MPFR_LIKELY (shlz != 0)) { mp_limb_t w = tmp[0] << shlz; mpn_lshift (yp, tmp + 1, yn, shlz); yp[0] += tmp[0] >> (GMP_NUMB_BITS - shlz); if (w > (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1))) { middle = 1; } else if (w < (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1))) { middle = -1; } else { middle = (c != 0); } inexact = inexact || (w != 0); exp -= shlz; } else { /* this happens only if u == 1 and xp[xn-1] >=

/* ret + (xp, n) = (yp, n)*(zp, n) % 2^b + 1
   needs (tp, 2n) temp space, everything reduced mod 2^b
   inputs, outputs are fully reduced
   N.B: 2n is not the same as 2b rounded up to nearest limb!

   Returns the carry/top limb of the result (the "ret" above).  */
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
                            mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;

  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b; /* number of unused bits in the top limb */

  TMP_DECL;

  ASSERT(b > 0);
  ASSERT(n > 0);
  ASSERT_MPN(yp, n);
  ASSERT_MPN(zp, n);
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT(!MPN_OVERLAP_P (xp, n, yp, n));
  ASSERT(!MPN_OVERLAP_P (xp, n, zp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  /* FFT path for large, limb-aligned b */
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n))
    {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;
      mp_limb_t ret;
      TMP_MARK;

      tx = TMP_BALLOC_LIMBS(3*n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;

      MPN_COPY(ty, yp, n);
      MPN_COPY(tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      while ((((mp_limb_t)1)<<depth) < b) depth++;

      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
      depth1 = depth/2 - off;

      w1 = b/(((mp_limb_t)1)<<(2*depth1));

      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);

      MPN_COPY(xp, tx, n);
      /* BUGFIX: read the top limb BEFORE TMP_FREE -- the original code
         returned tx[n] after releasing the temporary allocation, which
         reads freed memory. */
      ret = tx[n];
      TMP_FREE;

      return ret;
    }
#endif

  if (yp == zp)
    mpn_sqr(tp, yp, n);
  else
    mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      /* limb-aligned case: low half minus high half, mod 2^b + 1 */
      c = mpn_sub_n (xp, tp, tp + n, n);

      return mpn_add_1 (xp, xp, n, c);
    }

  /* mask the product down to b bits, then subtract the (shifted) high
     part as required mod 2^b + 1 */
  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;

  return c;
}

void mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v) { mpir_ui g_zero_bits, u_zero_bits, v_zero_bits; mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs; mp_ptr tp; mp_ptr up; mp_size_t usize; mp_ptr vp; mp_size_t vsize; mp_size_t gsize; TMP_DECL; up = PTR(u); usize = ABSIZ (u); vp = PTR(v); vsize = ABSIZ (v); /* GCD(0, V) == V. */ if (usize == 0) { SIZ (g) = vsize; if (g == v) return; MPZ_REALLOC (g, vsize); MPN_COPY (PTR (g), vp, vsize); return; } /* GCD(U, 0) == U. */ if (vsize == 0) { SIZ (g) = usize; if (g == u) return; MPZ_REALLOC (g, usize); MPN_COPY (PTR (g), up, usize); return; } if (usize == 1) { SIZ (g) = 1; PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]); return; } if (vsize == 1) { SIZ(g) = 1; PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]); return; } TMP_MARK; /* Eliminate low zero bits from U and V and move to temporary storage. */ while (*up == 0) up++; u_zero_limbs = up - PTR(u); usize -= u_zero_limbs; count_trailing_zeros (u_zero_bits, *up); tp = up; up = TMP_ALLOC_LIMBS (usize); if (u_zero_bits != 0) { mpn_rshift (up, tp, usize, u_zero_bits); usize -= up[usize - 1] == 0; } else MPN_COPY (up, tp, usize); while (*vp == 0) vp++; v_zero_limbs = vp - PTR (v); vsize -= v_zero_limbs; count_trailing_zeros (v_zero_bits, *vp); tp = vp; vp = TMP_ALLOC_LIMBS (vsize); if (v_zero_bits != 0) { mpn_rshift (vp, tp, vsize, v_zero_bits); vsize -= vp[vsize - 1] == 0; } else MPN_COPY (vp, tp, vsize); if (u_zero_limbs > v_zero_limbs) { g_zero_limbs = v_zero_limbs; g_zero_bits = v_zero_bits; } else if (u_zero_limbs < v_zero_limbs) { g_zero_limbs = u_zero_limbs; g_zero_bits = u_zero_bits; } else /* Equal. */ { g_zero_limbs = u_zero_limbs; g_zero_bits = MIN (u_zero_bits, v_zero_bits); } /* Call mpn_gcd. The 2nd argument must not have more bits than the 1st. */ vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1])) ? mpn_gcd (vp, vp, vsize, up, usize) : mpn_gcd (vp, up, usize, vp, vsize); /* Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits). 
*/ gsize = vsize + g_zero_limbs; if (g_zero_bits != 0) { mp_limb_t cy_limb; gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0; MPZ_REALLOC (g, gsize); MPN_ZERO (PTR (g), g_zero_limbs); tp = PTR(g) + g_zero_limbs; cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits); if (cy_limb != 0) tp[vsize] = cy_limb; }

void mpn_toom53_mul (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { mp_size_t n, s, t; int vm1_neg, vmh_neg; mp_limb_t cy; mp_ptr gp, hp; mp_ptr as1, asm1, as2, ash, asmh; mp_ptr bs1, bsm1, bs2, bsh, bsmh; enum toom4_flags flags; TMP_DECL; #define a0 ap #define a1 (ap + n) #define a2 (ap + 2*n) #define a3 (ap + 3*n) #define a4 (ap + 4*n) #define b0 bp #define b1 (bp + n) #define b2 (bp + 2*n) n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3); s = an - 4 * n; t = bn - 2 * n; ASSERT (0 < s && s <= n); ASSERT (0 < t && t <= n); TMP_MARK; as1 = TMP_SALLOC_LIMBS (n + 1); asm1 = TMP_SALLOC_LIMBS (n + 1); as2 = TMP_SALLOC_LIMBS (n + 1); ash = TMP_SALLOC_LIMBS (n + 1); asmh = TMP_SALLOC_LIMBS (n + 1); bs1 = TMP_SALLOC_LIMBS (n + 1); bsm1 = TMP_SALLOC_LIMBS (n + 1); bs2 = TMP_SALLOC_LIMBS (n + 1); bsh = TMP_SALLOC_LIMBS (n + 1); bsmh = TMP_SALLOC_LIMBS (n + 1); gp = pp; hp = pp + n + 1; /* Compute as1 and asm1. */ gp[n] = mpn_add_n (gp, a0, a2, n); gp[n] += mpn_add (gp, gp, n, a4, s); hp[n] = mpn_add_n (hp, a1, a3, n); #if HAVE_NATIVE_mpn_addsub_n if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_addsub_n (as1, asm1, hp, gp, n + 1); vm1_neg = 1; } else { mpn_addsub_n (as1, asm1, gp, hp, n + 1); vm1_neg = 0; } #else mpn_add_n (as1, gp, hp, n + 1); if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_sub_n (asm1, hp, gp, n + 1); vm1_neg = 1; } else { mpn_sub_n (asm1, gp, hp, n + 1); vm1_neg = 0; } #endif /* Compute as2. 
*/ #if !HAVE_NATIVE_mpn_addlsh_n ash[n] = mpn_lshift (ash, a2, n, 2); /* 4a2 */ #endif #if HAVE_NATIVE_mpn_addlsh1_n cy = mpn_addlsh1_n (as2, a3, a4, s); if (s != n) cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy); cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n); cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n); as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n); #else cy = mpn_lshift (as2, a4, s, 1); cy += mpn_add_n (as2, a3, as2, s); if (s != n) cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy); cy = 4 * cy + mpn_lshift (as2, as2, n, 2); cy += mpn_add_n (as2, a1, as2, n); cy = 2 * cy + mpn_lshift (as2, as2, n, 1); as2[n] = cy + mpn_add_n (as2, a0, as2, n); mpn_add_n (as2, ash, as2, n + 1); #endif /* Compute ash and asmh. */ #if HAVE_NATIVE_mpn_addlsh_n cy = mpn_addlsh_n (gp, a2, a0, n, 2); /* 4a0 + a2 */ cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2); /* 16a0 + 4a2 + a4 */ /* FIXME s */ gp[n] = cy; cy = mpn_addlsh_n (hp, a3, a1, n, 2); /* 4a1 + a3 */ cy = 2 * cy + mpn_lshift (hp, hp, n, 1); /* 8a1 + 2a3 */ hp[n] = cy; #else gp[n] = mpn_lshift (gp, a0, n, 4); /* 16a0 */ mpn_add (gp, gp, n + 1, a4, s); /* 16a0 + a4 */ mpn_add_n (gp, ash, gp, n+1); /* 16a0 + 4a2 + a4 */ cy = mpn_lshift (hp, a1, n, 3); /* 8a1 */ cy += mpn_lshift (ash, a3, n, 1); /* 2a3 */ cy += mpn_add_n (hp, ash, hp, n); /* 8a1 + 2a3 */ hp[n] = cy; #endif #if HAVE_NATIVE_mpn_addsub_n if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_addsub_n (ash, asmh, hp, gp, n + 1); vmh_neg = 1; } else { mpn_addsub_n (ash, asmh, gp, hp, n + 1); vmh_neg = 0; } #else mpn_add_n (ash, gp, hp, n + 1); if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_sub_n (asmh, hp, gp, n + 1); vmh_neg = 1; } else { mpn_sub_n (asmh, gp, hp, n + 1); vmh_neg = 0; } #endif /* Compute bs1 and bsm1. */ bs1[n] = mpn_add (bs1, b0, n, b2, t); /* b0 + b2 */ #if HAVE_NATIVE_mpn_addsub_n if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0) { bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1; bsm1[n] = 0; vm1_neg ^= 1; }

void mpn_div_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch) { mp_ptr new_dp, new_np, tp, rp; mp_limb_t cy, dh, qh; mp_size_t new_nn, qn; gmp_pi1_t dinv; int cnt; TMP_DECL; TMP_MARK; ASSERT (nn >= dn); ASSERT (dn > 0); ASSERT (dp[dn - 1] != 0); ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn)); ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn)); ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn)); ASSERT_ALWAYS (FUDGE >= 2); if (dn == 1) { mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]); return; } qn = nn - dn + 1; /* Quotient size, high limb might be zero */ if (qn + FUDGE >= dn) { /* |________________________| |_______| */ new_np = scratch; dh = dp[dn - 1]; if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) { count_leading_zeros (cnt, dh); cy = mpn_lshift (new_np, np, nn, cnt); new_np[nn] = cy; new_nn = nn + (cy != 0); new_dp = TMP_ALLOC_LIMBS (dn); mpn_lshift (new_dp, dp, dn, cnt); if (dn == 2) { qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp); } else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD)) { invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]); qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32); } else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */ { invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]); qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv); } else { mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0); mp_ptr scratch = TMP_ALLOC_LIMBS (itch); qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch); } if (cy == 0) qp[qn - 1] = qh; else if (UNLIKELY (qh != 0)) { /* This happens only when the quotient is close to B^n and mpn_*_divappr_q returned B^n. 
*/ mp_size_t i, n; n = new_nn - dn; for (i = 0; i < n; i++) qp[i] = GMP_NUMB_MAX; qh = 0; /* currently ignored */ } } else /* divisor is already normalised */ { if (new_np != np) MPN_COPY (new_np, np, nn); if (dn == 2) { qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp); } else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD)) { invert_pi1 (dinv, dh, dp[dn - 2]); qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32); } else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */ { invert_pi1 (dinv, dh, dp[dn - 2]); qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv); } else { mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0); mp_ptr scratch = TMP_ALLOC_LIMBS (itch); qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch); } qp[nn - dn] = qh; } } else { /* |________________________| |_________________| */ tp = TMP_ALLOC_LIMBS (qn + 1); new_np = scratch; new_nn = 2 * qn + 1; if (new_np == np) /* We need {np,nn} to remain untouched until the final adjustment, so we need to allocate separate space for new_np. 
*/ new_np = TMP_ALLOC_LIMBS (new_nn + 1); dh = dp[dn - 1]; if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) { count_leading_zeros (cnt, dh); cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt); new_np[new_nn] = cy; new_nn += (cy != 0); new_dp = TMP_ALLOC_LIMBS (qn + 1); mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt); new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt); if (qn + 1 == 2) { qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp); } else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1)) { invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]); qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32); } else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1)) { invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]); qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv); } else { mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0); mp_ptr scratch = TMP_ALLOC_LIMBS (itch); qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch); } if (cy == 0) tp[qn] = qh; else if (UNLIKELY (qh != 0)) { /* This happens only when the quotient is close to B^n and mpn_*_divappr_q returned B^n. */ mp_size_t i, n; n = new_nn - (qn + 1); for (i = 0; i < n; i++) tp[i] = GMP_NUMB_MAX; qh = 0; /* currently ignored */ } } else /* divisor is already normalised */ {

/* (rp, 2n) = (up, n) * (vp, n) using Toom-4 (balanced 4-way splitting).

   Each operand is split into four pieces of sn = ceil(n/4) limbs (the top
   piece has h1 = n - 3*sn limbs).  The products at the seven evaluation
   points 1, -1, 2, 1/2, -1/2, 0 and oo (see the comments on the MUL_TC4*
   calls) are combined by mpn_toom4_interpolate.

   NOTE(review): the r1..r7 / MUL_TC4* / TC4_DENORM macros are defined
   elsewhere in this file (not visible in this chunk); per the layout
   diagram below, r1..r7 overlay rp and tp.  The n1..n8 variables carry
   the signed lengths of the point values; a negated length marks a
   negative value.  The local `ind` appears unused here — presumably
   leftover; confirm before removing.  */
void mpn_toom4_mul_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
   mp_size_t ind;
   mp_limb_t cy, cy2, r30, r31;
   mp_ptr tp;
   mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1;

   TMP_DECL;

   sn = (n + 3) / 4;      /* limbs per piece (pieces 0..2) */
   h1 = n - 3*sn;         /* limbs in the top (fourth) piece, 0 < h1 <= sn */

#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
#define b3 (vp + 3*sn)

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   TMP_MARK;

   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));

   /* Five (sn+1)-limb evaluation buffers carved from the scratch space.  */
#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
#define u6 (tp + 4*t4 + 4*(sn+1))

   /* Evaluate A at +1 (u3 = a0+a1+a2+a3) and -1 (u4 = |even - odd|,
      sign recorded in n4).  */
   u6[sn] = mpn_add(u6, a1, sn, a3, h1);
   u5[sn] = mpn_add_n(u5, a2, a0, sn);
   mpn_add_n(u3, u5, u6, sn + 1);
   n4 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u4, u5, u6, sn + 1);
   else
   {
      mpn_sub_n(u4, u6, u5, sn + 1);
      n4 = -n4;
   }

   /* Same evaluations for B: B(1) into r2, |B(-1)| into u5 (sign n5).  */
   u6[sn] = mpn_add(u6, b1, sn, b3, h1);
   u5[sn] = mpn_add_n(u5, b2, b0, sn);
   mpn_add_n(r2, u5, u6, sn + 1);
   n5 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u5, u5, u6, sn + 1);
   else
   {
      mpn_sub_n(u5, u6, u5, sn + 1);
      n5 = -n5;
   }

   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */

   /* Evaluate A at 1/2 (scaled by 8): r1 = 8a0+2a2, r2 = 4a1+a3;
      sum -> u5, |difference| -> u6 (sign n6).  */
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, a2, sn, 1);
   MPN_COPY(r2, a3, h1);
   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);
   cy = mpn_addmul_1(r2, a1, h1, 4);
#endif
   if (sn > h1)
   {
      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u5, r1, r2, sn + 1);
   n6 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(u6, r1, r2, sn + 1);
   else
   {
      mpn_sub_n(u6, r2, r1, sn + 1);
      n6 = -n6;
   }

   /* Same 1/2-point evaluation for B: sum -> u2, |difference| -> r2
      (sign n8).  */
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, b3, b1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, b2, sn, 1);
   MPN_COPY(r2, b3, h1);
   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);
   cy = mpn_addmul_1(r2, b1, h1, 4);
#endif
   if (sn > h1)
   {
      cy2 = mpn_lshift(r2 + h1, b1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u2, r1, r2, sn + 1);
   n8 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(r2, r1, r2, sn + 1);
   else
   {
      mpn_sub_n(r2, r2, r1, sn + 1);
      n8 = -n8;
   }

   /* The next products clobber the first limbs of r3; save them so
      r3[1] can be restored afterwards and r30 passed to interpolation.  */
   r30 = r3[0];
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */
   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */
   r3[1] = r31;

   /* Evaluate A at 2: u2 = a0 + 2a1 + 4a2 + 8a3 (Horner form in the
      addlsh1 branch).  */
#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(u2, a2, a3, h1);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy);
   u2[sn] = cy;
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);
#else
   MPN_COPY(u2, a0, sn);
   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);
   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);
   cy = mpn_addmul_1(u2, a3, h1, 8);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);
   u2[sn] += cy;
#endif

   /* Evaluate B at 2: r1 = b0 + 2b1 + 4b2 + 8b3.  */
#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(r1, b2, b3, h1);
   if (sn > h1)
      cy = mpn_add_1(r1 + h1, b2 + h1, sn - h1, cy);
   r1[sn] = cy;
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);
#else
   MPN_COPY(r1, b0, sn);
   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);
   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);
   cy = mpn_addmul_1(r1, b3, h1, 8);
   if (sn > h1)
      cy = mpn_add_1(r1 + h1, r1 + h1, sn - h1, cy);
   r1[sn] += cy;
#endif

   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */
   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h1); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */

   TC4_DENORM(r1, n1, t4 - 1);

   /* Layout of the point values over rp and tp:

      rp        rp1       rp2       rp3       rp4       rp5       rp6       rp7
      <-----------r7-----------><------------r5-------------->
                <-------------r3------------->
                          <-------------r6------------->
                                    <-----------r2------------>{ }
                                              <-------------r4-------------->
                                                        <--------------r1---->
   */

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   /* Interpolation may produce fewer than 2n limbs; zero-pad the top.  */
   if (rpn != 2*n)
   {
      MPN_ZERO((rp + rpn), 2*n - rpn);
   }

   TMP_FREE;
}

void mpn_toom63_mul (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { mp_size_t n, s, t; mp_limb_t cy; int sign; /***************************** decomposition *******************************/ #define a5 (ap + 5 * n) #define b0 (bp + 0 * n) #define b1 (bp + 1 * n) #define b2 (bp + 2 * n) ASSERT (an >= bn); n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3); s = an - 5 * n; t = bn - 2 * n; ASSERT (0 < s && s <= n); ASSERT (0 < t && t <= n); /* WARNING! it assumes s+t>=n */ ASSERT ( s + t >= n ); ASSERT ( s + t > 4); /* WARNING! it assumes n>1 */ ASSERT ( n > 2); #define r8 pp /* 2n */ #define r7 scratch /* 3n+1 */ #define r5 (pp + 3*n) /* 3n+1 */ #define v0 (pp + 3*n) /* n+1 */ #define v1 (pp + 4*n+1) /* n+1 */ #define v2 (pp + 5*n+2) /* n+1 */ #define v3 (pp + 6*n+3) /* n+1 */ #define r3 (scratch + 3 * n + 1) /* 3n+1 */ #define r1 (pp + 7*n) /* s+t <= 2*n */ #define ws (scratch + 6 * n + 2) /* ??? */ /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may need all of them, when DO_mpn_sublsh_n usea a scratch */ /* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */ /********************** evaluation and recursive calls *********************/ /* $\pm4$ */ sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp); pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */ /* FIXME: use addlsh */ v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */ if ( n == t ) v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */ else v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */ sign ^= abs_sub_add_n (v1, v3, pp, n + 1); TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */ TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */ mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4); /* $\pm1$ */ sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp); /* Compute bs1 and bsm1. 
Code taken from toom33 */ cy = mpn_add (ws, b0, n, b2, t); #if HAVE_NATIVE_mpn_add_n_sub_n if (cy == 0 && mpn_cmp (ws, b1, n) < 0) { cy = mpn_add_n_sub_n (v3, v1, b1, ws, n); v3[n] = cy >> 1; v1[n] = 0; sign = ~sign; }

/* returns 0 if result exact, non-zero otherwise */ int mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mp_rnd_t rnd_mode) { long int xn, yn, dif, sh, i; mp_limb_t *xp, *yp, *tmp, c, d; mp_exp_t exp; int inexact, middle = 1; TMP_DECL(marker); if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) )) { if (MPFR_IS_NAN(x)) { MPFR_SET_NAN(y); MPFR_RET_NAN; } else if (MPFR_IS_INF(x)) { MPFR_SET_INF(y); MPFR_SET_SAME_SIGN(y, x); MPFR_RET(0); } else { MPFR_ASSERTD(MPFR_IS_ZERO(x)); if (u == 0)/* 0/0 is NaN */ { MPFR_SET_NAN(y); MPFR_RET_NAN; } else { MPFR_SET_ZERO(y); MPFR_RET(0); } } } if (MPFR_UNLIKELY(u == 0)) { /* x/0 is Inf */ MPFR_SET_INF(y); MPFR_SET_SAME_SIGN(y, x); MPFR_RET(0); } MPFR_CLEAR_FLAGS(y); MPFR_SET_SAME_SIGN(y, x); TMP_MARK(marker); xn = MPFR_LIMB_SIZE(x); yn = MPFR_LIMB_SIZE(y); xp = MPFR_MANT(x); yp = MPFR_MANT(y); exp = MPFR_GET_EXP (x); dif = yn + 1 - xn; /* we need to store yn+1 = xn + dif limbs of the quotient */ /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */ tmp = (mp_limb_t*) TMP_ALLOC((yn + 1) * BYTES_PER_MP_LIMB); c = (mp_limb_t) u; MPFR_ASSERTN(u == c); if (dif >= 0) c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */ else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */ c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c); inexact = (c != 0); /* First pass in estimating next bit of the quotient, in case of RNDN * * In case we just have the right number of bits (postpone this ?), * * we need to check whether the remainder is more or less than half * * the divisor. The test must be performed with a subtraction, so as * * to prevent carries. */ if (rnd_mode == GMP_RNDN) { if (c < (mp_limb_t) u - c) /* We have u > c */ middle = -1; else if (c > (mp_limb_t) u - c) middle = 1; else middle = 0; /* exactly in the middle */ } /* If we believe that we are right in the middle or exact, we should check that we did not neglect any word of x (division large / 1 -> small). 
*/ for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++) if (xp[i]) inexact = middle = 1; /* larger than middle */ /* If the high limb of the result is 0 (xp[xn-1] < u), remove it. Otherwise, compute the left shift to be performed to normalize. In the latter case, we discard some low bits computed. They contain information useful for the rounding, hence the updating of middle and inexact. */ if (tmp[yn] == 0) { MPN_COPY(yp, tmp, yn); exp -= BITS_PER_MP_LIMB; sh = 0; } else { count_leading_zeros (sh, tmp[yn]); /* shift left to normalize */ if (sh) { mp_limb_t w = tmp[0] << sh; mpn_lshift (yp, tmp + 1, yn, sh); yp[0] += tmp[0] >> (BITS_PER_MP_LIMB - sh); if (w > (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1))) { middle = 1; } else if (w < (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1))) { middle = -1; } else { middle = (c != 0); } inexact = inexact || (w != 0); exp -= sh; } else { /* this happens only if u == 1 and xp[xn-1] >= 1<<(BITS_PER_MP_LIMB-1). It might be better to handle the u == 1 case seperately ? */ MPN_COPY (yp, tmp + 1, yn); } }

/* Obtain a sequence of random numbers.

   Fill rp with exactly `nbits` random bits produced by repeated calls to
   the linear congruential generator `lc`.  Each lc call yields
   chunk_nbits = _mp_m2exp / 2 usable bits; chunks are packed end to end
   into rp, shifting into place when the write position is not limb
   aligned.  */
static void
randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
{
  unsigned long int rbitpos;   /* number of bits of rp filled so far */
  int chunk_nbits;             /* usable bits per lc () call */
  mp_ptr tp;                   /* temp buffer for unaligned chunks */
  mp_size_t tn;                /* limb size of one chunk */
  gmp_rand_lc_struct *p;
  TMP_DECL;

  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);

  TMP_MARK;

  chunk_nbits = p->_mp_m2exp / 2;
  tn = BITS_TO_LIMBS (chunk_nbits);

  tp = TMP_ALLOC_LIMBS (tn);

  rbitpos = 0;
  while (rbitpos + chunk_nbits <= nbits)
    {
      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;

      if (rbitpos % GMP_NUMB_BITS != 0)
        {
          mp_limb_t savelimb, rcy;
          /* Target of new chunk is not bit aligned.  Use temp space
             and align things by shifting it up.  The bits already in
             the partially-filled limb are saved and OR-ed back in.  */
          lc (tp, rstate);
          savelimb = r2p[0];
          rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
          r2p[0] |= savelimb;
          /* bogus -- NOTE(review): upstream flags this carry-store
             condition as suspect; the last-chunk branch below uses a
             different test for the same situation.  Verify before
             changing.  */
          if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
              > GMP_NUMB_BITS)
            r2p[tn] = rcy;
        }
      else
        {
          /* Target of new chunk is bit aligned.  Let `lc' put bits
             directly into our target variable.  */
          lc (r2p, rstate);
        }
      rbitpos += chunk_nbits;
    }

  /* Handle last [0..chunk_nbits) bits.  */
  if (rbitpos != nbits)
    {
      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
      int last_nbits = nbits - rbitpos;
      tn = BITS_TO_LIMBS (last_nbits);
      lc (tp, rstate);
      if (rbitpos % GMP_NUMB_BITS != 0)
        {
          mp_limb_t savelimb, rcy;
          /* Target of new chunk is not bit aligned.  Use temp space
             and align things by shifting it up.  */
          savelimb = r2p[0];
          rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
          r2p[0] |= savelimb;
          /* Store the shift carry only if the needed bits spill past
             the limbs written by mpn_lshift.  */
          if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
            r2p[tn] = rcy;
        }
      else
        {
          MPN_COPY (r2p, tp, tn);
        }

      /* Mask off top bits if needed, so exactly nbits bits are returned.  */
      if (nbits % GMP_NUMB_BITS != 0)
        rp[nbits / GMP_NUMB_BITS]
          &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
    }

  TMP_FREE;
}