void mpz_combit (mpz_ptr d, unsigned long int bit_index) { mp_size_t dsize = ABSIZ(d); mp_ptr dp = LIMBS(d); mp_size_t limb_index = bit_index / GMP_NUMB_BITS; mp_limb_t bit = ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS)); if (limb_index >= dsize) { MPZ_REALLOC(d, limb_index + 1); dp = LIMBS(d); MPN_ZERO(dp + dsize, limb_index + 1 - dsize); dsize = limb_index + 1; } if (SIZ(d) >= 0) { dp[limb_index] ^= bit; MPN_NORMALIZE (dp, dsize); SIZ(d) = dsize; } else { mp_limb_t x = -dp[limb_index]; mp_size_t i; /* non-zero limb below us means ones-complement */ for (i = limb_index-1; i >= 0; i--) if (dp[i] != 0) { x--; /* change twos comp to ones comp */ break; } if (x & bit) { mp_limb_t c; /* Clearing the bit increases the magitude. We might need a carry. */ MPZ_REALLOC(d, dsize + 1); dp = LIMBS(d); __GMPN_ADD_1 (c, dp+limb_index, dp+limb_index, dsize - limb_index, bit); dp[dsize] = c; dsize += c; } else /* Setting the bit decreases the magnitude */ mpn_sub_1(dp+limb_index, dp+limb_index, dsize + limb_index, bit); MPN_NORMALIZE (dp, dsize); SIZ(d) = -dsize; } }

/* c is the top bits of the inputs, (fully reduced) c & 2 is the top bit of y c & 1 is the top bit of z */ int mpn_mulmod_2expp1_basecase (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, int c, mpir_ui b, mp_ptr tp) { int cy, cz; mp_size_t n, k; cy = c & 2; cz = c & 1; n = BITS_TO_LIMBS (b); k = GMP_NUMB_BITS * n - b; ASSERT(b > 0); ASSERT(n > 0); ASSERT_MPN(yp, n); ASSERT_MPN(zp, n); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n)); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n)); ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0); ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0); #if WANT_ASSERT { mp_size_t t = n; MPN_NORMALIZE(yp, t); ASSERT(cy == 0 || t == 0); t = n; MPN_NORMALIZE(zp, t); ASSERT(cz == 0 || t == 0); } #endif if (LIKELY (cy == 0)) { if (LIKELY (cz == 0)) { c = mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp); } else { c = mpn_neg_n (xp, yp, n); c = mpn_add_1 (xp, xp, n, c); xp[n - 1] &= GMP_NUMB_MASK >> k; } } else { if (LIKELY (cz == 0))

void mpn_trace_file (const char *filename, mp_srcptr ptr, mp_size_t size) { FILE *fp; mpz_t z; fp = fopen (filename, "w"); if (fp == NULL) { perror ("fopen"); abort(); } MPN_NORMALIZE (ptr, size); PTR(z) = (mp_ptr) ptr; SIZ(z) = (int) size; mpz_out_str (fp, mp_trace_base, z); fprintf (fp, "\n"); if (ferror (fp) || fclose (fp) != 0) { printf ("error writing %s\n", filename); abort(); } }

void _tc4_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n) { mp_limb_t cy; mp_size_t s1 = ABS(r1n); mp_size_t s2 = ABS(r2n); if (!s1) { *rn = 0; } else if (!s2) { if (rp != r1) MPN_COPY(rp, r1, s1); *rn = r1n; } else if ((r1n ^ r2n) >= 0) { *rn = r1n; cy = mpn_add(rp, r1, s1, r2, s2); if (cy) { rp[s1] = cy; if ((*rn) < 0) (*rn)--; else (*rn)++; } } else { mp_size_t ct; if (s1 != s2) ct = 1; else MPN_CMP(ct, r1, r2, s1); if (!ct) *rn = 0; else if (ct > 0) { mpn_sub(rp, r1, s1, r2, s2); *rn = s1; MPN_NORMALIZE(rp, (*rn)); if (r1n < 0) *rn = -(*rn); } else { mpn_sub_n(rp, r2, r1, s1); *rn = s1; MPN_NORMALIZE(rp, (*rn)); if (r1n > 0) *rn = -(*rn); } } }

void mpz_limbs_finish (mpz_ptr x, mp_size_t n) { assert (n >= 0); MPN_NORMALIZE (PTR(x), n); SIZ (x) = n; }

static int mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize) { mp_srcptr ap = a->_mp_d; mp_size_t asize = a->_mp_size; MPN_NORMALIZE (bp, bsize); return asize == bsize && mpn_cmp (ap, bp, asize) == 0; }

void mpz_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size) { ASSERT (size >= 0); MPN_NORMALIZE (p, size); MPZ_REALLOC (z, size); MPN_COPY (PTR(z), p, size); SIZ(z) = size; }

void mpz_init_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size) { ASSERT (size >= 0); MPN_NORMALIZE (p, size); ALLOC(z) = MAX (size, 1); PTR(z) = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC(z)); SIZ(z) = size; MPN_COPY (PTR(z), p, size); }

mpz_srcptr mpz_roinit_n (mpz_ptr x, mp_srcptr xp, mp_size_t xs) { mp_size_t xn = ABS(xs); MPN_NORMALIZE (xp, xn); ALLOC (x) = 0; SIZ (x) = xs < 0 ? -xn : xn; PTR (x) = (mp_ptr) xp; return x; }

/* Needs some ugly casts. */ mpz_srcptr mpz_roinit_n (mpz_ptr x, const mp_limb_t *xp, mp_size_t xs) { mp_size_t xn = ABS (xs); MPN_NORMALIZE (xp, xn); x->_mp_size = xs < 0 ? -xn : xn; x->_mp_alloc = 0; x->_mp_d = (mp_limb_t *) xp; return x; }

void mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den) { mp_ptr qp; mp_size_t qn; mp_srcptr np, dp; mp_size_t nn, dn; TMP_DECL; #if WANT_ASSERT { mpz_t rem; mpz_init (rem); mpz_tdiv_r (rem, num, den); ASSERT (SIZ(rem) == 0); mpz_clear (rem); } #endif nn = ABSIZ (num); dn = ABSIZ (den); if (nn < dn) { /* This special case avoids segfaults below when the function is incorrectly called with |N| < |D|, N != 0. It also handles the well-defined case N = 0. */ SIZ(quot) = 0; return; } qn = nn - dn + 1; TMP_MARK; if (quot == num || quot == den) qp = TMP_ALLOC_LIMBS (qn); else qp = MPZ_REALLOC (quot, qn); np = PTR(num); dp = PTR(den); mpn_divexact (qp, np, nn, dp, dn); MPN_NORMALIZE (qp, qn); if (qp != PTR(quot)) MPN_COPY (MPZ_REALLOC (quot, qn), qp, qn); SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn; TMP_FREE; }

/* this function is useful in debug mode to print non-normalized residues */ static void mpresn_print (mpres_t x, mpmod_t n) { mp_size_t m, xn; xn = SIZ(x); m = ABSIZ(x); MPN_NORMALIZE(PTR(x), m); SIZ(x) = xn >= 0 ? m : -m; gmp_printf ("%Zd\n", x); SIZ(x) = xn; }

void mpz_urandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long int nbits) { mp_ptr rp; mp_size_t size; size = BITS_TO_LIMBS (nbits); rp = MPZ_REALLOC (rop, size); _gmp_rand (rp, rstate, nbits); MPN_NORMALIZE (rp, size); SIZ (rop) = size; }

/* Print "name=value\n" to stdout for an mpn style ptr,size. */ void mpn_trace (const char *name, mp_srcptr ptr, mp_size_t size) { mpz_t z; if (ptr == NULL) { mpz_trace (name, NULL); return; } MPN_NORMALIZE (ptr, size); PTR(z) = (mp_ptr) ptr; SIZ(z) = size; ALLOC(z) = size; mpz_trace (name, z); }

void mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt) { mp_size_t in_size = ABS (in->_mp_size); mp_size_t res_size; mp_size_t limb_cnt = cnt / GMP_NUMB_BITS; mp_srcptr in_ptr = in->_mp_d; if (in_size > limb_cnt) { /* The input operand is (probably) greater than 2**CNT. */ mp_limb_t x; x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % GMP_NUMB_BITS) - 1); if (x != 0) { res_size = limb_cnt + 1; if (res->_mp_alloc < res_size) _mpz_realloc (res, res_size); res->_mp_d[limb_cnt] = x; } else { res_size = limb_cnt; MPN_NORMALIZE (in_ptr, res_size); if (res->_mp_alloc < res_size) _mpz_realloc (res, res_size); limb_cnt = res_size; } } else { /* The input operand is smaller than 2**CNT. We perform a no-op, apart from that we might need to copy IN to RES. */ res_size = in_size; if (res->_mp_alloc < res_size) _mpz_realloc (res, res_size); limb_cnt = res_size; } if (res != in) MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt); res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size; }

/**************** * Shift A by N bits to the right * FIXME: should use alloc_limb if X and A are same. */ void mpi_rshift( MPI x, MPI a, unsigned n ) { mpi_ptr_t xp; mpi_size_t xsize; xsize = a->nlimbs; x->sign = a->sign; RESIZE_IF_NEEDED(x, xsize); xp = x->d; if( xsize ) { mpihelp_rshift( xp, a->d, xsize, n); MPN_NORMALIZE( xp, xsize); } x->nlimbs = xsize; }

/* this function is useful in debug mode to print residues */ static void mpres_print (mpres_t x, char* name, mpmod_t n) { mp_size_t m, xn; mpres_t t; mpres_init(t, n); mpz_set_ui(t, 1); mpres_mul (t, x, t, n); xn = SIZ(t); m = ABSIZ(t); MPN_NORMALIZE(PTR(t), m); SIZ(t) = xn >= 0 ? m : -m; gmp_printf ("%s=%Zd\n", name, t); SIZ(t) = xn; mpres_clear (t, n); }

void mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt) { mp_size_t in_size = ABSIZ (in); mp_size_t res_size; mp_size_t limb_cnt = cnt / GMP_NUMB_BITS; mp_srcptr in_ptr = PTR (in); if (in_size > limb_cnt) { /* The input operand is (probably) greater than 2**CNT. */ mp_limb_t x; x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % GMP_NUMB_BITS) - 1); if (x != 0) { res_size = limb_cnt + 1; MPZ_REALLOC (res, res_size); PTR (res)[limb_cnt] = x; } else { res_size = limb_cnt; MPN_NORMALIZE (in_ptr, res_size); MPZ_REALLOC (res, res_size); limb_cnt = res_size; } } else { /* The input operand is smaller than 2**CNT. We perform a no-op, apart from that we might need to copy IN to RES. */ res_size = in_size; MPZ_REALLOC (res, res_size); limb_cnt = res_size; } if (res != in) MPN_COPY (PTR (res), PTR (in), limb_cnt); SIZ (res) = SIZ (in) >= 0 ? res_size : -res_size; }

static void hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn, mp_srcptr qp, mp_size_t qn, int d) { ASSERT (!gp); ASSERT (d >= 0); ASSERT (d <= 1); MPN_NORMALIZE (qp, qn); if (qn > 0) { struct hgcd_matrix *M = (struct hgcd_matrix *) p; /* NOTES: This is a bit ugly. A tp area is passed to gcd_subdiv_step, which stores q at the start of that area. We now use the rest. */ mp_ptr tp = (mp_ptr) qp + qn; mpn_hgcd_matrix_update_q (M, qp, qn, d, tp); } }

void gmpmee_redc_decode (mpz_ptr r, mpz_srcptr u, gmpmee_redc_t mrt) { mp_ptr rp, mp; mp_size_t n; mpz_srcptr m; gmpmee_redc_mod (r, u, mrt); m = MPZ(mrt); n = SIZ(m); rp = PTR(r); mp = PTR(m); if (mpn_cmp (rp, mp, n) >= 0) mpn_sub_n (rp, rp, mp, n); MPN_NORMALIZE (rp, n); SIZ(r) = n; }

/* * Shift A by N bits to the left. */ void gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) { unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); unsigned int nbits = (n%BITS_PER_MPI_LIMB); if (x == a && !n) return; /* In-place shift with an amount of zero. */ if ( x != a ) { /* Copy A to X. */ unsigned int alimbs = a->nlimbs; int asign = a->sign; mpi_ptr_t xp, ap; RESIZE_IF_NEEDED (x, alimbs+nlimbs+1); xp = x->d; ap = a->d; MPN_COPY (xp, ap, alimbs); x->nlimbs = alimbs; x->flags = a->flags; x->sign = asign; } if (nlimbs && !nbits) { /* Shift a full number of limbs. */ _gcry_mpi_lshift_limbs (x, nlimbs); } else if (n) { /* We use a very dump approach: Shift left by the number of limbs plus one and than fix it up by an rshift. */ _gcry_mpi_lshift_limbs (x, nlimbs+1); gcry_mpi_rshift (x, x, BITS_PER_MPI_LIMB - nbits); } MPN_NORMALIZE (x->d, x->nlimbs); }

void tc4_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn) { mp_size_t yu = ABS(*yn); mp_size_t xu = ABS(xn); mp_limb_t cy = 0; if (xn == 0) return; if (offset < yu) /* low part of x overlaps with y */ { if (offset + xu <= yu) /* x entirely inside y */ { cy = mpn_add_n (yp + offset, yp + offset, xp, xu); if (offset + xu < yu) cy = mpn_add_1 (yp + offset + xu, yp + offset + xu, yu - (offset + xu), cy); } else cy = mpn_add_n (yp + offset, yp + offset, xp, yu - offset); /* now cy is the carry at yp + yu */ if (xu + offset > yu) /* high part of x exceeds y */ { MPN_COPY (yp + yu, xp + yu - offset, xu + offset - yu); cy = mpn_add_1 (yp + yu, yp + yu, xu + offset - yu, cy); yu = xu + offset; } /* now cy is the carry at yp + yn */ if (cy) yp[yu++] = cy; MPN_NORMALIZE(yp, yu); *yn = yu; } else /* x does not overlap */ { if (offset > yu) MPN_ZERO (yp + yu, offset - yu); MPN_COPY (yp + offset, xp, xu); *yn = offset + xu; } }

void mpfr_extract (mpz_ptr y, mpfr_srcptr p, unsigned int i) { int two_i = 1 << i; int two_i_2 = i ? two_i / 2 : 1; mp_size_t size_p = MPFR_ABSSIZE(p); /* as 0 <= |p| < 1, we don't have to care with infinities, NaN, ... */ _mpz_realloc (y, two_i_2); if (size_p < two_i) { MPN_ZERO (PTR(y), two_i_2); if (size_p >= two_i_2) MPN_COPY (PTR(y) + two_i - size_p, MPFR_MANT(p), size_p - two_i_2); } else MPN_COPY (PTR(y), MPFR_MANT(p) + size_p - two_i, two_i_2); MPN_NORMALIZE (PTR(y), two_i_2); SIZ(y) = (MPFR_ISNEG(p)) ? -two_i_2 : two_i_2; }

void gcdext_get_t(mp_ptr t, mp_size_t * tn, mp_ptr gp, mp_size_t gn, mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n, mp_ptr s, mp_size_t sn, mp_ptr tp) { mp_size_t ss = ABS(sn); mp_limb_t cy; if (ss >= an) mpn_mul(tp, s, ss, ap, an); else mpn_mul(tp, ap, an, s, ss); (*tn) = ss + an; (*tn) -= (tp[(*tn) - 1] == 0); /* We must have s*ap >= gp and we really want to compute -t */ if (sn > 0) { mpn_sub(tp, tp, *tn, gp, gn); MPN_NORMALIZE(tp, (*tn)); } else { cy = mpn_add(tp, tp, *tn, gp, gn); if (cy) tp[(*tn)++] = cy; } if ((*tn) == 0) { return; } mpn_tdiv_qr(t, tp, 0, tp, (*tn), bp, n); ASSERT_MPN_ZERO_P(tp, n); (*tn) -= (n - 1); (*tn) -= (t[(*tn) - 1] == 0); }

void mpfr_extract (mpz_ptr y, mpfr_srcptr p, unsigned int i) { unsigned long two_i = 1UL << i; unsigned long two_i_2 = i ? two_i / 2 : 1; mp_size_t size_p = MPFR_LIMB_SIZE (p); /* as 0 <= |p| < 1, we don't have to care with infinities, NaN, ... */ MPFR_ASSERTD (!MPFR_IS_SINGULAR (p)); _mpz_realloc (y, two_i_2); if ((mpfr_uexp_t) size_p < two_i) { MPN_ZERO (PTR(y), two_i_2); if ((mpfr_uexp_t) size_p >= two_i_2) MPN_COPY (PTR(y) + two_i - size_p, MPFR_MANT(p), size_p - two_i_2); } else MPN_COPY (PTR(y), MPFR_MANT(p) + size_p - two_i, two_i_2); MPN_NORMALIZE (PTR(y), two_i_2); SIZ(y) = (MPFR_IS_NEG (p)) ? -two_i_2 : two_i_2; }

void mpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n) { mp_size_t len1, ind; mp_limb_t cy, r30, r31; mp_ptr tp; mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4; len1 = n; ASSERT (n >= 1); MPN_NORMALIZE(up, len1); sn = (n - 1) / 4 + 1; /* a0 - a3 are defined in mpn_toom4_mul_n above */ TC4_NORM(a0, a0n, sn); TC4_NORM(a1, a1n, sn); TC4_NORM(a2, a2n, sn); TC4_NORM(a3, a3n, n - 3*sn); t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1)); tc4_add_unsigned(u5, &n5, a3, a3n, a1, a1n); tc4_add_unsigned(u4, &n4, a2, a2n, a0, a0n); tc4_add_unsigned(u2, &n2, u4, n4, u5, n5); tc4_sub(u3, &n3, u4, n4, u5, n5); SQR_TC4(r4, n4, u3, n3); SQR_TC4_UNSIGNED(r3, n3, u2, n2); tc4_lshift(r1, &n1, a0, a0n, 3); tc4_addlsh1_unsigned(r1, &n1, a2, a2n); tc4_lshift(r2, &n8, a1, a1n, 2); tc4_add(r2, &n8, r2, n8, a3, a3n); tc4_add(u4, &n9, r1, n1, r2, n8); tc4_sub(u5, &n5, r1, n1, r2, n8); r30 = r3[0]; if (!n3) r30 = CNST_LIMB(0); r31 = r3[1]; SQR_TC4(r6, n6, u5, n5); SQR_TC4_UNSIGNED(r5, n5, u4, n9); r3[1] = r31; tc4_lshift(u2, &n8, a3, a3n, 3); tc4_addmul_1(u2, &n8, a2, a2n, 4); tc4_addlsh1_unsigned(u2, &n8, a1, a1n); tc4_add(u2, &n8, u2, n8, a0, a0n); SQR_TC4_UNSIGNED(r2, n2, u2, n8); SQR_TC4_UNSIGNED(r1, n1, a3, a3n); SQR_TC4_UNSIGNED(r7, n7, a0, a0n); TC4_DENORM(r1, n1, t4 - 1); TC4_DENORM(r2, n2, t4 - 1); if (n3) TC4_DENORM(r3, n3, t4 - 1); else { /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */ for (ind = 1 ; ind < t4 - 1; ind++) (r3)[ind] = CNST_LIMB(0); } TC4_DENORM(r4, n4, t4 - 1); TC4_DENORM(r5, n5, t4 - 1); TC4_DENORM(r6, n6, t4 - 1); TC4_DENORM(r7, n7, t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length) /* rp rp1 rp2 rp3 rp4 rp5 rp6 rp7 <----------- r7-----------><------------r5--------------> <-------------r3-------------> <-------------r6-------------> < -----------r2------------>{ } <-------------r4--------------> <--------------r1----> */ mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30); if (rpn != 2*n) { MPN_ZERO((rp + rpn), 2*n - rpn); } __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1)); }

void mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m) { mp_ptr xp, tp, qp, mp, bp; mp_size_t xn, tn, mn, bn; int m_zero_cnt; int c; mp_limb_t e; TMP_DECL; mp = PTR(m); mn = ABSIZ(m); if (mn == 0) DIVIDE_BY_ZERO; if (el == 0) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending on if MOD equals 1. */ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; PTR(r)[0] = 1; return; } TMP_MARK; /* Normalize m (i.e. make its most significant bit set) as required by division functions below. */ count_leading_zeros (m_zero_cnt, mp[mn - 1]); m_zero_cnt -= GMP_NAIL_BITS; if (m_zero_cnt != 0) { mp_ptr new_mp = TMP_ALLOC_LIMBS (mn); mpn_lshift (new_mp, mp, mn, m_zero_cnt); mp = new_mp; } bn = ABSIZ(b); bp = PTR(b); if (bn > mn) { /* Reduce possibly huge base. Use a function call to reduce, since we don't want the quotient allocation to live until function return. */ mp_ptr new_bp = TMP_ALLOC_LIMBS (mn); reduce (new_bp, bp, bn, mp, mn); bp = new_bp; bn = mn; /* Canonicalize the base, since we are potentially going to multiply with it quite a few times. */ MPN_NORMALIZE (bp, bn); } if (bn == 0) { SIZ(r) = 0; TMP_FREE; return; } tp = TMP_ALLOC_LIMBS (2 * mn + 1); xp = TMP_ALLOC_LIMBS (mn); qp = TMP_ALLOC_LIMBS (mn + 1); MPN_COPY (xp, bp, bn); xn = bn; e = el; count_leading_zeros (c, e); e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ c = BITS_PER_MP_LIMB - 1 - c; /* Main loop. */ /* If m is already normalized (high bit of high limb set), and b is the same size, but a bigger value, and e==1, then there's no modular reductions done and we can end up with a result out of range at the end. */ if (c == 0) { if (xn == mn && mpn_cmp (xp, mp, mn) >= 0) mpn_sub_n (xp, xp, mp, mn); goto finishup; } while (c != 0) { mpn_sqr_n (tp, xp, xn); tn = 2 * xn; tn -= tp[tn - 1] == 0; if (tn < mn) { MPN_COPY (xp, tp, tn); xn = tn; } else { mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn); xn = mn; } if ((mp_limb_signed_t) e < 0) { mpn_mul (tp, xp, xn, bp, bn); tn = xn + bn; tn -= tp[tn - 1] == 0; if (tn < mn) { MPN_COPY (xp, tp, tn); xn = tn; } else { mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn); xn = mn; } } e <<= 1; c--; } finishup: /* We shifted m left m_zero_cnt steps. Adjust the result by reducing it with the original MOD. */ if (m_zero_cnt != 0) { mp_limb_t cy; cy = mpn_lshift (tp, xp, xn, m_zero_cnt); tp[xn] = cy; xn += cy != 0; if (xn < mn) { MPN_COPY (xp, tp, xn); } else { mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn); xn = mn; } mpn_rshift (xp, xp, xn, m_zero_cnt); } MPN_NORMALIZE (xp, xn); if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0) { mp = PTR(m); /* want original, unnormalized m */ mpn_sub (xp, mp, mn, xp, xn); xn = mn; MPN_NORMALIZE (xp, xn); } MPZ_REALLOC (r, xn); SIZ (r) = xn; MPN_COPY (PTR(r), xp, xn); TMP_FREE; }

void tc4_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y) { mp_size_t sign, wu, xu, ws, new_wn, min_size, dsize; mp_limb_t cy; /* w unaffected if x==0 or y==0 */ if (xn == 0 || y == 0) return; sign = xn; xu = ABS (xn); ws = *wn; if (*wn == 0) { /* nothing to add to, just set x*y, "sign" gives the sign */ cy = mpn_mul_1 (wp, xp, xu, y); if (cy) { wp[xu] = cy; xu = xu + 1; } *wn = (sign >= 0 ? xu : -xu); return; } sign ^= *wn; wu = ABS (*wn); new_wn = MAX (wu, xu); min_size = MIN (wu, xu); if (sign >= 0) { /* addmul of absolute values */ cy = mpn_addmul_1 (wp, xp, min_size, y); dsize = xu - wu; #if HAVE_NATIVE_mpn_mul_1c if (dsize > 0) cy = mpn_mul_1c (wp + min_size, xp + min_size, dsize, y, cy); else if (dsize < 0) { dsize = -dsize; cy = mpn_add_1 (wp + min_size, wp + min_size, dsize, cy); } #else if (dsize != 0) { mp_limb_t cy2; if (dsize > 0) cy2 = mpn_mul_1 (wp + min_size, xp + min_size, dsize, y); else { dsize = -dsize; cy2 = 0; } cy = cy2 + mpn_add_1 (wp + min_size, wp + min_size, dsize, cy); } #endif if (cy) { wp[dsize + min_size] = cy; new_wn ++; } } else { /* submul of absolute values */ cy = mpn_submul_1 (wp, xp, min_size, y); if (wu >= xu) { /* if w bigger than x, then propagate borrow through it */ if (wu != xu) cy = mpn_sub_1 (wp + xu, wp + xu, wu - xu, cy); if (cy != 0) { /* Borrow out of w, take twos complement negative to get absolute value, flip sign of w. */ wp[new_wn] = ~-cy; /* extra limb is 0-cy */ mpn_not (wp, new_wn); new_wn++; MPN_INCR_U (wp, new_wn, CNST_LIMB(1)); ws = -*wn; } } else /* wu < xu */ { /* x bigger than w, so want x*y-w. Submul has given w-x*y, so take twos complement and use an mpn_mul_1 for the rest. */ mp_limb_t cy2; /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */ mpn_not (wp, wu); cy += mpn_add_1 (wp, wp, wu, CNST_LIMB(1)); cy -= 1; /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */ cy2 = (cy == MP_LIMB_T_MAX); cy += cy2; MPN_MUL_1C (cy, wp + wu, xp + wu, xu - wu, y, cy); wp[new_wn] = cy; new_wn += (cy != 0); /* Apply any -1 from above. The value at wp+wsize is non-zero because y!=0 and the high limb of x will be non-zero. */ if (cy2) MPN_DECR_U (wp+wu, new_wn - wu, CNST_LIMB(1)); ws = -*wn; } /* submul can produce high zero limbs due to cancellation, both when w has more limbs or x has more */ MPN_NORMALIZE (wp, new_wn); } *wn = (ws >= 0 ? new_wn : -new_wn); ASSERT (new_wn == 0 || wp[new_wn - 1] != 0); }

/* FIXME: x Take scratch parameter, and figure out scratch need. x Use some fallback for small M->n? */ static mp_size_t hgcd_matrix_apply (const struct hgcd_matrix *M, mp_ptr ap, mp_ptr bp, mp_size_t n) { mp_size_t an, bn, un, vn, nn; mp_size_t mn[2][2]; mp_size_t modn; mp_ptr tp, sp, scratch; mp_limb_t cy; unsigned i, j; TMP_DECL; ASSERT ( (ap[n-1] | bp[n-1]) > 0); an = n; MPN_NORMALIZE (ap, an); bn = n; MPN_NORMALIZE (bp, bn); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) { mp_size_t k; k = M->n; MPN_NORMALIZE (M->p[i][j], k); mn[i][j] = k; } ASSERT (mn[0][0] > 0); ASSERT (mn[1][1] > 0); ASSERT ( (mn[0][1] | mn[1][0]) > 0); TMP_MARK; if (mn[0][1] == 0) { /* A unchanged, M = (1, 0; q, 1) */ ASSERT (mn[0][0] == 1); ASSERT (M->p[0][0][0] == 1); ASSERT (mn[1][1] == 1); ASSERT (M->p[1][1][0] == 1); /* Put B <-- B - q A */ nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]); } else if (mn[1][0] == 0) { /* B unchanged, M = (1, q; 0, 1) */ ASSERT (mn[0][0] == 1); ASSERT (M->p[0][0][0] == 1); ASSERT (mn[1][1] == 1); ASSERT (M->p[1][1][0] == 1); /* Put A <-- A - q * B */ nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]); } else { /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01. B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */ un = MIN (an - mn[0][0], bn - mn[1][0]) + 1; vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1; nn = MAX (un, vn); /* In the range of interest, mulmod_bnm1 should always beat mullo. */ modn = mpn_mulmod_bnm1_next_size (nn + 1); scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n)); tp = TMP_ALLOC_LIMBS (modn); sp = TMP_ALLOC_LIMBS (modn); ASSERT (n <= 2*modn); if (n > modn) { cy = mpn_add (ap, ap, modn, ap + modn, n - modn); MPN_INCR_U (ap, modn, cy); cy = mpn_add (bp, bp, modn, bp + modn, n - modn); MPN_INCR_U (bp, modn, cy); n = modn; } mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch); mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch); /* FIXME: Handle the small n case in some better way. */ if (n + mn[1][1] < modn) MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]); if (n + mn[0][1] < modn) MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]); cy = mpn_sub_n (tp, tp, sp, modn); MPN_DECR_U (tp, modn, cy); ASSERT (mpn_zero_p (tp + nn, modn - nn)); mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch); MPN_COPY (ap, tp, nn); mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch); if (n + mn[1][0] < modn) MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]); if (n + mn[0][0] < modn) MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]); cy = mpn_sub_n (tp, tp, sp, modn); MPN_DECR_U (tp, modn, cy); ASSERT (mpn_zero_p (tp + nn, modn - nn)); MPN_COPY (bp, tp, nn); while ( (ap[nn-1] | bp[nn-1]) == 0) { nn--; ASSERT (nn > 0); } } TMP_FREE; return nn; }

static mp_bitcnt_t bit_size (mp_srcptr xp, mp_size_t n) { MPN_NORMALIZE (xp, n); return n > 0 ? mpn_sizeinbase (xp, n, 2) : 0; }