void mpn_invert_trunc(mp_ptr x_new, mp_size_t m, mp_srcptr xp, mp_size_t n, mp_srcptr ap) { mp_ptr tp; TMP_DECL; TMP_MARK; tp = TMP_ALLOC_LIMBS (2 * m); MPN_COPY(x_new, xp + n - m, m); ap += (n - m); mpn_mul_n (tp, x_new, ap, m); mpn_add_n (tp + m, tp + m, ap, m); /* A * msb(X) */ /* now ensure B^(2m) - X*A <= A */ mpn_not (tp, 2 * m); mpn_add_1 (tp, tp, 2 * m, 1); /* B^(2m) - X*A */ while (tp[m] || mpn_cmp (tp, ap, m) > 0) { mpn_add_1(x_new, x_new, m, 1); tp[m] -= mpn_sub_n(tp, tp, ap, m); } TMP_FREE; }
static mp_limb_t mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch) { mp_limb_t qhl, cc; mp_size_t n2 = n/2; if (n % 2 != 0) { mp_ptr qp1 = qp + 1; qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch); qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2, mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch)); cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]); cc = mpn_sub_1 (np + n, np + n, 1, cc); if (qhl != 0) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]); while (cc != 0) { qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1); cc -= mpn_add_n (np + 1, np + 1, dp, n); } qhl += mpn_add_1 (qp1, qp1, n - 1, mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); } else { qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch); qhl += mpn_add_1 (qp + n2, qp + n2, n2, mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch)); } return qhl; }
void bn_sqra_low(dig_t *c, const dig_t *a, int size) { dig_t carry, digit = *a; carry = mpn_addmul_1(c, a, size, digit); mpn_add_1(c + size, c + size, size, carry); if (size > 1) { carry = mpn_addmul_1(c + 1, a + 1, size - 1, digit); mpn_add_1(c + size, c + size, size, carry); } }
int test_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n) { int res = 1; mp_size_t i; mp_ptr tp, up; mp_limb_t cy; TMP_DECL; TMP_MARK; tp = TMP_ALLOC_LIMBS (2 * n); up = TMP_ALLOC_LIMBS (2 * n); /* first check X*A < B^(2*n) */ mpn_mul_n (tp, xp, ap, n); cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */ if (cy != 0) return 0; /* now check B^(2n) - X*A <= A */ mpn_com_n (tp, tp, 2 * n); mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */ MPN_ZERO (up, 2 * n); MPN_COPY (up, ap, n); res = mpn_cmp (tp, up, 2 * n) <= 0; TMP_FREE; return res; }
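/* Aside (not from the sources above): test_invert checks that the stored
   approximate reciprocal X of a normalized n-limb A satisfies, with the
   implicit leading limb restored as V = B^n + X,
   0 < B^(2n) - A*V <= A, i.e. V = floor((B^(2n) - 1) / A).
   A minimal mpz-level restatement of that invariant; the helper name
   check_invert_invariant is illustrative. */
#include <gmp.h>

static int check_invert_invariant (const mpz_t A, const mpz_t V, unsigned long n)
{
  mpz_t b2n, r;
  int ok;
  mpz_init (b2n);
  mpz_init (r);
  mpz_ui_pow_ui (b2n, 2, 2 * n * GMP_NUMB_BITS);   /* B^(2n) */
  mpz_mul (r, A, V);
  mpz_sub (r, b2n, r);                             /* B^(2n) - A*V */
  ok = mpz_sgn (r) > 0 && mpz_cmp (r, A) <= 0;
  mpz_clear (b2n);
  mpz_clear (r);
  return ok;
}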
/* Put in rp[n..2n-1] an approximation of the n high limbs of {np, n}^2. The error is less than n ulps of rp[n]. */ void mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n) { mp_size_t k; MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */ k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n] : (n+4)/2; /* ensures that k >= (n+3)/2 */ MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n)); if (k < 0) /* we can't use mpn_sqr_basecase here, since it requires n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD is not exported by GMP */ mpn_sqr_n (rp, np, n); else if (k == 0) mpfr_mulhigh_n_basecase (rp, np, np, n); else { mp_size_t l = n - k; mp_limb_t cy; mpn_sqr_n (rp + 2 * l, np + l, k); /* fills rp[2l..2n-1] */ mpfr_mulhigh_n (rp, np, np + k, l); /* fills rp[l-1..2l-1] */ /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */ cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1); cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */ } }
/* sets x to x+sign(x)*ulp(x) */ int mpfr_add_one_ulp (mpfr_ptr x, mp_rnd_t rnd_mode) { mp_size_t xn; int sh; mp_limb_t *xp; if (MPFR_IS_NAN(x)) MPFR_RET_NAN; if (MPFR_IS_INF(x) || MPFR_IS_ZERO(x)) return 0; xn = 1 + (MPFR_PREC(x) - 1) / BITS_PER_MP_LIMB; sh = xn * BITS_PER_MP_LIMB - MPFR_PREC(x); xp = MPFR_MANT(x); if (mpn_add_1 (xp, xp, xn, MP_LIMB_T_ONE << sh)) /* got 1.0000... */ { mp_exp_t exp = MPFR_EXP(x); if (exp == __mpfr_emax) return mpfr_set_overflow(x, rnd_mode, MPFR_SIGN(x)); else { MPFR_EXP(x)++; xp[xn-1] = GMP_LIMB_HIGHBIT; } } return 0; }
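/* Aside: the snippet above is an internal helper from an older MPFR.  With
   the current public API the same "one ulp away from zero" step is obtained
   with mpfr_nextabove (for x > 0) or mpfr_nextbelow (for x < 0).  A small
   usage sketch: */
#include <stdio.h>
#include <mpfr.h>

int main (void)
{
  mpfr_t x;
  mpfr_init2 (x, 53);
  mpfr_set_d (x, 1.0, MPFR_RNDN);
  mpfr_nextabove (x);                /* x becomes 1 + 2^-52 */
  mpfr_printf ("%.20Rg\n", x);
  mpfr_clear (x);
  return 0;
}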
REGPARM_ATTR (1) static void cfdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir) { mp_size_t wsize, usize, abs_usize, limb_cnt, i; mp_srcptr up; mp_ptr wp; mp_limb_t round, rmask; usize = SIZ (u); abs_usize = ABS (usize); limb_cnt = cnt / GMP_NUMB_BITS; wsize = abs_usize - limb_cnt; if (wsize <= 0) { /* u < 2**cnt, so result 1, 0 or -1 according to rounding */ PTR(w)[0] = 1; SIZ(w) = (usize == 0 || (usize ^ dir) < 0 ? 0 : dir); return; } /* +1 limb to allow for mpn_add_1 below */ MPZ_REALLOC (w, wsize+1); /* Check for rounding if direction matches u sign. Set round if we're skipping non-zero limbs. */ up = PTR(u); round = 0; rmask = ((usize ^ dir) >= 0 ? MP_LIMB_T_MAX : 0); if (rmask != 0) for (i = 0; i < limb_cnt && round == 0; i++) round = up[i]; wp = PTR(w); cnt %= GMP_NUMB_BITS; if (cnt != 0) { round |= rmask & mpn_rshift (wp, up + limb_cnt, wsize, cnt); wsize -= (wp[wsize - 1] == 0); } else MPN_COPY_INCR (wp, up + limb_cnt, wsize); if (round != 0) { if (wsize != 0) { mp_limb_t cy; cy = mpn_add_1 (wp, wp, wsize, CNST_LIMB(1)); wp[wsize] = cy; wsize += cy; } else { /* We shifted something to zero. */ wp[0] = 1; wsize = 1; } } SIZ(w) = (usize >= 0 ? wsize : -wsize); }
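/* Usage sketch (assumed, not part of the source above): cfdiv_q_2exp is the
   common backend of the public mpz_cdiv_q_2exp / mpz_fdiv_q_2exp, selected by
   'dir', which round the quotient toward +infinity and -infinity respectively. */
#include <stdio.h>
#include <gmp.h>

int main (void)
{
  mpz_t u, q;
  mpz_init_set_si (u, -5);
  mpz_init (q);
  mpz_cdiv_q_2exp (q, u, 1);   /* ceil(-5/2)  = -2 */
  gmp_printf ("%Zd\n", q);
  mpz_fdiv_q_2exp (q, u, 1);   /* floor(-5/2) = -3 */
  gmp_printf ("%Zd\n", q);
  mpz_clears (u, q, NULL);
  return 0;
}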
/* Put in rp[n..2n-1] an approximation of the n high limbs of {np, n} * {mp, n}. The error is less than n ulps of rp[n] (and the approximation is always less or equal to the truncated full product). Implements Algorithm ShortMul from [1]. */ void mpfr_mulhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp, mp_size_t n) { mp_size_t k; MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */ k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4); /* Algorithm ShortMul from [1] requires k >= (n+3)/2, which translates into k >= (n+4)/2 in the C language. */ MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n)); if (k < 0) mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */ else if (k == 0) mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */ else if (n > MUL_FFT_THRESHOLD) mpn_mul_n (rp, np, mp, n); /* result is exact, no error */ else { mp_size_t l = n - k; mp_limb_t cy; mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */ mpfr_mulhigh_n (rp, np + k, mp, l); /* fills rp[l-1..2l-1] */ cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); mpfr_mulhigh_n (rp, np, mp + k, l); /* fills rp[l-1..2l-1] */ cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */ } }
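/* A sketch of the basecase short product the comment above describes (under
   these assumptions; this is not MPFR's exact mpfr_mulhigh_n_basecase): only
   the partial products u[i]*v[j] with i + j >= n - 1 are accumulated, so
   rp[n-1 .. 2n-1] is written and each neglected row is below B^n, i.e. below
   one ulp of rp[n], giving a total error of less than n ulps. */
#include <gmp.h>

static void short_mul_high (mp_limb_t *rp, const mp_limb_t *up,
                            const mp_limb_t *vp, mp_size_t n)
{
  mp_size_t i;
  rp += n - 1;
  rp[1] = mpn_mul_1 (rp, up + (n - 1), 1, vp[0]);         /* u[n-1] * v[0] */
  for (i = 1; i < n; i++)
    /* add u[n-1-i .. n-1] * v[i]; the row u[0 .. n-2-i] * v[i] is neglected */
    rp[i + 1] = mpn_addmul_1 (rp, up + (n - 1 - i), i + 1, vp[i]);
}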
/* Twos-complement version of 'integer_gmp_mpn_rshift' for performing * arithmetic right shifts on "negative" MPNs. * * Same pre-conditions as 'integer_gmp_mpn_rshift' * * This variant is needed to operate on MPNs interpreted as negative * numbers, which require "rounding" towards minus infinity iff a * non-zero bit is shifted out. */ mp_limb_t integer_gmp_mpn_rshift_2c (mp_limb_t rp[], const mp_limb_t sp[], const mp_size_t sn, const mp_bitcnt_t count) { const mp_size_t limb_shift = count / GMP_NUMB_BITS; const unsigned int bit_shift = count % GMP_NUMB_BITS; const mp_size_t rn = sn - limb_shift; // whether non-zero bits were shifted out bool nz_shift_out = false; if (bit_shift) { if (mpn_rshift(rp, &sp[limb_shift], rn, bit_shift)) nz_shift_out = true; } else memcpy(rp, &sp[limb_shift], rn*sizeof(mp_limb_t)); if (!nz_shift_out) for (unsigned i = 0; i < limb_shift; i++) if (sp[i]) { nz_shift_out = true; break; } // round if non-zero bits were shifted out if (nz_shift_out) if (mpn_add_1(rp, rp, rn, 1)) abort(); /* should never happen */ return rp[rn-1]; }
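/* Why the rounding mpn_add_1 above is needed (illustrative one-limb sketch,
   not part of integer-gmp): rounding a negative value toward -infinity makes
   its magnitude round up exactly when non-zero bits are shifted out. */
#include <assert.h>
#include <stdint.h>

int main (void)
{
  uint64_t mag = 7;       /* magnitude of x = -7 */
  uint64_t sh  = mag >> 1;/* 3; a non-zero bit fell off */
  sh += 1;                /* round the magnitude up -> 4 */
  assert (sh == 4);       /* floor(-7/2) = -4, whose magnitude is 4 */
  return 0;
}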
void fp_rdcn_low(dig_t *c, dig_t *a) { int i; dig_t r, c0, c1, u, *tmp; const dig_t *m; u = *(fp_prime_get_rdc()); m = fp_prime_get(); tmp = a; c1 = 0; for (i = 0; i < FP_DIGS; i++, tmp++) { r = (dig_t)(*tmp * u); c0 = mpn_addmul_1(tmp, m, FP_DIGS, r); c1 += mpn_add_1(tmp + FP_DIGS, tmp + FP_DIGS, FP_DIGS - i, c0); } for (i = 0; i < FP_DIGS; i++, tmp++) { c[i] = *tmp; } for (i = 0; i < c1; i++) { fp_subn_low(c, c, m); } if (fp_cmp(c, m) != CMP_LT) { fp_subn_low(c, c, m); } }
/* Needs n+1 limbs of temporary storage. */ int mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp) { mp_limb_t cy; int neg; ASSERT (x3n > 0); ASSERT (x3n <= n); /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */ #if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n #if HAVE_NATIVE_mpn_addlsh2_n xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n); cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n); #else /* HAVE_NATIVE_mpn_addlsh_n */ xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2); cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2); #endif if (x3n < n) cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy); tp[n] = cy; #else cy = mpn_lshift (tp, xp + 2*n, n, 2); xp2[n] = cy + mpn_add_n (xp2, tp, xp, n); tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2); if (x3n < n) tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1); else tp[n] += mpn_add_n (tp, xp + n, tp, n); #endif mpn_lshift (tp, tp, n+1, 1); neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0; #if HAVE_NATIVE_mpn_sumdiff_n if (neg) mpn_sumdiff_n (xp2, xm2, tp, xp2, n + 1); else mpn_sumdiff_n (xp2, xm2, xp2, tp, n + 1); #else if (neg) mpn_sub_n (xm2, tp, xp2, n + 1); else mpn_sub_n (xm2, xp2, tp, n + 1); mpn_add_n (xp2, xp2, tp, n + 1); #endif ASSERT (xp2[n] < 15); ASSERT (xm2[n] < 10); return neg; }
void mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) { mp_ptr xp; mp_size_t rn, newrn; mp_size_t sizes[NPOWS], *sizp; mp_limb_t di; /* Compute the computation precisions from highest to lowest, leaving the base case size in 'rn'. */ sizp = sizes; for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1) *sizp++ = rn; xp = scratch; /* Compute a base value using a low-overhead O(n^2) algorithm. FIXME: We should call some divide-and-conquer lsb division function here for an operand subrange. */ MPN_ZERO (xp, rn); xp[0] = 1; binvert_limb (di, up[0]); if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD)) mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di); else mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di); /* Use Newton iterations to get the desired precision. */ for (; rn < n; rn = newrn) { newrn = *--sizp; #if WANT_FFT if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD)) { int k; mp_size_t m, i; k = mpn_fft_best_k (newrn, 0); m = mpn_fft_next_size (newrn, k); mpn_mul_fft (xp, m, up, newrn, rp, rn, k); for (i = rn - 1; i >= 0; i--) if (xp[i] > (i == 0)) { mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1); break; } } else #endif mpn_mul (xp, up, newrn, rp, rn); mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn); mpn_neg_n (rp + rn, rp + rn, newrn - rn); } }
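/* Sketch of the Newton step the iteration above relies on, restated at the
   mpz level (the helper binvert_mpz is hypothetical): if v == u^-1 mod 2^k
   and u is odd, then v*(2 - u*v) == u^-1 mod 2^(2k), so every pass doubles
   the number of correct low bits. */
#include <gmp.h>

static void binvert_mpz (mpz_t v, const mpz_t u, unsigned long bits)
{
  unsigned long k;
  mpz_t t;
  mpz_init (t);
  mpz_set_ui (v, 1);                   /* u odd => u^-1 == 1 (mod 2) */
  for (k = 1; k < bits; k *= 2)
    {
      mpz_mul (t, u, v);
      mpz_ui_sub (t, 2, t);            /* 2 - u*v */
      mpz_mul (v, v, t);
      mpz_fdiv_r_2exp (v, v, 2 * k);   /* keep the 2k low bits */
    }
  mpz_fdiv_r_2exp (v, v, bits);        /* now v*u == 1 (mod 2^bits) */
  mpz_clear (t);
}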
static void gst_mpz_sub_ui (gst_mpz *dif, const gst_mpz *min, mp_limb_t sub) { mp_srcptr minp; mp_ptr difp; mp_size_t minsize, difsize; mp_size_t abs_minsize; minsize = min->size; abs_minsize = ABS (minsize); /* If not space for SUM (and possible carry), increase space. */ difsize = abs_minsize + 1; if (dif->alloc < difsize) gst_mpz_realloc (dif, difsize); /* These must be after realloc (ADD1 may be the same as SUM). */ minp = min->d; difp = dif->d; if (sub == 0) { MPN_COPY (difp, minp, abs_minsize); dif->size = minsize; return; } if (abs_minsize == 0) { difp[0] = sub; dif->size = -1; return; } if (minsize < 0) { difsize = mpn_add_1 (difp, minp, abs_minsize, sub); if (difsize != 0) difp[abs_minsize] = 1; difsize = -(difsize + abs_minsize); } else { /* The signs are different. Need exact comparison to determine which operand to subtract from which. */ if (abs_minsize == 1 && minp[0] < sub) difsize = -(abs_minsize + mpn_sub_1 (difp, &sub, 1, *minp)); else difsize = (abs_minsize + mpn_sub_1 (difp, minp, abs_minsize, sub)); } dif->size = difsize; }
void mpn_sbpi1_bdiv_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t i; mp_limb_t cy, q; ASSERT (dn > 0); ASSERT (nn >= dn); ASSERT ((dp[0] & 1) != 0); /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK, but some over N/Q overlaps will not work. */ for (i = nn - dn; i > 0; i--) { q = dinv * np[0]; cy = mpn_addmul_1 (np, dp, dn, q); mpn_add_1 (np + dn, np + dn, i, cy); ASSERT (np[0] == 0); qp[0] = ~q; qp++; np++; } for (i = dn; i > 1; i--) { q = dinv * np[0]; mpn_addmul_1 (np, dp, i, q); ASSERT (np[0] == 0); qp[0] = ~q; qp++; np++; } /* Final limb */ q = dinv * np[0]; qp[0] = ~q; mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1); }
/* c is the top bits of the inputs, (fully reduced) c & 2 is the top bit of y c & 1 is the top bit of z */ int mpn_mulmod_2expp1_basecase (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, int c, mpir_ui b, mp_ptr tp) { int cy, cz; mp_size_t n, k; cy = c & 2; cz = c & 1; n = BITS_TO_LIMBS (b); k = GMP_NUMB_BITS * n - b; ASSERT(b > 0); ASSERT(n > 0); ASSERT_MPN(yp, n); ASSERT_MPN(zp, n); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n)); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n)); ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0); ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0); #if WANT_ASSERT { mp_size_t t = n; MPN_NORMALIZE(yp, t); ASSERT(cy == 0 || t == 0); t = n; MPN_NORMALIZE(zp, t); ASSERT(cz == 0 || t == 0); } #endif if (LIKELY (cy == 0)) { if (LIKELY (cz == 0)) { c = mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp); } else { c = mpn_neg_n (xp, yp, n); c = mpn_add_1 (xp, xp, n, c); xp[n - 1] &= GMP_NUMB_MASK >> k; } } else { if (LIKELY (cz == 0))
void mpn_sbpi1_bdiv_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t i; mp_limb_t cy, q; ASSERT (dn > 0); ASSERT (nn >= dn); ASSERT ((dp[0] & 1) != 0); for (i = nn - dn; i > 0; i--) { q = dinv * np[0]; qp[0] = ~q; qp++; cy = mpn_addmul_1 (np, dp, dn, q); mpn_add_1 (np + dn, np + dn, i, cy); ASSERT (np[0] == 0); np++; } for (i = dn; i > 1; i--) { q = dinv * np[0]; qp[0] = ~q; qp++; mpn_addmul_1 (np, dp, i, q); ASSERT (np[0] == 0); np++; } /* Final limb */ q = dinv * np[0]; qp[0] = ~q; mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1); }
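/* What the two sbpi1_bdiv_q variants above compute, restated at the mpz level
   (illustrative helper, not GMP code): for odd d the Hensel quotient is the
   unique q with q*d == n (mod B^nn), namely n * d^-1 mod B^nn; no remainder
   is returned. */
#include <assert.h>
#include <gmp.h>

static void bdiv_q_mpz (mpz_t q, const mpz_t n, const mpz_t d, unsigned long limbs)
{
  mpz_t mod, dinv;
  mpz_init (mod);
  mpz_init (dinv);
  mpz_ui_pow_ui (mod, 2, limbs * GMP_NUMB_BITS);   /* B^limbs */
  assert (mpz_odd_p (d));
  mpz_invert (dinv, d, mod);                       /* exists since d is odd */
  mpz_mul (q, n, dinv);
  mpz_mod (q, q, mod);                             /* q*d == n (mod B^limbs) */
  mpz_clears (mod, dinv, NULL);
}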
void tc4_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn) { mp_size_t yu = ABS(*yn); mp_size_t xu = ABS(xn); mp_limb_t cy = 0; if (xn == 0) return; if (offset < yu) /* low part of x overlaps with y */ { if (offset + xu <= yu) /* x entirely inside y */ { cy = mpn_add_n (yp + offset, yp + offset, xp, xu); if (offset + xu < yu) cy = mpn_add_1 (yp + offset + xu, yp + offset + xu, yu - (offset + xu), cy); } else cy = mpn_add_n (yp + offset, yp + offset, xp, yu - offset); /* now cy is the carry at yp + yu */ if (xu + offset > yu) /* high part of x exceeds y */ { MPN_COPY (yp + yu, xp + yu - offset, xu + offset - yu); cy = mpn_add_1 (yp + yu, yp + yu, xu + offset - yu, cy); yu = xu + offset; } /* now cy is the carry at yp + yn */ if (cy) yp[yu++] = cy; MPN_NORMALIZE(yp, yu); *yn = yu; } else /* x does not overlap */ { if (offset > yu) MPN_ZERO (yp + yu, offset - yu); MPN_COPY (yp + offset, xp, xu); *yn = offset + xu; } }
// Montgomery reduction. // Algorithm II.4 from Blake, Seroussi and Smart. static void mont_reduce(mp_limb_t *x, mp_limb_t *y, fptr p) { size_t t = p->limbs; size_t i; mp_limb_t flag = 0; for (i = 0; i < t; i++) { mp_limb_t u = y[i] * p->negpinv; mp_limb_t carry = mpn_addmul_1(&y[i], p->primelimbs, t, u); //mpn_add_1(&y[i+t], &y[i+t], t - i + 1, carry); flag += mpn_add_1(&y[i + t], &y[i + t], t - i, carry); } if (flag || mpn_cmp(&y[t], p->primelimbs, t) >= 0) { mpn_sub_n(x, &y[t], p->primelimbs, t); } else { // TODO: GMP set might be faster. memcpy(x, &y[t], t * sizeof(mp_limb_t)); } }
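/* How the constant negpinv used above can be obtained (a sketch assuming
   64-bit limbs, not taken from the source above): Newton iteration gives
   p^-1 mod 2^64 for odd p, and negpinv = -p^-1 mod 2^64, so that
   u = y[i] * negpinv makes each mpn_addmul_1 pass clear the limb y[i]. */
#include <assert.h>
#include <stdint.h>

static uint64_t neg_inv_limb (uint64_t p)   /* p must be odd */
{
  uint64_t inv = p;                /* p*p == 1 (mod 8): 3 correct bits */
  for (int i = 0; i < 5; i++)
    inv *= 2 - p * inv;            /* 3 -> 6 -> 12 -> 24 -> 48 -> 96 bits */
  return (uint64_t) 0 - inv;       /* -p^-1 mod 2^64 */
}

int main (void)
{
  uint64_t p = 0xffffffff00000001ULL;              /* any odd modulus works */
  assert (p * neg_inv_limb (p) == (uint64_t) -1);  /* p * (-p^-1) == -1 */
  return 0;
}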
static void mpf_normalize(mpf_t op) { Py_ssize_t size, prec, toclear, temp, i; mp_limb_t bit1, rem, carry; prec = mpf_get_prec(op); size = mpf_size(op); toclear = size - ((prec / GMP_NUMB_BITS) + 1); if(toclear>0) { bit1 = (op->_mp_d[toclear-1] & ((mp_limb_t)1 << (GMP_NUMB_BITS - 1))) ? 1 : 0; rem = (op->_mp_d[toclear-1] & (((mp_limb_t)1 << (GMP_NUMB_BITS - 1)) - 1)) ? 1 : 0; carry = bit1 && ((op->_mp_d[toclear] & 1) || rem); } else { carry = 0; } if(options.debug) { fprintf(stderr, "prec %ld size %ld toclear %ld carry %ld\n", prec, size, toclear, carry); for(i=0; i<size; i++) fprintf(stderr,"[%zd]=%lx\n", i, op->_mp_d[i]); } temp = toclear; if(temp>0) { op->_mp_d[--temp] = 0; } if(carry) { if(options.debug) { fprintf(stderr, "adding carry bit\n"); } carry = mpn_add_1(op->_mp_d + toclear, op->_mp_d + toclear, size-toclear, carry); if(carry) { if(options.debug) { fprintf(stderr, "carry bit extended\n"); } op->_mp_d[size-1] = 1; op->_mp_exp++; } } if(options.debug) { for(i=0; i<size; i++) fprintf(stderr,"[%zd]=%lx\n", i, op->_mp_d[i]); } }
mp_limb_t mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) { mp_limb_t cy; mp_limb_t xp[2], dip[2]; ASSERT (dn >= 2); cy = mpn_add_1 (xp, dp + dn - 2, 2, 1); if (cy != 0) dip[0] = dip[1] = 0; else { mp_limb_t scratch[10]; /* FIXME */ mpn_invert (dip, xp, 2, scratch); } return mpn_preinv_dc_divappr_q (qp, np, nn, dp, dn, dip); }
void fmpz_sub_ui(fmpz_t output, const fmpz_t input, const unsigned long x) { unsigned long carry; if (x) { if (!input[0]) { output[1] = x; output[0] = -1L; } else if ((long) input[0] < 0) { carry = mpn_add_1(output + 1, input + 1, ABS(input[0]), x); output[0] = input[0]; if (carry) { output[ABS(output[0])+1] = carry; output[0]--; } } else if ((long) input[0] > 1L) { mpn_sub_1(output + 1, input + 1, input[0], x); output[0] = input[0]; NORM(output); } else { if (x <= input[1]) { output[1] = input[1] - x; if (!output[1]) output[0] = 0; else output[0] = 1L; } else { output[1] = x - input[1]; output[0] = -1L; } } } else { fmpz_set(output, input); } }
void __fmpz_add_ui_inplace(fmpz_t output, const unsigned long x) { unsigned long carry; if (x) { if (!output[0]) { output[1] = x; output[0] = 1; } else { carry = mpn_add_1(output + 1, output + 1, output[0], x); if (carry) { output[output[0]+1] = carry; output[0]++; } } } }
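/* The fmpz layout these fmpz_* snippets manipulate (old FLINT 1.x style, as
   inferred from the code; illustrative only): limb 0 holds the signed limb
   count and limbs 1..|count| hold the magnitude, least significant limb
   first.  A conversion sketch via mpz_import: */
#include <stdlib.h>
#include <gmp.h>

typedef unsigned long fmpz_limb_t;          /* illustrative limb type */

static void fmpz_to_mpz_sketch (mpz_t out, const fmpz_limb_t *f)
{
  long size = (long) f[0];
  mpz_import (out, (size_t) labs (size), -1 /* least significant limb first */,
              sizeof (fmpz_limb_t), 0, 0, f + 1);
  if (size < 0)
    mpz_neg (out, out);
}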
void mpir_butterfly_lshB(mp_ptr t, mp_ptr u, mp_ptr i1, mp_ptr i2, mp_size_t limbs, mp_size_t x, mp_size_t y) { mp_limb_t cy, cy1, cy2; if (x == 0) { if (y == 0) cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs + 1); else { cy = mpn_sumdiff_n(t, u + y, i1, i2, limbs - y); u[limbs] = -(cy&1); cy1 = cy>>1; cy = mpn_sumdiff_n(t + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y); t[limbs] = cy>>1; mpn_add_1(t + limbs - y, t + limbs - y, y + 1, cy1); cy1 = -(cy&1) + (i2[limbs] - i1[limbs]); mpn_addmod_2expp1_1(u + y, limbs - y, cy1); cy1 = -(i1[limbs] + i2[limbs]); mpn_addmod_2expp1_1(t, limbs, cy1); } } else if (y == 0)
void tc4_addlsh1_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn) { if (xn) { if (xn >= *rn) { mp_limb_t cy; if (xn > *rn) MPN_ZERO(rp + *rn, xn - *rn); #if HAVE_NATIVE_mpn_addlsh1_n cy = mpn_addlsh1_n(rp, rp, xp, xn); #else cy = mpn_add_n(rp, rp, xp, xn); cy += mpn_add_n(rp, rp, xp, xn); #endif if (cy) { rp[xn] = cy; *rn = xn + 1; } else *rn = xn; } else { mp_limb_t cy; #if HAVE_NATIVE_mpn_addlsh1_n cy = mpn_addlsh1_n(rp, rp, xp, xn); #else cy = mpn_add_n(rp, rp, xp, xn); cy += mpn_add_n(rp, rp, xp, xn); #endif if (cy) cy = mpn_add_1(rp + xn, rp + xn, *rn - xn, cy); if (cy) { rp[*rn] = cy; (*rn)++; } } } }
void fmpz_sub_ui_inplace(fmpz_t output, const unsigned long x) { unsigned long carry; if (x) { if (!output[0]) { output[1] = x; output[0] = -1L; } else if ((long) output[0] < 0) { carry = mpn_add_1(output + 1, output + 1, ABS(output[0]), x); if (carry) { output[ABS(output[0])+1] = carry; output[0]--; } } else if ((long) output[0] > 1L) { mpn_sub_1(output + 1, output + 1, output[0], x); NORM(output); } else { if (x <= output[1]) { output[1] -= x; if (!output[1]) output[0] = 0; } else { output[1] = x - output[1]; output[0] = -1L; } } } }
void mpn_toom53_mul (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { mp_size_t n, s, t; int vm1_neg, vmh_neg; mp_limb_t cy; mp_ptr gp, hp; mp_ptr as1, asm1, as2, ash, asmh; mp_ptr bs1, bsm1, bs2, bsh, bsmh; enum toom4_flags flags; TMP_DECL; #define a0 ap #define a1 (ap + n) #define a2 (ap + 2*n) #define a3 (ap + 3*n) #define a4 (ap + 4*n) #define b0 bp #define b1 (bp + n) #define b2 (bp + 2*n) n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3); s = an - 4 * n; t = bn - 2 * n; ASSERT (0 < s && s <= n); ASSERT (0 < t && t <= n); TMP_MARK; as1 = TMP_SALLOC_LIMBS (n + 1); asm1 = TMP_SALLOC_LIMBS (n + 1); as2 = TMP_SALLOC_LIMBS (n + 1); ash = TMP_SALLOC_LIMBS (n + 1); asmh = TMP_SALLOC_LIMBS (n + 1); bs1 = TMP_SALLOC_LIMBS (n + 1); bsm1 = TMP_SALLOC_LIMBS (n + 1); bs2 = TMP_SALLOC_LIMBS (n + 1); bsh = TMP_SALLOC_LIMBS (n + 1); bsmh = TMP_SALLOC_LIMBS (n + 1); gp = pp; hp = pp + n + 1; /* Compute as1 and asm1. */ gp[n] = mpn_add_n (gp, a0, a2, n); gp[n] += mpn_add (gp, gp, n, a4, s); hp[n] = mpn_add_n (hp, a1, a3, n); #if HAVE_NATIVE_mpn_addsub_n if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_addsub_n (as1, asm1, hp, gp, n + 1); vm1_neg = 1; } else { mpn_addsub_n (as1, asm1, gp, hp, n + 1); vm1_neg = 0; } #else mpn_add_n (as1, gp, hp, n + 1); if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_sub_n (asm1, hp, gp, n + 1); vm1_neg = 1; } else { mpn_sub_n (asm1, gp, hp, n + 1); vm1_neg = 0; } #endif /* Compute as2. */ #if !HAVE_NATIVE_mpn_addlsh_n ash[n] = mpn_lshift (ash, a2, n, 2); /* 4a2 */ #endif #if HAVE_NATIVE_mpn_addlsh1_n cy = mpn_addlsh1_n (as2, a3, a4, s); if (s != n) cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy); cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n); cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n); as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n); #else cy = mpn_lshift (as2, a4, s, 1); cy += mpn_add_n (as2, a3, as2, s); if (s != n) cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy); cy = 4 * cy + mpn_lshift (as2, as2, n, 2); cy += mpn_add_n (as2, a1, as2, n); cy = 2 * cy + mpn_lshift (as2, as2, n, 1); as2[n] = cy + mpn_add_n (as2, a0, as2, n); mpn_add_n (as2, ash, as2, n + 1); #endif /* Compute ash and asmh. */ #if HAVE_NATIVE_mpn_addlsh_n cy = mpn_addlsh_n (gp, a2, a0, n, 2); /* 4a0 + a2 */ cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2); /* 16a0 + 4a2 + a4 */ /* FIXME s */ gp[n] = cy; cy = mpn_addlsh_n (hp, a3, a1, n, 2); /* 4a1 + a3 */ cy = 2 * cy + mpn_lshift (hp, hp, n, 1); /* 8a1 + 2a3 */ hp[n] = cy; #else gp[n] = mpn_lshift (gp, a0, n, 4); /* 16a0 */ mpn_add (gp, gp, n + 1, a4, s); /* 16a0 + a4 */ mpn_add_n (gp, ash, gp, n+1); /* 16a0 + 4a2 + a4 */ cy = mpn_lshift (hp, a1, n, 3); /* 8a1 */ cy += mpn_lshift (ash, a3, n, 1); /* 2a3 */ cy += mpn_add_n (hp, ash, hp, n); /* 8a1 + 2a3 */ hp[n] = cy; #endif #if HAVE_NATIVE_mpn_addsub_n if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_addsub_n (ash, asmh, hp, gp, n + 1); vmh_neg = 1; } else { mpn_addsub_n (ash, asmh, gp, hp, n + 1); vmh_neg = 0; } #else mpn_add_n (ash, gp, hp, n + 1); if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_sub_n (asmh, hp, gp, n + 1); vmh_neg = 1; } else { mpn_sub_n (asmh, gp, hp, n + 1); vmh_neg = 0; } #endif /* Compute bs1 and bsm1. */ bs1[n] = mpn_add (bs1, b0, n, b2, t); /* b0 + b2 */ #if HAVE_NATIVE_mpn_addsub_n if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0) { bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1; bsm1[n] = 0; vm1_neg ^= 1; }
/* ret + (xp, n) = (yp, n)*(zp, n) % 2^b + 1 needs (tp, 2n) temp space, everything reduced mod 2^b inputs, outputs are fully reduced N.B: 2n is not the same as 2b rounded up to nearest limb! */ inline static int mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, mpir_ui b, mp_ptr tp) { mp_size_t n, k; mp_limb_t c; TMP_DECL; n = BITS_TO_LIMBS (b); k = GMP_NUMB_BITS * n - b; ASSERT(b > 0); ASSERT(n > 0); ASSERT_MPN(yp, n); ASSERT_MPN(zp, n); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n)); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n)); ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0); ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0); #ifndef TUNE_PROGRAM_BUILD if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n)) { mp_bitcnt_t depth1, depth = 1; mp_size_t w1, off; mp_ptr tx, ty, tz; mp_limb_t ret; TMP_MARK; tx = TMP_BALLOC_LIMBS(3*n + 3); ty = tx + n + 1; tz = ty + n + 1; MPN_COPY(ty, yp, n); MPN_COPY(tz, zp, n); ty[n] = 0; tz[n] = 0; while ((((mp_limb_t)1)<<depth) < b) depth++; if (depth < 12) off = mulmod_2expp1_table_n[0]; else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12]; depth1 = depth/2 - off; w1 = b/(((mp_limb_t)1)<<(2*depth1)); mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1); MPN_COPY(xp, tx, n); ret = tx[n]; TMP_FREE; return ret; } #endif if (yp == zp) mpn_sqr(tp, yp, n); else mpn_mul_n (tp, yp, zp, n); if (k == 0) { c = mpn_sub_n (xp, tp, tp + n, n); return mpn_add_1 (xp, xp, n, c); } c = tp[n - 1]; tp[n - 1] &= GMP_NUMB_MASK >> k; #if HAVE_NATIVE_mpn_sublsh_nc c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c); #else { mp_limb_t c1; c1 = mpn_lshift (tp + n, tp + n, n, k); tp[n] |= c >> (GMP_NUMB_BITS - k); c = mpn_sub_n (xp, tp, tp + n, n) + c1; } #endif c = mpn_add_1 (xp, xp, n, c); xp[n - 1] &= GMP_NUMB_MASK >> k; return c; }
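/* The reduction above exploits 2^b == -1 (mod 2^b + 1): split the
   double-length product into its low and high b-bit halves and subtract.
   An mpz-level restatement (illustrative only): */
#include <gmp.h>

static void mulmod_2expp1_mpz (mpz_t x, const mpz_t y, const mpz_t z,
                               unsigned long b)
{
  mpz_t t, m;
  mpz_init (t);
  mpz_init (m);
  mpz_ui_pow_ui (m, 2, b);
  mpz_add_ui (m, m, 1);           /* m = 2^b + 1 */
  mpz_mul (t, y, z);
  mpz_fdiv_q_2exp (x, t, b);      /* high half */
  mpz_fdiv_r_2exp (t, t, b);      /* low half */
  mpz_sub (x, t, x);              /* low - high == y*z (mod 2^b + 1) */
  mpz_mod (x, x, m);
  mpz_clears (t, m, NULL);
}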
if (LIKELY (cy == 0)) { if (LIKELY (cz == 0)) { c = mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp); } else { c = mpn_neg_n (xp, yp, n); c = mpn_add_1 (xp, xp, n, c); xp[n - 1] &= GMP_NUMB_MASK >> k; } } else { if (LIKELY (cz == 0)) { c = mpn_neg_n (xp, zp, n); c = mpn_add_1 (xp, xp, n, c); xp[n - 1] &= GMP_NUMB_MASK >> k; } else { c = 0; xp[0] = 1; MPN_ZERO (xp + 1, n - 1); } } return c; }
void mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_size_t n, mp_size_t spt, int half, mp_ptr wsi) { mp_limb_t cy; mp_size_t n3; mp_size_t n3p1; n3 = 3 * n; n3p1 = n3 + 1; #define r4 (pp + n3) /* 3n+1 */ #define r2 (pp + 7 * n) /* 3n+1 */ #define r0 (pp +11 * n) /* s+t <= 2*n */ /******************************* interpolation *****************************/ if (half != 0) { cy = mpn_sub_n (r3, r3, r0, spt); MPN_DECR_U (r3 + spt, n3p1 - spt, cy); cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi); MPN_DECR_U (r2 + spt, n3p1 - spt, cy); DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi); cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi); MPN_DECR_U (r1 + spt, n3p1 - spt, cy); DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi); }; r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi); DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi); #if HAVE_NATIVE_mpn_add_n_sub_n mpn_add_n_sub_n (r1, r4, r4, r1, n3p1); #else ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1)); mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */ MP_PTR_SWAP(r1, wsi); #endif r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi); DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi); #if HAVE_NATIVE_mpn_add_n_sub_n mpn_add_n_sub_n (r2, r5, r5, r2, n3p1); #else mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */ ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1)); MP_PTR_SWAP(r5, wsi); #endif r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n); #if AORSMUL_FASTER_AORS_AORSLSH mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */ #else mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */ DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */ #endif /* A division by 2835x4 follows. Warning: the operand can be negative! */ mpn_divexact_by2835x4(r4, r4, n3p1); if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0) r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2)); #if AORSMUL_FASTER_2AORSLSH mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */ #else DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */ DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */ #endif mpn_divexact_by255(r5, r5, n3p1); ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi)); #if AORSMUL_FASTER_3AORSLSH ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100)); #else ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi)); ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi)); ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi)); #endif ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi)); mpn_divexact_by42525(r1, r1, n3p1); #if AORSMUL_FASTER_AORS_2AORSLSH ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225)); #else ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1)); ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi)); ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi)); #endif mpn_divexact_by9x4(r2, r2, n3p1); ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1)); mpn_sub_n (r4, r2, r4, n3p1); ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1)); ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1)); mpn_add_n (r5, r5, r1, n3p1); ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1)); /* last interpolation steps... */ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1)); ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1)); /* ... 
could be mixed with recomposition ||H-r5|M-r5|L-r5| ||H-r1|M-r1|L-r1| */ /***************************** recomposition *******************************/ /* pp[] prior to operations: |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp summation scheme for remaining operations: |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| */ cy = mpn_add_n (pp + n, pp + n, r5, n); cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy); #if HAVE_NATIVE_mpn_add_nc cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy); #else MPN_INCR_U (r5 + 2 * n, n + 1, cy); cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n); #endif MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy); pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n); cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]); #if HAVE_NATIVE_mpn_add_nc cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy); #else MPN_INCR_U (r3 + 2 * n, n + 1, cy); cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n); #endif MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy); pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n); if (half) { cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]); #if HAVE_NATIVE_mpn_add_nc if (LIKELY (spt > n)) { cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy); MPN_INCR_U (pp + 4 * n3, spt - n, cy); } else { ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy)); } #else MPN_INCR_U (r1 + 2 * n, n + 1, cy); if (LIKELY (spt > n)) { cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n); MPN_INCR_U (pp + 4 * n3, spt - n, cy); } else { ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt)); } #endif } else { ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n])); } #undef r0 #undef r2 #undef r4 }
/* Toom 4 interpolation. Interpolates the value at 2^(sn*B) of a polynomial p(x) with 7 coefficients given the values p(oo), p(2), p(1), p(-1), 2^6*p(1/2), 2^6*p(-1/2), p(0). The output is placed in rp and the final number of limbs of the output is given in rpn. The 4th and 6th values may be negative, and if so, n4 and n6 should be set to a negative value respectively. To save space we pass r3, r5, r7 in place in the output rp. The other r's are stored separately in space tp. The low limb of r3 is stored in r30, as it will be overwritten by the high limb of r5. rp rp1 rp2 rp3 rp4 rp5 rp6 rp7 <----------- r7-----------><------------r5--------------> <-------------r3-------------> We assume that r1 is stored at tp, r2 at (tp + t4), r4 at (tp + 2*t4) and r6 (tp + 3*t4). Each of these r's has t4 = s4 + 1 limbs allocated. */ void mpn_toom4_interpolate(mp_ptr rp, mp_size_t * rpn, mp_size_t sn, mp_ptr tp, mp_size_t s4, mp_size_t n4, mp_size_t n6, mp_limb_t r30) { mp_size_t n1, n2, n3, n5, n7, t4; mp_limb_t saved, saved2, cy; t4 = s4 + 1; mpn_add_n(r2, r2, r5, s4); if (n6 < 0) mpn_add_n(r6, r5, r6, s4); else mpn_sub_n(r6, r5, r6, s4); /* r6 is now in twos complement format */ saved = r3[0]; r3[0] = r30; if (n4 < 0) mpn_add_n(r4, r3, r4, s4); else mpn_sub_n(r4, r3, r4, s4); r3[0] = saved; /* r4 is now in twos complement format */ mpn_sub_n(r5, r5, r1, s4); #if HAVE_NATIVE_mpn_sublsh_n r5[s4-1] -= mpn_sublsh_n(r5, r5, r7, s4-1, 6); #else r5[s4-1] -= mpn_submul_1(r5, r7, s4-1, 64); #endif TC4_RSHIFT1(r4, s4); saved = r3[0]; r3[0] = r30; mpn_sub_n(r3, r3, r4, s4); r30 = r3[0]; r3[0] = saved; mpn_double(r5, s4); mpn_sub_n(r5, r5, r6, s4); saved = r3[0]; r3[0] = r30; mpn_submul_1(r2, r3, s4, 65); r3[0] = saved; saved2 = r7[s4-1]; r7[s4-1] = CNST_LIMB(0); // r7 is always positive so no sign extend needed saved = r3[0]; r3[0] = r30; #if HAVE_NATIVE_mpn_subadd_n mpn_subadd_n(r3, r3, r7, r1, s4); #else mpn_sub_n(r3, r3, r7, s4); mpn_sub_n(r3, r3, r1, s4); #endif r7[s4-1] = saved2; r30 = r3[0]; mpn_addmul_1(r2, r3, s4, 45); #if HAVE_NATIVE_mpn_sublsh_n cy = mpn_sublsh_n(r5, r5, r3, s4 - 1, 3); #else cy = mpn_submul_1(r5, r3, s4 - 1, 8); #endif r3[0] = saved; r3[0] -= (cy + 8*r3[s4-1]); mpn_rshift(r5, r5, s4, 3); mpn_divexact_by3(r5, r5, s4); mpn_sub_n(r6, r6, r2, s4); #if HAVE_NATIVE_mpn_sublsh_n mpn_sublsh_n(r2, r2, r4, s4, 4); #else mpn_submul_1(r2, r4, s4, 16); #endif mpn_rshift(r2, r2, s4, 1); mpn_divexact_by3(r2, r2, s4); mpn_divexact_by3(r2, r2, s4); saved = r3[0]; r3[0] = r30; cy = mpn_sub_n(r3, r3, r5, s4 - 1); r30 = r3[0]; r3[0] = saved; r3[s4-1] -= (cy + r5[s4-1]); mpn_sub_n(r4, r4, r2, s4); mpn_addmul_1(r6, r2, s4, 30); mpn_divexact_byfobm1(r6, r6, s4, CNST_LIMB(15), CNST_LIMB(~0/15)); mpn_rshift(r6, r6, s4, 2); mpn_sub_n(r2, r2, r6, s4); TC4_NORM(r1, n1, s4); TC4_NORM(r2, n2, s4); (*rpn) = 6*sn+1; cy = mpn_add_1(r3, r3, *rpn - 4*sn, r30); /* don't forget to add r3[0] back in */ if (cy) { rp[*rpn] = cy; (*rpn)++; } tc4_copy(rp, rpn, 5*sn, r2, n2); tc4_copy(rp, rpn, 6*sn, r1, n1); tc4_copy(rp, rpn, sn, r6, s4); tc4_copy(rp, rpn, 3*sn, r4, s4); }