/* Simultaneously form s = x + y and d = x - y.
   Returns 2*carry(x + y) + borrow(x - y).  s and d must not overlap;
   either may alias an input, and the operation order below is chosen
   so aliased inputs are read before they are overwritten.  */
mp_limb_t
mpn_sumdiff_n (mp_ptr s, mp_ptr d, mp_srcptr x, mp_srcptr y, mp_size_t n)
{
  mp_limb_t r;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, y, n));
  ASSERT (! MPN_OVERLAP_P (s, n, d, n));

  if ((s == x && d == y) || (s == y && d == x))
    {
      /* Both outputs alias both inputs: form the difference in a
         temporary, do the sum in place, then copy the difference out. */
      mp_ptr w = __GMP_ALLOCATE_FUNC_LIMBS (n);
      r = mpn_sub_n (w, x, y, n);
      r += 2 * mpn_add_n (s, x, y, n);
      MPN_COPY (d, w, n);
      __GMP_FREE_FUNC_LIMBS (w, n);
      return r;
    }

  if (s == x || s == y)
    {
      /* s aliases an input: subtract first, while both inputs are
         still intact, then let the sum overwrite s.  */
      r = mpn_sub_n (d, x, y, n);
      r += 2 * mpn_add_n (s, x, y, n);
      return r;
    }

  /* s is distinct from both inputs; d may alias one, so add first. */
  r = 2 * mpn_add_n (s, x, y, n);
  r += mpn_sub_n (d, x, y, n);
  return r;
}
/* Conditional subtraction: rp = up - (cnd ? vp : 0), returning the
   borrow.  The subtrahend is selected with a mask so the code path
   does not branch on cnd.  */
mp_limb_t
mpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t mask, borrow;

  ASSERT (n >= 1);
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));

  mask = -(mp_limb_t) (cnd != 0);   /* all ones when cnd, else zero */
  borrow = 0;

  while (n-- != 0)
    {
      mp_limb_t u = *up++;
      mp_limb_t v = *vp++ & mask;
#if GMP_NAIL_BITS == 0
      mp_limb_t diff = u - v;
      mp_limb_t b1 = diff > u;      /* borrow from u - v */
      mp_limb_t res = diff - borrow;
      mp_limb_t b2 = res > diff;    /* borrow from applying carry-in */
      borrow = b1 | b2;
      *rp++ = res;
#else
      mp_limb_t res = u - v - borrow;
      borrow = res >> (GMP_LIMB_BITS - 1);   /* nail bit signals borrow */
      *rp++ = res & GMP_NUMB_MASK;
#endif
    }

  return borrow;
}
/* t = x + y + z.  Returns the carry out (0..2).
   t may alias any of the inputs; the operands are permuted so that an
   aliased input is consumed by the first addition.  */
mp_limb_t
mpn_addadd_n (mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
  mp_srcptr p = x, q = y, r = z;
  mp_limb_t cy;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  ASSERT_MPN (z, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, z, n));

  if (t == x)
    {
      if (t == y)
        {
          if (t == z)
            {
              /* All three operands alias t: result is 3*t. */
#ifdef HAVE_NATIVE_mpn_addlsh1_n
              return mpn_addlsh1_n (t, x, y, n);   /* x + 2*x = 3*x */
#else
              return mpn_mul_1 (t, x, n, 3);
#endif
            }
          /* t == x == y: the (x, y) order already consumes t first. */
        }
      else
        MP_SRCPTR_SWAP (q, r);   /* t == x only: compute x + z, then y */
    }
  else
    {
      MP_SRCPTR_SWAP (p, r);     /* move z first in case t == z */
      if (t == y)
        MP_SRCPTR_SWAP (p, q);   /* t == y: compute y + z, then x */
    }

  cy = mpn_add_n (t, p, q, n);
  return cy + mpn_add_n (t, t, r, n);
}
/* rp = up + vp, returning the carry out (0 or 1).
   Full-limb version: carry is detected by unsigned wrap-around rather
   than by masking nail bits.  */
mp_limb_t
mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t carry = 0;

  ASSERT (n >= 1);
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));

  do
    {
      mp_limb_t u = *up++;
      mp_limb_t v = *vp++;
      mp_limb_t sum = u + v;
      mp_limb_t wrapped = sum < u;     /* carry from u + v */
      mp_limb_t res = sum + carry;
      wrapped |= res < sum;            /* carry from adding carry-in */
      carry = wrapped;
      *rp++ = res;
    }
  while (--n != 0);

  return carry;
}
/* c is the top bits of the inputs, (fully reduced) c & 2 is the top bit of y c & 1 is the top bit of z */ int mpn_mulmod_2expp1_basecase (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, int c, mpir_ui b, mp_ptr tp) { int cy, cz; mp_size_t n, k; cy = c & 2; cz = c & 1; n = BITS_TO_LIMBS (b); k = GMP_NUMB_BITS * n - b; ASSERT(b > 0); ASSERT(n > 0); ASSERT_MPN(yp, n); ASSERT_MPN(zp, n); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n)); ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n)); ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n)); ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0); ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0); #if WANT_ASSERT { mp_size_t t = n; MPN_NORMALIZE(yp, t); ASSERT(cy == 0 || t == 0); t = n; MPN_NORMALIZE(zp, t); ASSERT(cz == 0 || t == 0); } #endif if (LIKELY (cy == 0)) { if (LIKELY (cz == 0)) { c = mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp); } else { c = mpn_neg_n (xp, yp, n); c = mpn_add_1 (xp, xp, n, c); xp[n - 1] &= GMP_NUMB_MASK >> k; } } else { if (LIKELY (cz == 0))
/* Exact division by 3 with carry-in ci:
   (xp, n) = (qp, n)*3 - ret*B^n with 0 <= ret < 3.
   Multiplies each limb by m = (B - 1)/3 and propagates the borrow
   through a running accumulator.  */
mp_limb_t
mpn_divexact_by3c (mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t ci)
{
  mp_limb_t inv3, run, lo, hi, brw;
  mp_size_t i;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));

  inv3 = ~(mp_limb_t) 0 / 3;   /* (B - 1)/3 */
  run = ci * inv3;             /* fold in the incoming carry */

  for (i = 0; i < n; i++)
    {
      umul_ppmm (hi, lo, xp[i], inv3);
      SUBC_LIMB (brw, run, run, lo);
      qp[i] = run;
      run -= hi + brw;
    }

  /* Next quotient times -3. */
  return run * (mp_limb_t) -3;
}
void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor) { mp_size_t i; mp_limb_t c, h, l, ls, s, s_next, inverse, dummy; unsigned shift; ASSERT (size >= 1); ASSERT (divisor != 0); ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); ASSERT_MPN (src, size); ASSERT_LIMB (divisor); s = src[0]; if (size == 1) { dst[0] = s / divisor; return; } if ((divisor & 1) == 0) { count_trailing_zeros (shift, divisor); divisor >>= shift; }
/* Exact division by 3 with carry-in cy (0..2): rp = up/3, returning
   the carry-out (0..2).  Uses MODLIMB_INVERSE_3 (the inverse of 3
   mod B); cl caches -cy * MODLIMB_INVERSE_3 so the carry folds into
   the quotient with a single add per limb.
   NOTE(review): the qx = q + (s < cy) adjustment appears to
   compensate the threshold tests for a borrow out of s - cy --
   presumably; confirm against the reference divexact-by-3 code before
   relying on edge-case behaviour.  */
mp_limb_t
mpn_divexact_by3c (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_limb_t cy)
{
  mp_limb_t s, sm, cl, q, qx, c2, c3;
  mp_size_t i;

  ASSERT (un >= 1);
  ASSERT (cy == 0 || cy == 1 || cy == 2);
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));

  /* cl = -cy * (1/3 mod B), added into each quotient limb below */
  cl = cy == 0 ? 0 : cy == 1 ? -MODLIMB_INVERSE_3 : -2*MODLIMB_INVERSE_3;

  for (i = 0; i < un; i++)
    {
      s = up[i];
      sm = (s * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
      q = (cl + sm) & GMP_NUMB_MASK;   /* q = (s - cy)/3 mod B */
      rp[i] = q;
      /* carry out: one for each third-of-B boundary the quotient
         limb reaches */
      qx = q + (s < cy);
      c2 = qx >= GMP_NUMB_CEIL_MAX_DIV3;
      c3 = qx >= GMP_NUMB_CEIL_2MAX_DIV3 ;
      cy = c2 + c3;
      cl = (-c2 & -MODLIMB_INVERSE_3) + (-c3 & -MODLIMB_INVERSE_3);
    }
  return cy;
}
/* (xp, n) = (qp, n)*f - ret*B^n with 0 <= ret < f.
   Bm1of must satisfy Bm1of*f + 1 == 0 (mod B), i.e. Bm1of = (B-1)/f
   for f dividing B - 1.  Exact division by 3 is the special case
   f = 3.  */
mp_limb_t
mpn_divexact_byfobm1 (mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t f, mp_limb_t Bm1of)
{
  mp_limb_t borrow, acc, plo, phi;
  mp_size_t i;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));
  ASSERT (Bm1of * f + 1 == 0);

  acc = 0;   /* carry-in of zero */
  for (i = 0; i < n; i++)
    {
      umul_ppmm (phi, plo, xp[i], Bm1of);
      SUBC_LIMB (borrow, acc, acc, plo);
      qp[i] = acc;
      acc -= phi + borrow;
    }

  /* Next quotient times -f. */
  return acc * -f;
}
void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor) { mp_limb_t inverse, s, s_next, c, l, ls, q; unsigned rshift, lshift; mp_limb_t lshift_mask; mp_limb_t divisor_h; ASSERT (size >= 1); ASSERT (divisor != 0); ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); ASSERT_MPN (src, size); ASSERT_LIMB (divisor); s = *src++; /* src low limb */ size--; if (size == 0) { *dst = s / divisor; return; } if ((divisor & 1) == 0) { count_trailing_zeros (rshift, divisor); divisor >>= rshift; }
/* rp = up / 3 (exact), with carry-in c in 0..2; returns the carry-out,
   also in 0..2.  Each quotient limb is (up[i] - c) * (1/3 mod B), and
   the outgoing carry counts the borrow plus the third-of-B boundaries
   the quotient limb reaches.  */
mp_limb_t
mpn_divexact_by3c (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_limb_t c)
{
  mp_size_t i;

  ASSERT (un >= 1);
  ASSERT (c == 0 || c == 1 || c == 2);
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));

  for (i = 0; i < un; i++)
    {
      mp_limb_t borrow, low, q;

      SUBC_LIMB (borrow, low, up[i], c);   /* low = up[i] - c */
      q = (low * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
      rp[i] = q;
      c = borrow
          + (q >= GMP_NUMB_CEIL_MAX_DIV3)
          + (q >= GMP_NUMB_CEIL_2MAX_DIV3);
    }

  ASSERT (c == 0 || c == 1 || c == 2);
  return c;
}
/* (xp, n) = (qp, n)*3 - ret*B^n with 0 <= ret < 3.
   Exact division by 3 via multiplication by m = (B - 1)/3, keeping a
   sliding three-limb window (t1, t2, t3) that is updated with the
   add/sub-with-carry macros; qp[j] lags one limb behind the window.
   Fix: the loop index was declared `int', which truncates or
   overflows when n (an mp_size_t) exceeds INT_MAX; it is now
   mp_size_t.  */
mp_limb_t
divexact_submul (mp_ptr qp, mp_ptr xp, mp_size_t n)
{
  mp_size_t j;
  mp_limb_t c, m, t1, t2, t3, acc, ax, dx, t;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));

  m = 0; m = ~m; m = m/3;       /* m = (B - 1)/3 */
  c = 0; t1 = t2 = t3 = acc = 0;

  /* Prime the window with the first limb's product. */
  umul_ppmm (dx, ax, xp[0], m);
  SUB (c, acc, 0, t1);
  ADC (c, t2, 0, ax, c);
  ADC (c, t3, 0, dx, c);
  ASSERT (c == 0);
  t1 = t2; t2 = t3;

  for (j = 1; j <= n - 1; j++)
    {
      t3 = 0;
      umul_ppmm (dx, ax, xp[j], m);
      SUB (c, acc, acc, t1);
      qp[j - 1] = acc;          /* emit the previous quotient limb */
      ADC (c, t2, t2, ax, c);
      ADC (c, t3, t3, dx, c);
      ASSERT (c == 0);
      t1 = t2; t2 = t3;
    }

  /* Flush the last quotient limb and form the return value. */
  SUB (c, acc, acc, t1);
  qp[n - 1] = acc;
  ADC (c, t2, t2, 0, c);
  t = (t2 - acc) * 3;           /* next quotient * -3 */
  return t;
}
/* rp = up + vp, returning the carry.  Nails-aware version: the carry
   is the part of the per-limb sum above the numb bits, and the stored
   limb is masked back into numb range.  */
mp_limb_t
mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cy = 0;

  ASSERT (n >= 1);
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));

  do
    {
      mp_limb_t sum = *up++ + *vp++ + cy;
      cy = sum >> GMP_NUMB_BITS;
      *rp++ = sum & GMP_NUMB_MASK;
    }
  while (--n != 0);

  return cy;
}
/* Shifted Hensel division dispatcher: pick the single-limb or
   two-limb kernel based on the tuning threshold.  The divisor d must
   be odd (required for Hensel/modular division).  */
mp_limb_t
mpn_rsh_divrem_hensel_qr_1 (mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d, int s, mp_limb_t cin)
{
  ASSERT (n > 0);
  ASSERT (s >= 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));
  ASSERT (d % 2 == 1);

  return BELOW_THRESHOLD (n, RSH_DIVREM_HENSEL_QR_1_THRESHOLD)
           ? mpn_rsh_divrem_hensel_qr_1_1 (qp, xp, n, d, s, cin)
           : mpn_rsh_divrem_hensel_qr_1_2 (qp, xp, n, d, s, cin);
}
// basic divexact mp_limb_t divexact_basic(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d) {int j;mp_limb_t c,h,q,dummy,h1,t,m; ASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n)); ASSERT(d%2==1);modlimb_invert(m,d); c=0;h=0;t=0; for(j=0;j<=n-1;j++) {h1=xp[j]; t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}// set borrow to c ; sbb t,h1 ; set c to borrow q=h1*m; qp[j]=q; umul_ppmm(h,dummy,q,d); ASSERT(dummy==h1);} // ie returns next quotient*-d return h+c;} // so (xp,n) = (qp,n)*d -ret*B^n and 0 <= ret < d
/* (xp, n) = (qp, n)*3 - ret*B^n with 0 <= ret < 3.
   Direct borrow-chain version of exact division by 3 using
   m = (B - 1)/3.
   Fixes: the initialiser referenced t1/t2/t3, which are not declared
   anywhere in this function (apparently copied from divexact_submul)
   and so failed to compile -- those dead stores are removed; the loop
   index was also `int' and is now mp_size_t to match n.  */
mp_limb_t
divexact3_direct (mp_ptr qp, mp_ptr xp, mp_size_t n)
{
  mp_size_t j;
  mp_limb_t c, m, acc, ax, dx;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));

  m = 0; m = ~m; m = m/3;   /* m = (B - 1)/3 */
  c = 0; acc = 0;

  for (j = 0; j <= n - 1; j++)
    {
      umul_ppmm (dx, ax, xp[j], m);
      SBB (c, acc, acc, ax, c);
      qp[j] = acc;
      SBB (c, acc, acc, dx, c);
    }
  SBB (c, acc, acc, 0, c);   /* apply the final pending borrow */

  /* Next quotient * -3. */
  return acc * -3;
}
/* (xp, n) = (qp, n)*3 - ret*B^n with 0 <= ret < 3.
   Experimental variant of exact division by 3: relies on the borrow
   produced at line 2 always being cancelled by line 4, and aborts
   loudly if that invariant ever fails.
   Fix: the loop index was declared `int', which truncates or
   overflows when n (an mp_size_t) exceeds INT_MAX; it is now
   mp_size_t.  */
mp_limb_t
divexact3_byluck (mp_ptr qp, mp_ptr xp, mp_size_t n)
{
  mp_size_t j;
  mp_limb_t c, m, acc, ax, dx;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));

  m = 0; m = ~m; m = m/3;   /* m = (B - 1)/3 */
  c = 0; acc = 0;

  for (j = 0; j <= n - 1; j++)
    {
      umul_ppmm (dx, ax, xp[j], m);   /* line 1 */
      SUB (c, acc, acc, ax);          /* line 2 */
      qp[j] = acc;                    /* line 3 */
      SBB (c, acc, acc, dx, c);       /* line 4 */
      if (c != 0)
        {
          printf ("c not zero\n");
          abort ();
        }
    }

  /* Next quotient * -3. */
  return acc * -3;
}
/* (xp, n) = (qp, n)*d - ret*B^n with 0 <= ret < d.
   Hensel (right-to-left) exact-style division by an odd single limb
   d, using inv = 1/d mod B.  The pending high part of q*d plus the
   running borrow is subtracted from each incoming limb.  */
mp_limb_t
mpn_divrem_hensel_qr_1_1 (mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)
{
  mp_limb_t inv, hi, borrow, q, lo, diff, sub;
  mp_size_t j;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));
  ASSERT (d % 2 == 1);

  modlimb_invert (inv, d);
  hi = 0;
  borrow = 0;

  for (j = 0; j < n; j++)
    {
      sub = hi + borrow;          /* pending high part plus borrow */
      borrow = sub > xp[j];       /* does the subtraction wrap? */
      diff = xp[j] - sub;
      q = diff * inv;             /* quotient limb mod B */
      qp[j] = q;
      umul_ppmm (hi, lo, q, d);   /* high half carried to next limb */
      ASSERT (lo == diff);
    }

  return hi + borrow;
}
/* Divides (uh*B^n + {up, n}) by the normalised divisor d, using the
   precomputed inverse dinv.  Quotient goes to {qp, n}; the remainder
   is returned.  Requires uh < d and the high bit of d set.  */
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh, mp_limb_t d, mp_limb_t dinv)
{
  ASSERT (n > 0);
  ASSERT (uh < d);
  ASSERT (d & GMP_NUMB_HIGHBIT);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));

  while (n > 0)
    {
      mp_limb_t q;
      n--;
      /* One division step: running remainder uh and next limb. */
      udiv_qrnnd_preinv (q, uh, uh, up[n], d, dinv);
      qp[n] = q;
    }

  return uh;
}
void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor) { mp_limb_t inverse, lshift_mask, s, sr, s_next, c, h, x, y, q, dummy; unsigned rshift, lshift; ASSERT (size >= 1); ASSERT (divisor != 0); ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); ASSERT_MPN (src, size); ASSERT_LIMB (divisor); s_next = *src++; /* src[0] */ rshift = 0; lshift_mask = 0; if ((divisor & 1) == 0) { count_trailing_zeros (rshift, divisor); lshift_mask = MP_LIMB_T_MAX; divisor >>= rshift; }
void mpn_div_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch) { mp_ptr new_dp, new_np, tp, rp; mp_limb_t cy, dh, qh; mp_size_t new_nn, qn; gmp_pi1_t dinv; int cnt; TMP_DECL; TMP_MARK; ASSERT (nn >= dn); ASSERT (dn > 0); ASSERT (dp[dn - 1] != 0); ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn)); ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn)); ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn)); ASSERT_ALWAYS (FUDGE >= 2); if (dn == 1) { mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]); return; } qn = nn - dn + 1; /* Quotient size, high limb might be zero */ if (qn + FUDGE >= dn) { /* |________________________| |_______| */ new_np = scratch; dh = dp[dn - 1]; if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) { count_leading_zeros (cnt, dh); cy = mpn_lshift (new_np, np, nn, cnt); new_np[nn] = cy; new_nn = nn + (cy != 0); new_dp = TMP_ALLOC_LIMBS (dn); mpn_lshift (new_dp, dp, dn, cnt); if (dn == 2) { qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp); } else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD)) { invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]); qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32); } else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */ { invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]); qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv); } else { mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0); mp_ptr scratch = TMP_ALLOC_LIMBS (itch); qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch); } if (cy == 0) qp[qn - 1] = qh; else if (UNLIKELY (qh != 0)) { /* This happens only when the quotient is close to B^n and mpn_*_divappr_q returned B^n. 
*/ mp_size_t i, n; n = new_nn - dn; for (i = 0; i < n; i++) qp[i] = GMP_NUMB_MAX; qh = 0; /* currently ignored */ } } else /* divisor is already normalised */ { if (new_np != np) MPN_COPY (new_np, np, nn); if (dn == 2) { qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp); } else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD)) { invert_pi1 (dinv, dh, dp[dn - 2]); qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32); } else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */ { invert_pi1 (dinv, dh, dp[dn - 2]); qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv); } else { mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0); mp_ptr scratch = TMP_ALLOC_LIMBS (itch); qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch); } qp[nn - dn] = qh; } } else { /* |________________________| |_________________| */ tp = TMP_ALLOC_LIMBS (qn + 1); new_np = scratch; new_nn = 2 * qn + 1; if (new_np == np) /* We need {np,nn} to remain untouched until the final adjustment, so we need to allocate separate space for new_np. 
*/ new_np = TMP_ALLOC_LIMBS (new_nn + 1); dh = dp[dn - 1]; if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) { count_leading_zeros (cnt, dh); cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt); new_np[new_nn] = cy; new_nn += (cy != 0); new_dp = TMP_ALLOC_LIMBS (qn + 1); mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt); new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt); if (qn + 1 == 2) { qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp); } else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1)) { invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]); qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32); } else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1)) { invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]); qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv); } else { mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0); mp_ptr scratch = TMP_ALLOC_LIMBS (itch); qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch); } if (cy == 0) tp[qn] = qh; else if (UNLIKELY (qh != 0)) { /* This happens only when the quotient is close to B^n and mpn_*_divappr_q returned B^n. */ mp_size_t i, n; n = new_nn - (qn + 1); for (i = 0; i < n; i++) tp[i] = GMP_NUMB_MAX; qh = 0; /* currently ignored */ } } else /* divisor is already normalised */ {
/* ret + (xp, n) = (yp, n)*(zp, n) % (2^b + 1)
   needs (tp, 2n) temp space; everything is reduced mod 2^b;
   inputs and outputs are fully reduced.
   N.B: 2n is not the same as 2b rounded up to the nearest limb!  */
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;
  TMP_DECL;

  n = BITS_TO_LIMBS (b);         /* limbs needed to hold b bits */
  k = GMP_NUMB_BITS * n - b;     /* unused bits at the top of the high limb */

  ASSERT(b > 0);
  ASSERT(n > 0);
  ASSERT_MPN(yp, n);
  ASSERT_MPN(zp, n);
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  /* inputs must carry no stray bits above bit b */
  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  /* Large limb-aligned operands with an FFT-friendly size: use the
     FFT-based mulmod path. */
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n))
    {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;
      mp_limb_t ret;

      TMP_MARK;
      /* three (n + 1)-limb buffers: product and zero-padded copies of
         the inputs */
      tx = TMP_BALLOC_LIMBS(3*n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;
      MPN_COPY(ty, yp, n);
      MPN_COPY(tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      /* smallest depth with 2^depth >= b */
      while ((((mp_limb_t)1)<<depth) < b) depth++;

      /* depth adjustment from the tuning table -- presumably tuned
         offsets per FFT size; NOTE(review): confirm the indexing
         bounds against mulmod_2expp1_table_n's definition */
      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
      depth1 = depth/2 - off;

      w1 = b/(((mp_limb_t)1)<<(2*depth1));

      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);

      MPN_COPY(xp, tx, n);
      ret = tx[n];             /* top limb is the returned carry */

      TMP_FREE;
      return ret;
    }
#endif

  /* Schoolbook path: full 2n-limb product, then reduce mod 2^b + 1
     by subtracting the high part from the low part. */
  if (yp == zp)
    mpn_sqr(tp, yp, n);
  else
    mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      /* b is a whole number of limbs: xp = low - high, with the
         borrow folded back in as an addition of 1 */
      c = mpn_sub_n (xp, tp, tp + n, n);
      return mpn_add_1 (xp, xp, n, c);
    }

  /* b is not limb-aligned: the split point is bit b, not the limb
     boundary, so save the bits of tp[n-1] above bit b in c and clear
     them from the low part. */
  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    /* align the high part to bit b by shifting it up k bits, and OR
       the saved bits of tp[n-1] into the vacated low bits */
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;   /* final reduction below bit b */
  return c;
}
/* t = x + y - z.  Returns carry(x + y) - borrow((x + y) - z), i.e.
   -1, 0 or 1.  t may alias any of the inputs; the original's many
   alias branches collapse to two orders of operations.  */
int
mpn_addsub_n (mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
  mp_limb_t acc;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  ASSERT_MPN (z, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, z, n));

  /* x + x - x == x: t is already the answer, with no carry/borrow. */
  if (t == x && t == y && t == z)
    return 0;

  if (t == z)
    {
      /* z aliases the output: subtract first so z is consumed before
         it is overwritten, then add the remaining operand.  When t
         also aliases y, subtract (y - z) and add x; otherwise
         subtract (x - z) and add y.  */
      mp_srcptr minuend = (t == y) ? y : x;
      mp_srcptr addend  = (t == y) ? x : y;
      acc = -mpn_sub_n (t, minuend, z, n);
      acc += mpn_add_n (t, t, addend, n);
      return (int) acc;
    }

  /* In every other aliasing pattern the sum can be formed first. */
  acc = mpn_add_n (t, x, y, n);
  acc -= mpn_sub_n (t, t, z, n);
  return (int) acc;
}