/* Evaluates a polynomial of degree 3 in the points +1 and -1.

   The coefficients are read from xp: x0 at xp, x1 at xp + n and x2 at
   xp + 2*n (n limbs each), and x3 at xp + 3*n (x3n limbs, 0 < x3n <= n).

   On return xp1 holds (x0 + x2) + (x1 + x3) and xm1 holds the absolute
   value of (x0 + x2) - (x1 + x3), both as n+1 limb numbers.  tp is
   scratch space of n+1 limbs.

   Returns ~0 when (x0 + x2) < (x1 + x3), i.e. when the value stored in
   xm1 had to be negated, and 0 otherwise.  */
int
mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
{
  const mp_size_t len = n + 1;
  mp_srcptr minuend, subtrahend;
  int neg;

  ASSERT (x3n > 0);
  ASSERT (x3n <= n);

  /* Even-index sum goes to xp1, odd-index sum to tp; the carry out of
     each addition becomes the extra top limb.  */
  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
  tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);

  /* Always subtract the smaller sum from the larger one, and remember
     whether the operands had to be exchanged.  */
  if (mpn_cmp (xp1, tp, len) < 0)
    {
      neg = ~0;
      minuend = tp;
      subtrahend = xp1;
    }
  else
    {
      neg = 0;
      minuend = xp1;
      subtrahend = tp;
    }

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (xp1, xm1, minuend, subtrahend, len);
#else
  /* The difference must be formed first: the addition overwrites xp1.  */
  mpn_sub_n (xm1, minuend, subtrahend, len);
  mpn_add_n (xp1, xp1, tp, len);
#endif

  ASSERT (xp1[n] <= 3);
  ASSERT (xm1[n] <= 1);

  return neg;
}
int main (int argc, char **argv) { mp_ptr r1p, r2p, s1p, s2p; double t; mp_size_t n; n = strtol (argv[1], 0, 0); r1p = malloc (n * GMP_LIMB_BYTES); r2p = malloc (n * GMP_LIMB_BYTES); s1p = malloc (n * GMP_LIMB_BYTES); s2p = malloc (n * GMP_LIMB_BYTES); TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n))); printf (" separate add and sub: %.3f\n", t); TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n)); printf ("combined addsub separate variables: %.3f\n", t); TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n)); printf (" combined addsub r1 overlap: %.3f\n", t); TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n)); printf (" combined addsub r2 overlap: %.3f\n", t); TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n)); printf (" combined addsub in-place: %.3f\n", t); return 0; }
/* Evaluates a polynomial of degree 3 in the points +2 and -2.

   Coefficients x0, x1, x2 are n limbs each, stored at xp, xp + n and
   xp + 2*n; x3 has x3n limbs (0 < x3n <= n) and is stored at xp + 3*n.

   On return xp2 holds (x0 + 4 x2) + (2 x1 + 8 x3) and xm2 holds the
   absolute value of (x0 + 4 x2) - (2 x1 + 8 x3), each n+1 limbs.
   Returns ~0 when that difference was negative, 0 otherwise.

   Needs n+1 limbs of temporary storage.  */
int
mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
{
  const mp_size_t len = n + 1;
  mp_srcptr x0, x1, x2, x3;
  mp_srcptr minuend, subtrahend;
  mp_limb_t cy;
  int neg;

  ASSERT (x3n > 0);
  ASSERT (x3n <= n);

  x0 = xp;
  x1 = xp + n;
  x2 = xp + 2*n;
  x3 = xp + 3*n;

  /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
#if HAVE_NATIVE_mpn_addlsh2_n
  xp2[n] = mpn_addlsh2_n (xp2, x0, x2, n);
  cy = mpn_addlsh2_n (tp, x1, x3, x3n);
#else /* HAVE_NATIVE_mpn_addlsh_n */
  xp2[n] = mpn_addlsh_n (xp2, x0, x2, n, 2);
  cy = mpn_addlsh_n (tp, x1, x3, x3n, 2);
#endif
  /* x3 may be shorter than x1: copy the remaining limbs of x1 while
     propagating the carry.  */
  if (x3n < n)
    cy = mpn_add_1 (tp + x3n, x1 + x3n, n - x3n, cy);
  tp[n] = cy;
#else
  /* xp2 = x0 + 4 x2, with tp holding the shifted x2 temporarily.  */
  cy = mpn_lshift (tp, x2, n, 2);
  xp2[n] = cy + mpn_add_n (xp2, tp, x0, n);

  /* tp = x1 + 4 x3; the bits shifted out of x3 land in tp[x3n].  */
  tp[x3n] = mpn_lshift (tp, x3, x3n, 2);
  if (x3n >= n)
    tp[n] += mpn_add_n (tp, x1, tp, n);
  else
    tp[n] = mpn_add (tp, x1, n, tp, x3n + 1);
#endif
  /* Double the odd part: tp = 2 x1 + 8 x3.  */
  mpn_lshift (tp, tp, len, 1);

  if (mpn_cmp (xp2, tp, len) < 0)
    {
      neg = ~0;
      minuend = tp;
      subtrahend = xp2;
    }
  else
    {
      neg = 0;
      minuend = xp2;
      subtrahend = tp;
    }

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (xp2, xm2, minuend, subtrahend, len);
#else
  /* Subtract before adding, since the sum overwrites xp2.  */
  mpn_sub_n (xm2, minuend, subtrahend, len);
  mpn_add_n (xp2, xp2, tp, len);
#endif

  ASSERT (xp2[n] < 15);
  ASSERT (xm2[n] < 10);

  return neg;
}
/* Evaluates a polynomial of degree k > 3, in the points +1 and -1.

   xp holds k coefficients of n limbs each followed by a top coefficient
   of hn limbs (0 < hn <= n).  On return xp1 holds the sum of all
   coefficients and xm1 the absolute value of (even-index sum) minus
   (odd-index sum), each as n+1 limbs.  tp is n+1 limbs of scratch.

   Returns ~0 when the even-index sum was smaller than the odd-index sum,
   0 otherwise.  */
int
mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
{
  const mp_size_t len = n + 1;
  mp_srcptr minuend, subtrahend;
  mp_ptr top_sum;
  unsigned i;
  int neg;

  ASSERT (k >= 4);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * that last coefficient, of size hn, starts at xp + k*n. */

  /* Even-index full-size coefficients accumulate in xp1.  */
  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
  i = 4;
  while (i < k)
    {
      ASSERT_NOCARRY (mpn_add (xp1, xp1, len, xp+i*n, n));
      i += 2;
    }

  /* Odd-index full-size coefficients accumulate in tp.  */
  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
  i = 5;
  while (i < k)
    {
      ASSERT_NOCARRY (mpn_add (tp, tp, len, xp+i*n, n));
      i += 2;
    }

  /* The short top coefficient joins the sum matching the parity of k.  */
  top_sum = (k & 1) ? tp : xp1;
  ASSERT_NOCARRY (mpn_add (top_sum, top_sum, len, xp+k*n, hn));

  if (mpn_cmp (xp1, tp, len) < 0)
    {
      neg = ~0;
      minuend = tp;
      subtrahend = xp1;
    }
  else
    {
      neg = 0;
      minuend = xp1;
      subtrahend = tp;
    }

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (xp1, xm1, minuend, subtrahend, len);
#else
  /* Subtract before adding, since the sum overwrites xp1.  */
  mpn_sub_n (xm1, minuend, subtrahend, len);
  mpn_add_n (xp1, xp1, tp, len);
#endif

  ASSERT (xp1[n] <= k);
  ASSERT (xm1[n] <= k/2 + 1);

  return neg;
}
void mpn_toom3_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch) { mp_size_t n, s; mp_limb_t cy, vinf0; mp_ptr gp; mp_ptr as1, asm1, as2; #define a0 ap #define a1 (ap + n) #define a2 (ap + 2*n) n = (an + 2) / (size_t) 3; s = an - 2 * n; ASSERT (0 < s && s <= n); as1 = scratch + 4 * n + 4; asm1 = scratch + 2 * n + 2; as2 = pp + n + 1; gp = scratch; /* Compute as1 and asm1. */ cy = mpn_add (gp, a0, n, a2, s); #if HAVE_NATIVE_mpn_add_n_sub_n if (cy == 0 && mpn_cmp (gp, a1, n) < 0) { cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n); as1[n] = cy >> 1; asm1[n] = 0; }
void mpn_toom33_mul (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { const int __gmpn_cpuvec_initialized = 1; mp_size_t n, s, t; int vm1_neg; mp_limb_t cy, vinf0; mp_ptr gp; mp_ptr as1, asm1, as2; mp_ptr bs1, bsm1, bs2; #define a0 ap #define a1 (ap + n) #define a2 (ap + 2*n) #define b0 bp #define b1 (bp + n) #define b2 (bp + 2*n) n = (an + 2) / (size_t) 3; s = an - 2 * n; t = bn - 2 * n; ASSERT (an >= bn); ASSERT (0 < s && s <= n); ASSERT (0 < t && t <= n); as1 = scratch + 4 * n + 4; asm1 = scratch + 2 * n + 2; as2 = pp + n + 1; bs1 = pp; bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */ bs2 = pp + 2 * n + 2; gp = scratch; vm1_neg = 0; /* Compute as1 and asm1. */ cy = mpn_add (gp, a0, n, a2, s); #if HAVE_NATIVE_mpn_add_n_sub_n if (cy == 0 && mpn_cmp (gp, a1, n) < 0) { cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n); as1[n] = cy >> 1; asm1[n] = 0; vm1_neg = 1; }
/* Evaluates a polynomial of degree k > 2, in the points +2^shift and
   -2^shift.

   xp holds k coefficients of n limbs each followed by a top coefficient
   of hn limbs (0 < hn <= n).  Coefficient i is weighted by 2^(i*shift).
   On return xp2 holds the value at +2^shift and xm2 the absolute value
   at -2^shift, each n+1 limbs.  tp is n+1 limbs of scratch; without a
   native mpn_addlsh_n, xm2 is also used as scratch before it receives
   its result.

   Returns ~0 when the even-index sum was smaller than the odd-index sum,
   0 otherwise.  */
int
mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
		      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
		      mp_ptr tp)
{
  const mp_size_t len = n + 1;
  mp_srcptr minuend, subtrahend;
  mp_ptr top_sum;
  unsigned i;
  int neg;
#ifdef HAVE_NATIVE_mpn_addlsh_n
  mp_limb_t cy;
#endif

  ASSERT (k >= 3);
  ASSERT (shift*k < GMP_NUMB_BITS);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * that last coefficient, of size hn, starts at xp + k*n. */

  /* The short top coefficient joins the sum matching the parity of k:
     odd-index terms collect in tp, even-index terms in xp2.  */
  top_sum = (k & 1) ? tp : xp2;

#ifdef HAVE_NATIVE_mpn_addlsh_n
  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
  i = 4;
  while (i < k)
    {
      xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
      i += 2;
    }

  tp[n] = mpn_lshift (tp, xp+n, n, shift);
  i = 3;
  while (i < k)
    {
      tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
      i += 2;
    }

  cy = mpn_addlsh_n (top_sum, top_sum, xp+k*n, hn, k*shift);
  MPN_INCR_U (top_sum + hn, n+1 - hn, cy);
#else /* !HAVE_NATIVE_mpn_addlsh_n */
  /* Even-index terms: each one is shifted into tp, then added to xp2.  */
  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
  xp2[n] += mpn_add_n (xp2, xp, tp, n);
  i = 4;
  while (i < k)
    {
      xp2[n] += mpn_lshift (tp, xp + ((mp_size_t) i)*n, n, i*shift);
      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
      i += 2;
    }

  /* Odd-index terms: tp is now the accumulator, xm2 the shift buffer.  */
  tp[n] = mpn_lshift (tp, xp+n, n, shift);
  i = 3;
  while (i < k)
    {
      tp[n] += mpn_lshift (xm2, xp + ((mp_size_t) i)*n, n, i*shift);
      tp[n] += mpn_add_n (tp, tp, xm2, n);
      i += 2;
    }

  xm2[hn] = mpn_lshift (xm2, xp + ((mp_size_t) k)*n, hn, k*shift);
  mpn_add (top_sum, top_sum, n+1, xm2, hn+1);
#endif /* !HAVE_NATIVE_mpn_addlsh_n */

  if (mpn_cmp (xp2, tp, len) < 0)
    {
      neg = ~0;
      minuend = tp;
      subtrahend = xp2;
    }
  else
    {
      neg = 0;
      minuend = xp2;
      subtrahend = tp;
    }

#ifdef HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (xp2, xm2, minuend, subtrahend, len);
#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
  /* Subtract before adding, since the sum overwrites xp2.  */
  mpn_sub_n (xm2, minuend, subtrahend, len);
  mpn_add_n (xp2, xp2, tp, len);
#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */

  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
	  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
	  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));

  return neg;
}
/* Interpolation and recomposition step for a 12-point Toom multiplication.

   pp   the product area.  On entry it already holds several partial
	values: the low 2n limbs at pp (called r6 in the diagram below),
	r4 at pp + 3n, r2 at pp + 7n and r0 at pp + 11n.  On return the
	interpolated values have been summed into it.
   r1, r3, r5
	three further values of 3n+1 limbs each, supplied by the caller
	outside pp.  They are modified in place.
   n    the size, in limbs, of one piece.
   spt  the size, in limbs, of r0 (the comment on r0 says s+t <= 2*n).
   half when non-zero, r0 is first subtracted (suitably scaled) from
	r1..r5; when zero that step is skipped and the final addition of
	r1 is shortened.
   wsi  scratch space.  It is handed to the DO_mpn_* helpers, and when
	mpn_add_n_sub_n is not native it is also exchanged with r1 and r5,
	so in that configuration it receives 3n+1 limbs of result.

   NOTE(review): the mathematical meaning of each r_i (which evaluation
   point it comes from) is not visible in this function; confirm against
   the callers.  */
void
mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

  /* The values that live inside pp; they are #undef'd at the end of the
     function.  The trailing comments give their sizes in limbs.  */
#define   r4    (pp + n3)			/* 3n+1 */
#define   r2    (pp + 7 * n)			/* 3n+1 */
#define   r0    (pp +11 * n)			/* s+t <= 2*n */

  /******************************* interpolation *****************************/
  /* Remove the contribution of r0 from the other values: r3 -= r0,
     r2 and r1 use r0 shifted left by 10 and 20 bits, r5 and r4 go
     through DO_mpn_subrsh with shift counts 2 and 4.  The borrow of each
     spt-limb operation is propagated through the remaining high limbs.  */
  if (half != 0) {
    cy = mpn_sub_n (r3, r3, r0, spt);
    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);

    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);

    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
  };

  /* Same treatment for the low 2n limbs of pp, applied one piece (n
     limbs) higher in r4 and r1.  */
  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

  /* Replace the pair (r1, r4) by (r1 + r4, r4 - r1).  */
#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
#else
  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
  /* The sum was built in wsi; from here on r1 names that buffer and wsi
     names the old r1 storage (presumably a plain pointer exchange --
     MP_PTR_SWAP is defined elsewhere).  */
  MP_PTR_SWAP(r1, wsi);
#endif

  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

  /* Replace the pair (r2, r5) by (r2 + r5, r5 - r2).  */
#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
  /* The difference was built in wsi; exchange the names as above.  */
  MP_PTR_SWAP(r5, wsi);
#endif

  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);

  /* r4 -= 257 * r5, either as one multiply-subtract or as r5 plus
     r5 shifted left by 8.  */
#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
#else
  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
#endif
  /* A division by 2835x4 follows. Warning: the operand can be negative! */
  mpn_divexact_by2835x4(r4, r4, n3p1);
  /* If any of the three top bits of the high limb is set, force the two
     top bits on.  NOTE(review): this looks like restoring the sign bits
     of a negative value that the division shifted out -- confirm.  */
  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));

  /* r5 += 60 * r4, either directly or as (64 - 4) * r4.  */
#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
#endif
  mpn_divexact_by255(r5, r5, n3p1);

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));

  /* r1 -= 100 * r2, either directly or as (64 + 32 + 4) * r2.  */
#if AORSMUL_FASTER_3AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
#else
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
#endif
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
  mpn_divexact_by42525(r1, r1, n3p1);

  /* r2 -= 225 * r1, either directly or as (1 - 32 + 256) * r1.  */
#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
#else
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
#endif
  mpn_divexact_by9x4(r2, r2, n3p1);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));

  /* r4 = (r2 - r4) / 2, then r2 -= r4.  */
  mpn_sub_n (r4, r2, r4, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));

  /* r5 = (r5 + r1) / 2.  */
  mpn_add_n (r5, r5, r1, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));

  /* last interpolation steps... */
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
  /* ... could be mixed with recomposition
	||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp

    summation scheme for remaining operations:
    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
  */

  /* Add r5 at pp + n.  Its low piece is added to existing data, its
     middle piece fills the gap at pp + 2n, and its high piece (plus the
     extra top limb r5[n3]) is added on top of the low piece of r4.  */
  cy = mpn_add_n (pp + n, pp + n, r5, n);
  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
#else
  /* Without an add-with-carry-in primitive, fold the carry into the
     high piece of r5 first.  */
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);

  /* Add r3 at pp + 5n, following the same pattern; pp[6n] (the extra top
     limb of r4) carries the first addition's carry into the next one.  */
  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  /* Add r1 at pp + 9n.  How much of it is used depends on half and on
     spt, the number of limbs available at pp + 11n.  */
  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
  if (half) {
    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      /* Only spt limbs remain in pp; the addition must not overflow.  */
      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
    }
#else
    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      /* Only spt limbs remain in pp; the addition must not overflow.  */
      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
    }
#endif
  } else {
    /* Only spt limbs of the middle piece of r1 are stored at pp + 10n.  */
    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
  }

#undef   r0
#undef   r2
#undef   r4
}
void mpn_toom63_mul (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { mp_size_t n, s, t; mp_limb_t cy; int sign; /***************************** decomposition *******************************/ #define a5 (ap + 5 * n) #define b0 (bp + 0 * n) #define b1 (bp + 1 * n) #define b2 (bp + 2 * n) ASSERT (an >= bn); n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3); s = an - 5 * n; t = bn - 2 * n; ASSERT (0 < s && s <= n); ASSERT (0 < t && t <= n); /* WARNING! it assumes s+t>=n */ ASSERT ( s + t >= n ); ASSERT ( s + t > 4); /* WARNING! it assumes n>1 */ ASSERT ( n > 2); #define r8 pp /* 2n */ #define r7 scratch /* 3n+1 */ #define r5 (pp + 3*n) /* 3n+1 */ #define v0 (pp + 3*n) /* n+1 */ #define v1 (pp + 4*n+1) /* n+1 */ #define v2 (pp + 5*n+2) /* n+1 */ #define v3 (pp + 6*n+3) /* n+1 */ #define r3 (scratch + 3 * n + 1) /* 3n+1 */ #define r1 (pp + 7*n) /* s+t <= 2*n */ #define ws (scratch + 6 * n + 2) /* ??? */ /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may need all of them, when DO_mpn_sublsh_n usea a scratch */ /* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */ /********************** evaluation and recursive calls *********************/ /* $\pm4$ */ sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp); pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */ /* FIXME: use addlsh */ v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */ if ( n == t ) v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */ else v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */ sign ^= abs_sub_add_n (v1, v3, pp, n + 1); TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */ TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */ mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4); /* $\pm1$ */ sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp); /* Compute bs1 and bsm1. 
Code taken from toom33 */ cy = mpn_add (ws, b0, n, b2, t); #if HAVE_NATIVE_mpn_add_n_sub_n if (cy == 0 && mpn_cmp (ws, b1, n) < 0) { cy = mpn_add_n_sub_n (v3, v1, b1, ws, n); v3[n] = cy >> 1; v1[n] = 0; sign = ~sign; }
/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the points
   +2 and -2.

   xp holds k coefficients of n limbs each followed by a top coefficient
   of hn limbs (0 < hn <= n).  On return xp2 holds the value at +2 and
   xm2 the absolute value at -2, each n+1 limbs.  tp is n+1 limbs of
   scratch.

   Returns ~0 when the value at -2 is negative (so that xm2 holds its
   negation), 0 otherwise.

   The coefficients are split by index parity.  Those with the parity of
   k are accumulated in xp2 and the others in tp, each starting from the
   highest index and using DO_addlsh2 (defined elsewhere; from its use
   here it presumably computes dst = a + 4*b and accumulates the carry
   in its last argument).  Whichever sum holds the odd-index
   coefficients is then doubled.  */
int
mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
{
  int i;
  int neg;
  mp_limb_t cy;

  ASSERT (k >= 3);
  ASSERT (k < GMP_NUMB_BITS);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * that last coefficient, of size hn, starts at xp + k*n. */

  /* Coefficients k, k-2, k-4, ... go to xp2.  The top one is short, so
     the first step covers hn limbs and the rest of coefficient k-2 is
     copied with the carry propagated.  */
  cy = 0;
  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
  if (hn != n)
    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
  /* Convert k to int before subtracting: for k == 3 the start value is
     -1, and it must not depend on unsigned wrap-around followed by an
     implementation-defined conversion.  */
  for (i = (int) k - 4; i >= 0; i -= 2)
    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
  xp2[n] = cy;

  k--;

  /* Coefficients k, k-2, ... (for the decremented k) go to tp; all of
     them are full size.  */
  cy = 0;
  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
  for (i = (int) k - 4; i >= 0; i -= 2)
    DO_addlsh2 (tp, xp + i * n, tp, n, cy);
  tp[n] = cy;

  /* Double the sum that collected the odd-index coefficients.  */
  if (k & 1)
    ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
  else
    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));

  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;

#if HAVE_NATIVE_mpn_add_n_sub_n
  if (neg)
    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
  else
    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
  /* Subtract before adding, since the sum overwrites xp2.  */
  if (neg)
    mpn_sub_n (xm2, tp, xp2, n + 1);
  else
    mpn_sub_n (xm2, xp2, tp, n + 1);

  mpn_add_n (xp2, xp2, tp, n + 1);
#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */

  /* Bounds on the top limbs.  The shifts are done in mp_limb_t and are
     skipped when the count would reach the limb size; shifting a plain
     int 1 is undefined for the larger values of k that the precondition
     above allows.  */
  ASSERT (k + 2 >= GMP_NUMB_BITS
	  || xp2[n] < (((mp_limb_t) 1) << (k+2)) - 1);
  ASSERT (k + 3 >= GMP_NUMB_BITS
	  || xm2[n] < ((((mp_limb_t) 1) << (k+3)) - 1 - (1 ^ (k & 1))) / 3);

  /* When the decremented k is even, xp2 held the odd-index sum and tp
     the even-index one, so the comparison above has the opposite
     meaning: invert the flag.  */
  if ((k & 1) == 0)
    neg = ~neg;

  return neg;
}