/* Reference multiply: {wp, un+vn} = {up, un} * {vp, vn}, for un >= vn.
   Computes a balanced vn x vn product of the low part first, then
   recurses on the remaining high limbs of u and combines the parts.
   Used to cross-check the optimized multiplication routines.  */
static void
ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;
  mp_size_t tn;
  mp_limb_t cy;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
	 mul_basecase.  */
      if (vn != 0)
	mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  if (vn < FFT_THRESHOLD)
    {
      /* In the mpn_toom3_mul_n and mpn_toom4_mul_n range, use
	 mpn_kara_mul_n.  */
      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      /* Balanced product of the low vn limbs of u by v goes to tp;
	 scratch space follows at tp + 2*vn.  */
      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom3_mul_n.  */
      /* The "- 63 + 255" tweaks the allocation to allow for huge operands.
	 See the definition of this macro in gmp-impl.h to understand this.  */
      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      /* Recursively multiply the high un-vn limbs of u by v, keeping the
	 larger operand first, writing the partial product at wp + vn.  */
      if (un - vn < vn)
	ref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
	ref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      /* Combine: low vn limbs come straight from tp, the overlapping
	 middle limbs are added, and any carry propagates upward.  */
      MPN_COPY (wp, tp, vn);
      cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
      mpn_incr_u (wp + 2 * vn, cy);
    }
  else
    {
      /* Balanced case: the product in tp is the whole result.  */
      MPN_COPY (wp, tp, 2 * vn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tn);
}
/* {p, 2n} = {a, n}^2.  Dispatches among basecase multiply, basecase
   square, Karatsuba, Toom-3 and (when enabled) FFT squaring according
   to the size thresholds.  p must not overlap a.  */
void
mpn_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

#if 0
  /* FIXME: Can this be removed? */
  if (n == 0)
    return;
#endif

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (p, a, n, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
      /* The fixed-size buffer above is only valid if the tuned threshold
	 stays within the compile-time limit.  */
      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_kara_sqr_n (p, a, n, ws);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
#else
  else if (BELOW_THRESHOLD (n, MPN_TOOM3_MAX_N))
#endif
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
      mpn_toom3_sqr_n (p, a, n, ws);
      TMP_SFREE;
    }
  else
#if WANT_FFT || TUNE_PROGRAM_BUILD
    {
      /* The current FFT code allocates its own space.  That should probably
	 change.  */
      mpn_mul_fft_full (p, a, n, a, n);
    }
#else
    {
      /* Toom3 for large operands.  Use workspace from the heap, as stack space
	 may be limited.  Since n is at least MUL_TOOM3_THRESHOLD, multiplication
	 will take much longer than malloc()/free().  */
      mp_ptr ws;
      mp_size_t ws_size;
      ws_size = MPN_TOOM3_SQR_N_TSIZE (n);
      ws = __GMP_ALLOCATE_FUNC_LIMBS (ws_size);
      mpn_toom3_sqr_n (p, a, n, ws);
      __GMP_FREE_FUNC_LIMBS (ws, ws_size);
    }
#endif
}
/* Initialize x to the rational 0/1, giving both numerator and
   denominator a single limb of storage.  */
void
mpq_init (mpq_t x)
{
  mpz_ptr num = NUM (x);
  mpz_ptr den = DEN (x);

  /* Numerator starts out as 0 with one limb allocated.  */
  ALLOC (num) = 1;
  PTR (num) = __GMP_ALLOCATE_FUNC_LIMBS (1);
  SIZ (num) = 0;

  /* Denominator starts out as 1.  */
  ALLOC (den) = 1;
  PTR (den) = __GMP_ALLOCATE_FUNC_LIMBS (1);
  PTR (den)[0] = 1;
  SIZ (den) = 1;

#ifdef __CHECKER__
  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
  PTR (num)[0] = 0;
#endif
}
/* Initialize r at the default precision and set it to the value of s,
   keeping only the most significant limbs of s if s's mantissa is
   longer than r can hold.  */
void
mpf_init_set (mpf_ptr r, mpf_srcptr s)
{
  mp_size_t prec = __gmp_default_fp_limb_precision;
  mp_size_t ssize, abs_size;
  mp_ptr dst, src;

  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
  r->_mp_prec = prec;

  /* One extra limb is allocated beyond the nominal precision; use it so
     the assignment loses no precision.  */
  prec++;

  ssize = s->_mp_size;
  abs_size = ABS (ssize);

  dst = r->_mp_d;
  src = s->_mp_d;

  /* Truncate from the low end when s is too long.  */
  if (abs_size > prec)
    {
      src += abs_size - prec;
      abs_size = prec;
    }

  r->_mp_exp = s->_mp_exp;
  r->_mp_size = ssize >= 0 ? abs_size : -abs_size;

  MPN_COPY (dst, src, abs_size);
}
/* Simultaneously compute {s, n} = {x, n} + {y, n} and
   {d, n} = {x, n} - {y, n}.  Returns 2 * (carry out of the add) +
   (borrow out of the subtract).  s and d must not overlap each other,
   but either may coincide with x or y.  */
mp_limb_t
mpn_sumdiff_n (mp_ptr s, mp_ptr d, mp_srcptr x, mp_srcptr y, mp_size_t n)
{
  mp_limb_t res;
  mp_ptr scratch;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  /* ASSERT_SPACE (s, n); ASSERT_SPACE (d, n); */
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, y, n));
  ASSERT (! MPN_OVERLAP_P (s, n, d, n));

  if ((s == x && d == y) || (s == y && d == x))
    {
      /* Both outputs alias both inputs: form the difference in a
	 temporary, do the sum in place, then copy the difference out.  */
      scratch = __GMP_ALLOCATE_FUNC_LIMBS (n);
      res = mpn_sub_n (scratch, x, y, n);
      res += 2 * mpn_add_n (s, x, y, n);
      MPN_COPY (d, scratch, n);
      __GMP_FREE_FUNC_LIMBS (scratch, n);
      return res;
    }

  if (s == x || s == y)
    {
      /* The sum would clobber an input, so subtract first while both
	 inputs are intact.  */
      res = mpn_sub_n (d, x, y, n);
      res += 2 * mpn_add_n (s, x, y, n);
      return res;
    }

  /* No problematic aliasing: add, then subtract.  */
  res = 2 * mpn_add_n (s, x, y, n);
  res += mpn_sub_n (d, x, y, n);
  return res;
}
/* Set root (when non-NULL) to the truncated nth root of u and rem to
   the remainder u - root^nth.  An even root of a negative raises
   SQRT_OF_NEGATIVE; nth == 0 raises DIVIDE_BY_ZERO.  */
void
mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
{
  mp_ptr rootp, up, remp;
  mp_size_t us, un, rootn, remn;

  up = PTR(u);
  us = SIZ(u);

  /* even roots of negatives provoke an exception */
  if (us < 0 && (nth & 1) == 0)
    SQRT_OF_NEGATIVE;

  /* root extraction interpreted as c^(1/nth) means a zeroth root should
     provoke a divide by zero, do this even if c==0 */
  if (nth == 0)
    DIVIDE_BY_ZERO;

  if (us == 0)
    {
      /* Root and remainder of 0 are both 0.  */
      if (root != NULL)
	SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  un = ABS (us);
  rootn = (un - 1) / nth + 1;   /* ceil(un / nth) limbs in the root */

  if (root != NULL)
    {
      rootp = MPZ_REALLOC (root, rootn);
      up = PTR(u);   /* re-fetched: the realloc may move u's limbs when root == u */
    }
  else
    {
      /* No root wanted by the caller; use a temporary buffer.  */
      rootp = __GMP_ALLOCATE_FUNC_LIMBS (rootn);
    }

  MPZ_REALLOC (rem, un);
  remp = PTR(rem);

  if (nth == 1)
    {
      /* First root: root = u, remainder 0.  */
      MPN_COPY (rootp, up, un);
      remn = 0;
    }
  else
    {
      remn = mpn_rootrem (rootp, remp, up, un, nth);
    }

  if (root != NULL)
    SIZ(root) = us >= 0 ? rootn : -rootn;   /* odd root of a negative is negative */
  else
    __GMP_FREE_FUNC_LIMBS (rootp, rootn);

  SIZ(rem) = remn;
}
/* Initialize r to the value 0 at the default floating-point precision.  */
void
mpf_init (mpf_ptr r)
{
  mp_size_t prec = __gmp_default_fp_limb_precision;

  /* One limb beyond the nominal precision is always allocated.  */
  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
  r->_mp_prec = prec;
  r->_mp_exp = 0;
  r->_mp_size = 0;
}
/* Initialize r at the default precision and set it from the string s in
   the given base.  Returns mpf_set_str's result: 0 on success, -1 if s
   is not a valid number.  */
int
mpf_init_set_str (mpf_ptr r, const char *s, int base)
{
  mp_size_t prec = __gmp_default_fp_limb_precision;

  /* One limb beyond the nominal precision is always allocated.  */
  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
  r->_mp_prec = prec;
  r->_mp_exp = 0;
  r->_mp_size = 0;

  return mpf_set_str (r, s, base);
}
/* Initialize z from the limb vector {p, size}.  High zero limbs of p
   are stripped first so z ends up normalized.  */
void
mpz_init_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)
{
  mp_size_t alloc_size;

  ASSERT (size >= 0);

  MPN_NORMALIZE (p, size);

  /* Always allocate at least one limb, even for the value 0.  */
  alloc_size = MAX (size, 1);
  ALLOC (z) = alloc_size;
  PTR (z) = __GMP_ALLOCATE_FUNC_LIMBS (alloc_size);
  SIZ (z) = size;
  MPN_COPY (PTR (z), p, size);
}
/* Initialize x to the value 0 with a single limb of storage.  */
void
mpz_init (mpz_ptr x)
{
  mp_ptr limbs = __GMP_ALLOCATE_FUNC_LIMBS (1);

  ALLOC (x) = 1;
  SIZ (x) = 0;
  PTR (x) = limbs;

#ifdef __CHECKER__
  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
  limbs[0] = 0;
#endif
}
void mpn_sqr_n (mp_ptr prodp, mp_srcptr up, mp_size_t un) { ASSERT (un >= 1); ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un)); /* FIXME: Can this be removed? */ if (un == 0) return; if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD)) { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ mpn_mul_basecase (prodp, up, un, up, un); } else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD)) { /* plain schoolbook multiplication */ mpn_sqr_basecase (prodp, up, un); } else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD)) { /* karatsuba multiplication */ mp_ptr tspace; TMP_DECL (marker); TMP_MARK (marker); tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un)); mpn_kara_sqr_n (prodp, up, un, tspace); TMP_FREE (marker); } #if WANT_FFT || TUNE_PROGRAM_BUILD else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD)) #else else #endif { /* Toom3 multiplication. Use workspace from the heap, as stack may be limited. Since n is at least MUL_TOOM3_THRESHOLD, the multiplication will take much longer than malloc()/free(). */ mp_ptr tspace; mp_size_t tsize; tsize = MPN_TOOM3_SQR_N_TSIZE (un); tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize); mpn_toom3_sqr_n (prodp, up, un, tspace); __GMP_FREE_FUNC_LIMBS (tspace, tsize); } #if WANT_FFT || TUNE_PROGRAM_BUILD else {
/* Initialize arr_size mpz_t's to 0, each able to hold an nbits value,
   carving their limb storage out of one shared allocation.  Entries
   initialized this way must never be individually reallocated.  */
void
mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
{
  mp_size_t nlimbs = nbits / GMP_NUMB_BITS + 1;
  mp_ptr limbs = __GMP_ALLOCATE_FUNC_LIMBS (arr_size * nlimbs);
  mp_size_t i;

  for (i = 0; i < arr_size; i++)
    {
      mpz_ptr z = &arr[i];
      ALLOC (z) = nlimbs + 1; /* Yes, lie a little... */
      SIZ (z) = 0;
      PTR (z) = limbs + i * nlimbs;
    }
}
/* Reference mpz multiply: w = u * v, computed via refmpn_mul into a
   temporary buffer and then copied into w.  */
static void
refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
{
  mp_size_t un = u->_mp_size;
  mp_size_t vn = v->_mp_size;
  mp_size_t sign = un ^ vn;   /* negative iff u and v differ in sign */
  mp_size_t wn, tmp_n;
  mp_ptr tmp;

  un = ABS (un);
  vn = ABS (vn);

  if (un == 0 || vn == 0)
    {
      SIZ (w) = 0;
      return;
    }

  tmp_n = un + vn;
  tmp = __GMP_ALLOCATE_FUNC_LIMBS (tmp_n);

  /* refmpn_mul wants the longer operand first.  */
  if (un > vn)
    refmpn_mul (tmp, u->_mp_d, un, v->_mp_d, vn);
  else
    refmpn_mul (tmp, v->_mp_d, vn, u->_mp_d, un);

  /* A product has at most one high zero limb; strip it.  */
  wn = un + vn;
  wn -= tmp[wn - 1] == 0;

  MPZ_REALLOC (w, wn);
  MPN_COPY (PTR (w), tmp, wn);
  SIZ (w) = sign < 0 ? -wn : wn;

  __GMP_FREE_FUNC_LIMBS (tmp, tmp_n);
}
/* Initialize a NULL-terminated list of mpz_t arguments, each to the
   value 0 with one limb of storage.  */
void
mpz_inits (mpz_ptr x, ...)
{
  va_list ap;

  va_start (ap, x);

  for (; x != NULL; x = va_arg (ap, mpz_ptr))
    {
      ALLOC (x) = 1;
      PTR (x) = __GMP_ALLOCATE_FUNC_LIMBS (1);
      SIZ (x) = 0;

#ifdef __CHECKER__
      /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
      PTR (x)[0] = 0;
#endif
    }

  va_end (ap);
}
/* {rp, 2n} = {up, n}^2 via 4-way Toom-Cook squaring.  The input is
   split into four sn-limb pieces a0..a3 (macros defined with
   mpn_toom4_mul_n above — not visible here), seven squares of point
   evaluations are formed (SQR_TC4 / SQR_TC4_UNSIGNED), denormalized to
   a common length t4-1, and finally combined by
   mpn_toom4_interpolate.  tp holds both the seven products and the
   u2..u5 evaluation buffers.  */
void mpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n) { mp_size_t len1, ind; mp_limb_t cy, r30, r31; mp_ptr tp; mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4; len1 = n; ASSERT (n >= 1); MPN_NORMALIZE(up, len1); sn = (n - 1) / 4 + 1; /* a0 - a3 are defined in mpn_toom4_mul_n above */ TC4_NORM(a0, a0n, sn); TC4_NORM(a1, a1n, sn); TC4_NORM(a2, a2n, sn); TC4_NORM(a3, a3n, n - 3*sn); t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs
tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1)); tc4_add_unsigned(u5, &n5, a3, a3n, a1, a1n); tc4_add_unsigned(u4, &n4, a2, a2n, a0, a0n); tc4_add_unsigned(u2, &n2, u4, n4, u5, n5); tc4_sub(u3, &n3, u4, n4, u5, n5); SQR_TC4(r4, n4, u3, n3); SQR_TC4_UNSIGNED(r3, n3, u2, n2); tc4_lshift(r1, &n1, a0, a0n, 3); tc4_addlsh1_unsigned(r1, &n1, a2, a2n); tc4_lshift(r2, &n8, a1, a1n, 2); tc4_add(r2, &n8, r2, n8, a3, a3n); tc4_add(u4, &n9, r1, n1, r2, n8); tc4_sub(u5, &n5, r1, n1, r2, n8); r30 = r3[0]; if (!n3) r30 = CNST_LIMB(0); r31 = r3[1]; SQR_TC4(r6, n6, u5, n5); SQR_TC4_UNSIGNED(r5, n5, u4, n9); r3[1] = r31; tc4_lshift(u2, &n8, a3, a3n, 3); tc4_addmul_1(u2, &n8, a2, a2n, 4); tc4_addlsh1_unsigned(u2, &n8, a1, a1n); tc4_add(u2, &n8, u2, n8, a0, a0n); SQR_TC4_UNSIGNED(r2, n2, u2, n8); SQR_TC4_UNSIGNED(r1, n1, a3, a3n); SQR_TC4_UNSIGNED(r7, n7, a0, a0n); TC4_DENORM(r1, n1, t4 - 1); TC4_DENORM(r2, n2, t4 - 1); if (n3) TC4_DENORM(r3, n3, t4 - 1); else { /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */ for (ind = 1 ; ind < t4 - 1; ind++) (r3)[ind] = CNST_LIMB(0); } TC4_DENORM(r4, n4, t4 - 1); TC4_DENORM(r5, n5, t4 - 1); TC4_DENORM(r6, n6, t4 - 1); TC4_DENORM(r7, n7, t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)
/* rp rp1 rp2 rp3 rp4 rp5 rp6 rp7 <----------- r7-----------><------------r5--------------> <-------------r3-------------> <-------------r6-------------> < -----------r2------------>{ } <-------------r4--------------> <--------------r1----> */
/* Interpolate the seven products into the final square, then zero any
   unused high limbs of the 2n-limb result area.  */
mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30); if (rpn != 2*n) { MPN_ZERO((rp + rpn), 2*n - rpn); } __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1)); }
/* Test driver for the Hensel (bdiv) division routines.  For each
   iteration it builds random operands n and d (d forced odd), sometimes
   perturbed to stress special cases (low zero limbs, n close to a
   multiple of d, d with an all-ones low limb), then exercises
   mpn_sbpi1_bdiv_qr, mpn_sbpi1_bdiv_q, mpn_dcpi1_bdiv_qr,
   mpn_dcpi1_bdiv_q, mpn_bdiv_qr, mpn_mu_bdiv_qr and mpn_mu_bdiv_q.
   Sentinel limbs around qp/rp and at scratch[itch] verify that no
   routine writes outside its output areas; check_one verifies the
   arithmetic.  */
int main (int argc, char **argv) { gmp_randstate_ptr rands; unsigned long maxnbits, maxdbits, nbits, dbits; mpz_t n, d, tz; mp_size_t maxnn, maxdn, nn, dn, clearn, i; mp_ptr np, dp, qp, rp; mp_limb_t rh; mp_limb_t t; mp_limb_t dinv; int count = COUNT; mp_ptr scratch; mp_limb_t ran; mp_size_t alloc, itch; mp_limb_t rran0, rran1, qran0, qran1; TMP_DECL; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } maxdbits = MAX_DN; maxnbits = MAX_NN; tests_start (); rands = RANDS; mpz_init (n); mpz_init (d); mpz_init (tz); maxnn = maxnbits / GMP_NUMB_BITS + 1; maxdn = maxdbits / GMP_NUMB_BITS + 1; TMP_MARK;
/* qp and rp get one extra limb below and above for sentinel checks.  */
qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; alloc = 1; scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc); for (test = 0; test < count;) { nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS; if (maxdbits > nbits) dbits = random_word (rands) % nbits + 1; else dbits = random_word (rands) % maxdbits + 1;
#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif
do { RANDFUNC (n, rands, nbits); do { RANDFUNC (d, rands, dbits); } while (mpz_sgn (d) == 0); np = PTR (n); dp = PTR (d); nn = SIZ (n); dn = SIZ (d); } while (nn < dn);
/* bdiv requires an odd divisor.  */
dp[0] |= 1; mpz_urandomb (tz, rands, 32); t = mpz_get_ui (tz); if (t % 17 == 0) dp[0] = GMP_NUMB_MAX;
/* Occasionally perturb n into a corner case.  */
switch ((int) t % 16) { case 0: clearn = random_word (rands) % nn; for (i = 0; i <= clearn; i++) np[i] = 0; break; case 1: mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands)); break; case 2: mpn_add_1 (np + nn - dn, dp, dn, random_word (rands)); break; } test++; binvert_limb (dinv, dp[0]); rran0 = random_word (rands); rran1 = random_word (rands); qran0 = random_word (rands); qran1 = random_word (rands); qp[-1] = qran0; qp[nn - dn + 1] = qran1; rp[-1] = rran0; ran = random_word (rands); if ((double) (nn - dn) * dn < 1e5) { if (nn > dn) { /* Test mpn_sbpi1_bdiv_qr */
MPN_ZERO (qp, nn - dn); MPN_ZERO (rp, dn); MPN_COPY (rp, np, nn); rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr"); } if (nn > dn) { /* Test mpn_sbpi1_bdiv_q */ MPN_COPY (rp, np, nn); MPN_ZERO (qp, nn - dn); mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q"); } } if (dn >= 4 && nn - dn >= 2) { /* Test mpn_dcpi1_bdiv_qr */ MPN_COPY (rp, np, nn); MPN_ZERO (qp, nn - dn); rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr"); } if (dn >= 4 && nn - dn >= 2) { /* Test mpn_dcpi1_bdiv_q */ MPN_COPY (rp, np, nn); MPN_ZERO (qp, nn - dn); mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q"); } if (nn > dn) { /* Test mpn_bdiv_qr */ itch = mpn_bdiv_qr_itch (nn, dn); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_ZERO (qp, nn - dn); MPN_ZERO (rp, dn); rp[dn] = rran1; rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1); check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr"); } if (nn - dn < 2 || dn < 2) continue; /* Test mpn_mu_bdiv_qr */ itch = mpn_mu_bdiv_qr_itch (nn, dn); if (itch + 1 > alloc) { scratch =
__GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_ZERO (qp, nn - dn); MPN_ZERO (rp, dn); rp[dn] = rran1; rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1); check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr"); /* Test mpn_mu_bdiv_q */ itch = mpn_mu_bdiv_q_itch (nn, dn); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_ZERO (qp, nn - dn + 1); mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q"); } __GMP_FREE_FUNC_LIMBS (scratch, alloc); TMP_FREE; mpz_clear (n); mpz_clear (d); mpz_clear (tz); tests_end (); return 0; }
/* For now, also disable REDC when MOD is even, as the inverse can't handle that. At some point, we might want to make the code faster for that case, perhaps using CRR. */ #ifndef POWM_THRESHOLD #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) #endif #define HANDLE_NEGATIVE_EXPONENT 1 #undef REDUCE_EXPONENT void #ifndef BERKELEY_MP mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) #else /* BERKELEY_MP */ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) #endif /* BERKELEY_MP */ { mp_ptr xp, tp, qp, gp, this_gp; mp_srcptr bp, ep, mp; mp_size_t bn, es, en, mn, xn; mp_limb_t invm, c; unsigned long int enb; mp_size_t i, K, j, l, k; int m_zero_cnt, e_zero_cnt; int sh; int use_redc; #if HANDLE_NEGATIVE_EXPONENT mpz_t new_b; #endif #if REDUCE_EXPONENT mpz_t new_e; #endif TMP_DECL (marker); mp = PTR(m); mn = ABSIZ (m); if (mn == 0) DIVIDE_BY_ZERO; TMP_MARK (marker); es = SIZ (e); if (es <= 0) { if (es == 0) { /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if m equals 1. */ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; PTR(r)[0] = 1; TMP_FREE (marker); /* we haven't really allocated anything here */ return; } #if HANDLE_NEGATIVE_EXPONENT MPZ_TMP_INIT (new_b, mn + 1); if (! mpz_invert (new_b, b, m)) DIVIDE_BY_ZERO; b = new_b; es = -es; #else DIVIDE_BY_ZERO; #endif } en = es; #if REDUCE_EXPONENT /* Reduce exponent by dividing it by phi(m) when m small. */ if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) { MPZ_TMP_INIT (new_e, 2); mpz_mod_ui (new_e, e, phi (mp[0])); e = new_e; } #endif use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; if (use_redc) { /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ modlimb_invert (invm, mp[0]); invm = -invm; } else { /* Normalize m (i.e. make its most significant bit set) as required by division functions below. 
*/ count_leading_zeros (m_zero_cnt, mp[mn - 1]); m_zero_cnt -= GMP_NAIL_BITS; if (m_zero_cnt != 0) { mp_ptr new_mp; new_mp = TMP_ALLOC_LIMBS (mn); mpn_lshift (new_mp, mp, mn, m_zero_cnt); mp = new_mp; } } /* Determine optimal value of k, the number of exponent bits we look at at a time. */ count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); e_zero_cnt -= GMP_NAIL_BITS; enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ k = 1; K = 2; while (2 * enb > K * (2 + k * (3 + k))) { k++; K *= 2; } tp = TMP_ALLOC_LIMBS (2 * mn + 1); qp = TMP_ALLOC_LIMBS (mn + 1); gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ bn = ABSIZ (b); bp = PTR(b); /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary for speed or correctness to do this when b and m have the same number of limbs, perhaps remove mpn_cmp call. */ if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) { /* Reduce possibly huge base while moving it to gp[0]. Use a function call to reduce, since we don't want the quotient allocation to live until function return. */ if (use_redc) { reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ MPN_ZERO (tp, mn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ } else { reduce (gp, bp, bn, mp, mn); } } else { /* |b| < m. We pad out operands to become mn limbs, which simplifies the rest of the function, but slows things down when the |b| << m. */ if (use_redc) { MPN_ZERO (tp, mn); MPN_COPY (tp + mn, bp, bn); MPN_ZERO (tp + mn + bn, mn - bn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); } else { MPN_COPY (gp, bp, bn); MPN_ZERO (gp + bn, mn - bn); } } /* Compute xx^i for odd g < 2^i. 
*/ xp = TMP_ALLOC_LIMBS (mn); mpn_sqr_n (tp, gp, mn); if (use_redc) redc (xp, mp, mn, invm, tp); /* xx = x^2*R^n */ else mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); this_gp = gp; for (i = 1; i < K / 2; i++) { mpn_mul_n (tp, this_gp, xp, mn); this_gp += mn; if (use_redc) redc (this_gp, mp, mn, invm, tp); /* g[i] = x^(2i+1)*R^n */ else mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); } /* Start the real stuff. */ ep = PTR (e); i = en - 1; /* current index */ c = ep[i]; /* current limb */ sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ sh -= k; /* index of lower bit of ep[i] to take into account */ if (sh < 0) { /* k-sh extra bits are needed */ if (i > 0) { i--; c <<= (-sh); sh += GMP_NUMB_BITS; c |= ep[i] >> sh; } }
/* w = u * v.  Small second operands go directly through mpn_mul_1 /
   mpn_mul_2; otherwise w's limb buffer is grown or operands that alias
   w are copied aside before calling mpn_mul / mpn_sqr.  */
void
mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
  mp_size_t usize;
  mp_size_t vsize;
  mp_size_t wsize;
  mp_size_t sign_product;
  mp_ptr up, vp;
  mp_ptr wp;
  mp_ptr free_me;
  size_t free_me_size;
  mp_limb_t cy_limb;
  TMP_DECL;

  usize = SIZ (u);
  vsize = SIZ (v);
  sign_product = usize ^ vsize;   /* negative iff u and v differ in sign */
  usize = ABS (usize);
  vsize = ABS (vsize);

  /* Keep the larger operand in u.  */
  if (usize < vsize)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (usize, vsize);
    }

  if (vsize == 0)
    {
      SIZ (w) = 0;
      return;
    }

#if HAVE_NATIVE_mpn_mul_2
  if (vsize <= 2)
    {
      wp = MPZ_REALLOC (w, usize+vsize);
      if (vsize == 1)
	cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
      else
	{
	  cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v));
	  usize++;
	}
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ (w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#else
  if (vsize == 1)
    {
      wp = MPZ_REALLOC (w, usize+1);
      cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ (w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#endif

  TMP_MARK;
  free_me = NULL;
  up = PTR (u);
  vp = PTR (v);
  wp = PTR (w);

  /* Ensure W has space enough to store the result.  */
  wsize = usize + vsize;
  if (ALLOC (w) < wsize)
    {
      if (wp == up || wp == vp)
	{
	  /* w aliases an operand, so its old buffer must stay alive
	     until after the multiply; freed at the end.  */
	  free_me = wp;
	  free_me_size = ALLOC (w);
	}
      else
	(*__gmp_free_func) (wp, (size_t) ALLOC (w) * GMP_LIMB_BYTES);

      ALLOC (w) = wsize;
      wp = __GMP_ALLOCATE_FUNC_LIMBS (wsize);
      PTR (w) = wp;
    }
  else
    {
      /* Make U and V not overlap with W.  */
      if (wp == up)
	{
	  /* W and U are identical.  Allocate temporary space for U.  */
	  up = TMP_ALLOC_LIMBS (usize);
	  /* Is V identical too?  Keep it identical with U.  */
	  if (wp == vp)
	    vp = up;
	  /* Copy to the temporary space.  */
	  MPN_COPY (up, wp, usize);
	}
      else if (wp == vp)
	{
	  /* W and V are identical.  Allocate temporary space for V.  */
	  vp = TMP_ALLOC_LIMBS (vsize);
	  /* Copy to the temporary space.  */
	  MPN_COPY (vp, wp, vsize);
	}
    }

  if (up == vp)
    {
      /* Equal operands: squaring is faster.  */
      mpn_sqr (wp, up, usize);
      cy_limb = wp[wsize - 1];
    }
  else
    {
      cy_limb = mpn_mul (wp, up, usize, vp, vsize);
    }

  wsize -= cy_limb == 0;   /* strip the single possible high zero limb */

  SIZ (w) = sign_product < 0 ? -wsize : wsize;

  if (free_me != NULL)
    (*__gmp_free_func) (free_me, free_me_size * GMP_LIMB_BYTES);

  TMP_FREE;
}
/* Test driver for the truncating division routines.  Each iteration
   builds random n and d (plus a normalized copy dnp with the high bit
   set), sometimes perturbed into corner cases, then exercises
   mpn_sbpi1_div_qr, mpn_sbpi1_divappr_q, mpn_sbpi1_div_q,
   mpn_sb_div_qr_sec, mpn_sb_div_r_sec, mpn_dcpi1_div_qr,
   mpn_dcpi1_divappr_q, mpn_dcpi1_div_q, mpn_mu_div_qr,
   mpn_mu_divappr_q, mpn_mu_div_q, mpn_div_q, mpn_divrem_2 and
   mpn_div_qr_2.  Sentinel limbs around qp/rp and at scratch[itch]
   detect out-of-bounds writes; check_one verifies the arithmetic.  */
int main (int argc, char **argv) { gmp_randstate_ptr rands; unsigned long maxnbits, maxdbits, nbits, dbits; mpz_t n, d, q, r, tz, junk; mp_size_t maxnn, maxdn, nn, dn, clearn, i; mp_ptr np, dup, dnp, qp, rp, junkp; mp_limb_t t; gmp_pi1_t dinv; long count = COUNT; mp_ptr scratch; mp_limb_t ran; mp_size_t alloc, itch; mp_limb_t rran0, rran1, qran0, qran1; TMP_DECL; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } maxdbits = MAX_DN; maxnbits = MAX_NN; tests_start (); rands = RANDS; mpz_init (n); mpz_init (d); mpz_init (q); mpz_init (r); mpz_init (tz); mpz_init (junk); maxnn = maxnbits / GMP_NUMB_BITS + 1; maxdn = maxdbits / GMP_NUMB_BITS + 1; TMP_MARK;
/* qp and rp get one extra limb below and above for sentinel checks.  */
qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; dnp = TMP_ALLOC_LIMBS (maxdn); alloc = 1; scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
/* The first 300 iterations sweep dbits deterministically.  */
for (test = -300; test < count; test++) { nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS; if (test < 0) dbits = (test + 300) % (nbits - 1) + 1; else dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;
#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif
do RANDFUNC (d, rands, dbits); while (mpz_sgn (d) == 0); dn = SIZ (d); dup = PTR (d);
/* dnp is a copy of d normalized to have its high bit set.  */
MPN_COPY (dnp, dup, dn); dnp[dn - 1] |= GMP_NUMB_HIGHBIT; if (test % 2 == 0) { RANDFUNC (n, rands, nbits); nn = SIZ (n); ASSERT_ALWAYS (nn >= dn); } else { do { RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1)); RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2)); mpz_mul (n, q, d); mpz_add (n, n, r); nn = SIZ (n); } while (nn > maxnn || nn < dn); } ASSERT_ALWAYS (nn <= maxnn); ASSERT_ALWAYS (dn <= maxdn); mpz_urandomb (junk, rands, nbits); junkp = PTR (junk); np = PTR (n); mpz_urandomb (tz, rands, 32); t = mpz_get_ui (tz); if (t % 17 == 0) { dnp[dn - 1] = GMP_NUMB_MAX; dup[dn - 1] = GMP_NUMB_MAX; }
/* Occasionally perturb n into a corner case.  */
switch ((int) t % 16) {
case 0: clearn = random_word (rands) % nn; for (i = clearn; i < nn; i++) np[i] = 0; break; case 1: mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands)); break; case 2: mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands)); break; } if (dn >= 2) invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]); rran0 = random_word (rands); rran1 = random_word (rands); qran0 = random_word (rands); qran1 = random_word (rands); qp[-1] = qran0; qp[nn - dn + 1] = qran1; rp[-1] = rran0; ran = random_word (rands); if ((double) (nn - dn) * dn < 1e5) { /* Test mpn_sbpi1_div_qr */ if (dn > 2) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32); check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0); } /* Test mpn_sbpi1_divappr_q */ if (dn > 2) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32); check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1); } /* Test mpn_sbpi1_div_q */ if (dn > 2) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32); check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0); } /* Test mpn_sb_div_qr_sec */ itch = 3 * nn + 4; if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (rp, np, nn); if (nn >= dn) MPN_COPY (qp, junkp, nn - dn + 1); mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0); /* Test mpn_sb_div_r_sec */ itch = nn + 2 * dn + 2; if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (rp, np, nn); mpn_sb_div_r_sec (rp, nn, dup, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); /* Note: Since check_one cannot cope with random-only
functions, we pass qp[] from the previous function, mpn_sb_div_qr_sec. */ check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0); } /* Test mpn_dcpi1_div_qr */ if (dn >= 6 && nn - dn >= 3) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0); } /* Test mpn_dcpi1_divappr_q */ if (dn >= 6 && nn - dn >= 3) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1); } /* Test mpn_dcpi1_div_q */ if (dn >= 6 && nn - dn >= 3) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0); } /* Test mpn_mu_div_qr */ if (nn - dn > 2 && dn >= 2) { itch = mpn_mu_div_qr_itch (nn, dn, 0); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (qp, junkp, nn - dn); MPN_ZERO (rp, dn); rp[dn] = rran1; qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1); check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0); } /* Test mpn_mu_divappr_q */ if (nn - dn > 2 && dn >= 2) { itch = mpn_mu_divappr_q_itch (nn, dn, 0); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4); } /* Test mpn_mu_div_q */ if (nn - dn > 2 && dn >= 2) { itch = mpn_mu_div_q_itch (nn, dn, 0); if (itch + 1> alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0); } if (1) { itch = nn + 1; if (itch + 1> alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; mpn_div_q (qp, np, nn, dup, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0); } if (dn >= 2 && nn >= 2) { mp_limb_t qh; /* mpn_divrem_2 */ MPN_COPY (rp, np, nn); qp[nn - 2] = qp[nn-1] = qran1; qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2); ASSERT_ALWAYS (qp[nn - 2] == qran1); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1); qp[nn - 2] = qh; check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0); /* Missing: divrem_2 with fraction limbs.
*/ /* mpn_div_qr_2 */ qp[nn - 2] = qran1; qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2); ASSERT_ALWAYS (qp[nn - 2] == qran1); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1); qp[nn - 2] = qh; check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0); } } __GMP_FREE_FUNC_LIMBS (scratch, alloc); TMP_FREE; mpz_clear (n); mpz_clear (d); mpz_clear (q); mpz_clear (r); mpz_clear (tz); mpz_clear (junk); tests_end (); return 0; }
/* Multiply {up, un} by {vp, vn} and write the result to {prodp, un + vn} assuming vn > 2*ceil(un/5). Note that prodp gets un + vn limbs stored, even if the actual result only needs un + vn - 1. */
/* 5x3 unbalanced Toom-Cook: u is split into five sn-limb pieces
   a0..a4 and v into three pieces b0..b2.  Seven point products are
   formed (MUL_TC4 / MUL_TC4_UNSIGNED), denormalized to a common
   length t4-1 in tp, and combined by mpn_toom4_interpolate.  tp also
   holds the u2..u5 evaluation buffers.  */
void mpn_toom53_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) { mp_size_t ind; mp_limb_t cy, r30, r31; mp_ptr tp; mp_size_t a0n, a1n, a2n, a3n, a4n, b0n, b1n, b2n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, rpn, t4; sn = (un + 4) / 5; ASSERT (vn > 2*sn);
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define a4 (up + 4*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
TC4_NORM(a0, a0n, sn); TC4_NORM(a1, a1n, sn); TC4_NORM(a2, a2n, sn); TC4_NORM(a3, a3n, sn); TC4_NORM(a4, a4n, un - 4*sn); TC4_NORM(b0, b0n, sn); TC4_NORM(b1, b1n, sn); TC4_NORM(b2, b2n, vn - 2*sn); t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs
tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));
#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
tc4_add_unsigned(u2, &n2, a3, a3n, a1, a1n); tc4_add_unsigned(u5, &n5, a2, a2n, a0, a0n); tc4_add_unsigned(u5, &n5, u5, n5, a4, a4n); tc4_add_unsigned(u3, &n3, u5, n5, u2, n2); tc4_sub(u4, &n4, u5, n5, u2, n2); tc4_add_unsigned(u5, &n5, b2, b2n, b0, b0n); tc4_add_unsigned(r2, &n8, u5, n5, b1, b1n); tc4_sub(u5, &n5, u5, n5, b1, b1n); MUL_TC4_UNSIGNED(r3, n3, u3, n3, r2, n8); /* 1 */ MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */ tc4_lshift(r1, &n1, a0, a0n, 4); tc4_lshift(u3, &n9, a2, a2n, 2); tc4_add_unsigned(r1, &n1, r1, n1, u3, n9); tc4_add_unsigned(r1, &n1, r1, n1, a4, a4n); tc4_lshift(r2, &n8, a1, a1n, 3); tc4_addlsh1_unsigned(r2, &n8, a3, a3n); tc4_add_unsigned(u5, &n5, r1, n1, r2, n8); tc4_sub(u3, &n9, r1, n1, r2, n8); tc4_lshift(r1, &n1, b0, b0n, 2); tc4_add_unsigned(r1, &n1, r1, n1, b2, b2n); tc4_lshift(u4, &n10, b1, b1n, 1); tc4_add_unsigned(u2, &n2, r1, n1, u4, n10); tc4_sub(r2, &n8, r1, n1, u4, n10);
r30 = r3[0]; if (!n3) r30 = CNST_LIMB(0); r31 = r3[1]; MUL_TC4_UNSIGNED(r5, n5, u5, n5, u2, n2); /* 1/2 */ MUL_TC4(r6, n6, u3, n9, r2, n8); /* -1/2 */ r3[1] = r31; tc4_lshift(u2, &n2, a4, a4n, 4); tc4_addmul_1(u2, &n2, a3, a3n, 8); tc4_addmul_1(u2, &n2, a2, a2n, 4); tc4_addlsh1_unsigned(u2, &n2, a1, a1n); tc4_add(u2, &n2, u2, n2, a0, a0n); tc4_lshift(r1, &n1, b2, b2n, 2); tc4_addlsh1_unsigned(r1, &n1, b1, b1n); tc4_add(r1, &n1, r1, n1, b0, b0n); MUL_TC4_UNSIGNED(r2, n2, u2, n2, r1, n1); /* 2 */ MUL_TC4_UNSIGNED(r1, n1, a4, a4n, b2, b2n); /* oo */ MUL_TC4_UNSIGNED(r7, n7, a0, a0n, b0, b0n); /* 0 */ TC4_DENORM(r1, n1, t4 - 1); TC4_DENORM(r2, n2, t4 - 1); if (n3) TC4_DENORM(r3, n3, t4 - 1); else { /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */ for (ind = 1 ; ind < t4 - 1; ind++) (r3)[ind] = CNST_LIMB(0); } TC4_DENORM(r4, n4, t4 - 1); TC4_DENORM(r5, n5, t4 - 1); TC4_DENORM(r6, n6, t4 - 1); TC4_DENORM(r7, n7, t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)
/* rp rp1 rp2 rp3 rp4 rp5 rp6 rp7 <----------- r7-----------><------------r5--------------> <-------------r3-------------> <-------------r6-------------> < -----------r2------------>{ } <-------------r4--------------> <--------------r1----> */
/* Interpolate the seven products into the final result, then zero any
   unused high limbs of the un+vn-limb result area.  */
mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30); if (rpn != un + vn) { MPN_ZERO((rp + rpn), un + vn - rpn); } __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1)); }