/* Compute the sum and the difference of two n-limb operands in one call.

   {s,n} receives x + y and {d,n} receives x - y.  The return value packs
   both outgoing bits as 2 * (carry out of the addition) + (borrow out of
   the subtraction).

   n must be positive.  Each of s and d must either coincide exactly with
   x or y or be completely separate from them, and s and d must not
   overlap each other.  */
mp_limb_t
mpn_sumdiff_n (mp_ptr s, mp_ptr d, mp_srcptr x, mp_srcptr y, mp_size_t n)
{
  mp_limb_t carry, borrow;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, y, n));
  ASSERT (! MPN_OVERLAP_P (s, n, d, n));

  if ((s == x && d == y) || (s == y && d == x))
    {
      /* Both outputs sit on top of the inputs, so whichever result is
         written first would destroy an operand of the second.  Form the
         difference in a temporary and copy it into place last.  */
      mp_ptr diff = __GMP_ALLOCATE_FUNC_LIMBS (n);

      borrow = mpn_sub_n (diff, x, y, n);
      carry = mpn_add_n (s, x, y, n);
      MPN_COPY (d, diff, n);
      __GMP_FREE_FUNC_LIMBS (diff, n);
    }
  else if (s == x || s == y)
    {
      /* Only the sum overwrites an input: do the subtraction first while
         both operands are still intact.  */
      borrow = mpn_sub_n (d, x, y, n);
      carry = mpn_add_n (s, x, y, n);
    }
  else
    {
      /* The sum goes to separate storage, so it can safely be done first
         even if d coincides with an input.  */
      carry = mpn_add_n (s, x, y, n);
      borrow = mpn_sub_n (d, x, y, n);
    }

  return 2 * carry + borrow;
}
/* Square the n-limb operand {a,n} and store the 2*n-limb result at {p,2*n}.

   n must be at least 1 and the destination must not overlap the source.
   The algorithm is picked by comparing n against the tuned thresholds:
   basecase multiply, basecase square, Karatsuba, Toom-3, and finally
   either the FFT (when built with WANT_FFT or for the tune program) or
   Toom-3 with heap workspace.  */
void
mpn_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    {
      /* For very small sizes the general basecase multiply can beat the
         dedicated squaring routine.  */
      mpn_mul_basecase (p, a, n, a, n);
      return;
    }

  if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
      return;
    }

  if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Karatsuba workspace has a fixed compile-time bound, so it lives
         directly on the stack.  */
      mp_limb_t kara_ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];

      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_kara_sqr_n (p, a, n, kara_ws);
      return;
    }

  /* Toom-3 with temporary-stack workspace; the upper limit of this range
     depends on whether the FFT is available.  */
#if WANT_FFT || TUNE_PROGRAM_BUILD
  if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
#else
  if (BELOW_THRESHOLD (n, MPN_TOOM3_MAX_N))
#endif
    {
      mp_ptr toom_ws;
      TMP_SDECL;

      TMP_SMARK;
      toom_ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
      mpn_toom3_sqr_n (p, a, n, toom_ws);
      TMP_SFREE;
      return;
    }

#if WANT_FFT || TUNE_PROGRAM_BUILD
  /* The FFT code currently allocates its own space.  */
  mpn_mul_fft_full (p, a, n, a, n);
#else
  {
    /* Toom-3 for large operands.  Stack space may be limited, so the
       workspace comes from the heap; at this size the squaring itself
       dwarfs the cost of the allocation.  */
    mp_size_t heap_limbs = MPN_TOOM3_SQR_N_TSIZE (n);
    mp_ptr heap_ws = __GMP_ALLOCATE_FUNC_LIMBS (heap_limbs);

    mpn_toom3_sqr_n (p, a, n, heap_ws);
    __GMP_FREE_FUNC_LIMBS (heap_ws, heap_limbs);
  }
#endif
}
/* Set ROOT to the truncated integer part of the NTH root of U and REM to
   the remainder U - ROOT**NTH.

   ROOT may be NULL when only the remainder is wanted.  An even root of a
   negative U raises SQRT_OF_NEGATIVE; NTH == 0 raises DIVIDE_BY_ZERO.

   ROOT or REM may be the same object as U: in that case the result is
   built in a temporary buffer and copied back, so the limb routine never
   sees an output area that overlaps its input.

   For negative U (odd NTH) the root is negative, and the remainder is
   stored with a negative sign as well so that REM == U - ROOT**NTH holds.  */
void
mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
{
  mp_ptr rootp, up, remp;
  mp_size_t us, un, rootn, remn;
  int root_is_tmp, rem_is_tmp;

  us = SIZ(u);

  /* even roots of negatives provoke an exception */
  if (us < 0 && (nth & 1) == 0)
    SQRT_OF_NEGATIVE;

  /* root extraction interpreted as c^(1/nth) means a zeroth root should
     provoke a divide by zero, do this even if c==0 */
  if (nth == 0)
    DIVIDE_BY_ZERO;

  if (us == 0)
    {
      if (root != NULL)
        SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  un = ABS (us);
  rootn = (un - 1) / nth + 1;

  /* A scratch root is needed when the caller does not want the root, and
     also when ROOT is U itself.  Likewise for REM aliasing U.  */
  root_is_tmp = (root == NULL || root == u);
  rem_is_tmp = (rem == u);

  if (root_is_tmp)
    rootp = __GMP_ALLOCATE_FUNC_LIMBS (rootn);
  else
    rootp = MPZ_REALLOC (root, rootn);

  if (rem_is_tmp)
    remp = __GMP_ALLOCATE_FUNC_LIMBS (un);
  else
    {
      MPZ_REALLOC (rem, un);
      remp = PTR(rem);
    }

  /* Fetch the source pointer only after the destinations have been
     (re)allocated.  */
  up = PTR(u);

  if (nth == 1)
    {
      /* The first root is the operand itself, with nothing left over.  */
      MPN_COPY (rootp, up, un);
      remn = 0;
    }
  else
    {
      remn = mpn_rootrem (rootp, remp, up, un, nth);
    }

  if (root != NULL)
    {
      if (root_is_tmp)
        /* root == u: its limb area already holds un >= rootn limbs.  */
        MPN_COPY (PTR(root), rootp, rootn);
      SIZ(root) = us >= 0 ? rootn : -rootn;
    }
  if (root_is_tmp)
    __GMP_FREE_FUNC_LIMBS (rootp, rootn);

  if (rem_is_tmp)
    {
      /* rem == u: its limb area already holds un >= remn limbs.  */
      MPN_COPY (PTR(rem), remp, remn);
      __GMP_FREE_FUNC_LIMBS (remp, un);
    }

  /* The remainder takes the sign of U, keeping U == ROOT**NTH + REM.  */
  SIZ(rem) = us >= 0 ? remn : -remn;
}
/* Reference multiplication: {wp, un+vn} = {up,un} * {vp,vn}.

   The routine deliberately uses a simpler algorithm than the library
   would pick for the same size, so that it can serve as an independent
   check.  The recursive calls always pass the longer operand first;
   NOTE(review): callers are presumably expected to pass un >= vn too.  */
static void
ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;            /* vn x vn low product followed by scratch */
  mp_size_t tn;         /* limbs allocated at tp */
  mp_limb_t carry;

  if (vn < TOOM3_THRESHOLD)
    {
      /* Where the library would use its basecase or Karatsuba code, use
         the local schoolbook routine instead.  */
      if (vn == 0)
        MPN_ZERO (wp, un);
      else
        mul_basecase (wp, up, un, vp, vn);
      return;
    }

  if (vn >= FFT_THRESHOLD)
    {
      /* For the largest operands fall back to Toom-3.  The "- 63 + 255"
         tweaks the allocation to allow for huge operands; see the
         definition of the size macro in gmp-impl.h.  */
      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Where the library would use Toom-3 or Toom-4, use Karatsuba.  */
      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }

  if (un == vn)
    {
      /* Balanced case: the square block is the whole product.  */
      MPN_COPY (wp, tp, 2 * vn);
    }
  else
    {
      /* Unbalanced case: multiply the remaining high part of u by v
         straight into the upper part of wp, then add in the block
         computed above.  */
      mp_srcptr u_hi = up + vn;
      mp_size_t u_hi_n = un - vn;

      if (u_hi_n < vn)
        ref_mpn_mul (wp + vn, vp, vn, u_hi, u_hi_n);
      else
        ref_mpn_mul (wp + vn, u_hi, u_hi_n, vp, vn);

      MPN_COPY (wp, tp, vn);
      carry = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
      mpn_incr_u (wp + 2 * vn, carry);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tn);
}
void mpn_sqr_n (mp_ptr prodp, mp_srcptr up, mp_size_t un) { ASSERT (un >= 1); ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un)); /* FIXME: Can this be removed? */ if (un == 0) return; if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD)) { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ mpn_mul_basecase (prodp, up, un, up, un); } else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD)) { /* plain schoolbook multiplication */ mpn_sqr_basecase (prodp, up, un); } else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD)) { /* karatsuba multiplication */ mp_ptr tspace; TMP_DECL (marker); TMP_MARK (marker); tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un)); mpn_kara_sqr_n (prodp, up, un, tspace); TMP_FREE (marker); } #if WANT_FFT || TUNE_PROGRAM_BUILD else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD)) #else else #endif { /* Toom3 multiplication. Use workspace from the heap, as stack may be limited. Since n is at least MUL_TOOM3_THRESHOLD, the multiplication will take much longer than malloc()/free(). */ mp_ptr tspace; mp_size_t tsize; tsize = MPN_TOOM3_SQR_N_TSIZE (un); tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize); mpn_toom3_sqr_n (prodp, up, un, tspace); __GMP_FREE_FUNC_LIMBS (tspace, tsize); } #if WANT_FFT || TUNE_PROGRAM_BUILD else {
/* Reference mpz multiplication: w = u * v, built on refmpn_mul.

   The product is formed in a freshly allocated buffer and only then
   copied into w.  */
static void
refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
{
  mp_size_t un = u->_mp_size;
  mp_size_t vn = v->_mp_size;
  /* The xor of the signed sizes is negative exactly when the operands
     have opposite signs.  */
  int negative = (un ^ vn) < 0;
  mp_size_t wn, prod_limbs;
  mp_ptr prod;

  un = ABS (un);
  vn = ABS (vn);

  if (un == 0 || vn == 0)
    {
      SIZ (w) = 0;
      return;
    }

  prod_limbs = un + vn;
  prod = __GMP_ALLOCATE_FUNC_LIMBS (prod_limbs);

  /* refmpn_mul is always given the longer operand first.  */
  if (un > vn)
    refmpn_mul (prod, u->_mp_d, un, v->_mp_d, vn);
  else
    refmpn_mul (prod, v->_mp_d, vn, u->_mp_d, un);

  /* The product has un + vn limbs, of which the top one may be zero.  */
  wn = un + vn;
  if (prod[wn - 1] == 0)
    wn--;

  MPZ_REALLOC (w, wn);
  MPN_COPY (PTR(w), prod, wn);
  SIZ(w) = negative ? -wn : wn;

  __GMP_FREE_FUNC_LIMBS (prod, prod_limbs);
}
/* Square {up,n} into {rp,2*n} by splitting the operand into four pieces
   of sn = ceil(n/4) limbs, squaring seven combinations of the pieces and
   interpolating.

   The piece names a0..a3, the temporaries u2..u5 and the product areas
   r1..r7 are macros defined earlier in the file (with mpn_toom4_mul_n);
   they refer to up, tp, rp, sn and t4, so those locals keep their names.
   The order of the steps below matters because the temporaries and
   product areas are reused.  */
void
mpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t len1, ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4;
  mp_size_t tp_limbs;

  ASSERT (n >= 1);

  /* NOTE(review): len1 is not read again after this normalisation.  */
  len1 = n;
  MPN_NORMALIZE(up, len1);

  sn = (n - 1) / 4 + 1;

  /* a0 - a3 are defined in mpn_toom4_mul_n above */
  TC4_NORM(a0, a0n, sn);
  TC4_NORM(a1, a1n, sn);
  TC4_NORM(a2, a2n, sn);
  TC4_NORM(a3, a3n, n - 3*sn);

  t4 = 2*sn+2;  /* allows mult of 2 integers of sn + 1 limbs */

  /* Four product areas of t4 limbs plus four temporaries of sn + 1.  */
  tp_limbs = 4*t4 + 4*(sn + 1);
  tp = __GMP_ALLOCATE_FUNC_LIMBS(tp_limbs);

  /* u2 = (a2 + a0) + (a3 + a1), u3 = (a2 + a0) - (a3 + a1).  */
  tc4_add_unsigned(u5, &n5, a3, a3n, a1, a1n);
  tc4_add_unsigned(u4, &n4, a2, a2n, a0, a0n);
  tc4_add_unsigned(u2, &n2, u4, n4, u5, n5);
  tc4_sub(u3, &n3, u4, n4, u5, n5);

  SQR_TC4(r4, n4, u3, n3);
  SQR_TC4_UNSIGNED(r3, n3, u2, n2);

  /* r1 = 8*a0 + 2*a2, r2 = 4*a1 + a3; u4 = r1 + r2, u5 = r1 - r2.  */
  tc4_lshift(r1, &n1, a0, a0n, 3);
  tc4_addlsh1_unsigned(r1, &n1, a2, a2n);
  tc4_lshift(r2, &n8, a1, a1n, 2);
  tc4_add(r2, &n8, r2, n8, a3, a3n);
  tc4_add(u4, &n9, r1, n1, r2, n8);
  tc4_sub(u5, &n5, r1, n1, r2, n8);

  /* Save the two low limbs of r3: r3[1] is put back after the next two
     products (presumably one of them overlaps the start of r3), and
     r3[0] is handed to the interpolation as r30.  */
  r30 = r3[0];
  if (!n3)
    r30 = CNST_LIMB(0);
  r31 = r3[1];

  SQR_TC4(r6, n6, u5, n5);
  SQR_TC4_UNSIGNED(r5, n5, u4, n9);

  r3[1] = r31;

  /* u2 = 8*a3 + 4*a2 + 2*a1 + a0.  */
  tc4_lshift(u2, &n8, a3, a3n, 3);
  tc4_addmul_1(u2, &n8, a2, a2n, 4);
  tc4_addlsh1_unsigned(u2, &n8, a1, a1n);
  tc4_add(u2, &n8, u2, n8, a0, a0n);

  SQR_TC4_UNSIGNED(r2, n2, u2, n8);
  SQR_TC4_UNSIGNED(r1, n1, a3, a3n);
  SQR_TC4_UNSIGNED(r7, n7, a0, a0n);

  TC4_DENORM(r1, n1, t4 - 1);
  TC4_DENORM(r2, n2, t4 - 1);
  if (n3)
    {
      TC4_DENORM(r3, n3, t4 - 1);
    }
  else
    {
      /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
      for (ind = 1 ; ind < t4 - 1; ind++)
        (r3)[ind] = CNST_LIMB(0);
    }
  TC4_DENORM(r4, n4, t4 - 1);
  TC4_DENORM(r5, n5, t4 - 1);
  TC4_DENORM(r6, n6, t4 - 1);
  TC4_DENORM(r7, n7, t4 - 2); /* we treat r7 differently (it cannot exceed t4-2 in length) */

  /* Sketch of where the partial products land relative to rp:

       rp        rp1        rp2        rp3        rp4        rp5        rp6        rp7
       <--------- r7 --------><--------- r5 --------->
                                                   <--------- r3 --------->
                  <--------- r6 --------->                    <-------- r2 -------->{   }
                                        <--------- r4 --------->        <------ r1 ---->
  */

  mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

  /* Pad with zeros up to the full 2*n limbs.  */
  if (rpn != 2*n)
    {
      MPN_ZERO((rp + rpn), 2*n - rpn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tp_limbs);
}
/* Release the limb storage held by x.  An object whose allocation count
   is zero owns no storage, so nothing is freed for it.  */
void
mpz_clear (mpz_ptr x)
{
  if (ALLOC (x) == 0)
    return;

  __GMP_FREE_FUNC_LIMBS (PTR (x), ALLOC (x));
}
int main (int argc, char **argv) { gmp_randstate_ptr rands; unsigned long maxnbits, maxdbits, nbits, dbits; mpz_t n, d, tz; mp_size_t maxnn, maxdn, nn, dn, clearn, i; mp_ptr np, dp, qp, rp; mp_limb_t rh; mp_limb_t t; mp_limb_t dinv; int count = COUNT; mp_ptr scratch; mp_limb_t ran; mp_size_t alloc, itch; mp_limb_t rran0, rran1, qran0, qran1; TMP_DECL; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } maxdbits = MAX_DN; maxnbits = MAX_NN; tests_start (); rands = RANDS; mpz_init (n); mpz_init (d); mpz_init (tz); maxnn = maxnbits / GMP_NUMB_BITS + 1; maxdn = maxdbits / GMP_NUMB_BITS + 1; TMP_MARK; qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; alloc = 1; scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc); for (test = 0; test < count;) { nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS; if (maxdbits > nbits) dbits = random_word (rands) % nbits + 1; else dbits = random_word (rands) % maxdbits + 1; #if RAND_UNIFORM #define RANDFUNC mpz_urandomb #else #define RANDFUNC mpz_rrandomb #endif do { RANDFUNC (n, rands, nbits); do { RANDFUNC (d, rands, dbits); } while (mpz_sgn (d) == 0); np = PTR (n); dp = PTR (d); nn = SIZ (n); dn = SIZ (d); } while (nn < dn); dp[0] |= 1; mpz_urandomb (tz, rands, 32); t = mpz_get_ui (tz); if (t % 17 == 0) dp[0] = GMP_NUMB_MAX; switch ((int) t % 16) { case 0: clearn = random_word (rands) % nn; for (i = 0; i <= clearn; i++) np[i] = 0; break; case 1: mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands)); break; case 2: mpn_add_1 (np + nn - dn, dp, dn, random_word (rands)); break; } test++; binvert_limb (dinv, dp[0]); rran0 = random_word (rands); rran1 = random_word (rands); qran0 = random_word (rands); qran1 = random_word (rands); qp[-1] = qran0; qp[nn - dn + 1] = qran1; rp[-1] = rran0; ran = random_word (rands); if ((double) (nn - dn) * dn < 1e5) { if (nn > dn) { /* Test mpn_sbpi1_bdiv_qr */ 
MPN_ZERO (qp, nn - dn); MPN_ZERO (rp, dn); MPN_COPY (rp, np, nn); rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr"); } if (nn > dn) { /* Test mpn_sbpi1_bdiv_q */ MPN_COPY (rp, np, nn); MPN_ZERO (qp, nn - dn); mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q"); } } if (dn >= 4 && nn - dn >= 2) { /* Test mpn_dcpi1_bdiv_qr */ MPN_COPY (rp, np, nn); MPN_ZERO (qp, nn - dn); rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr"); } if (dn >= 4 && nn - dn >= 2) { /* Test mpn_dcpi1_bdiv_q */ MPN_COPY (rp, np, nn); MPN_ZERO (qp, nn - dn); mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q"); } if (nn > dn) { /* Test mpn_bdiv_qr */ itch = mpn_bdiv_qr_itch (nn, dn); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_ZERO (qp, nn - dn); MPN_ZERO (rp, dn); rp[dn] = rran1; rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1); check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr"); } if (nn - dn < 2 || dn < 2) continue; /* Test mpn_mu_bdiv_qr */ itch = mpn_mu_bdiv_qr_itch (nn, dn); if (itch + 1 > alloc) { scratch = 
__GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_ZERO (qp, nn - dn); MPN_ZERO (rp, dn); rp[dn] = rran1; rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1); check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr"); /* Test mpn_mu_bdiv_q */ itch = mpn_mu_bdiv_q_itch (nn, dn); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_ZERO (qp, nn - dn + 1); mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q"); } __GMP_FREE_FUNC_LIMBS (scratch, alloc); TMP_FREE; mpz_clear (n); mpz_clear (d); mpz_clear (tz); tests_end (); return 0; }
int main (int argc, char **argv) { gmp_randstate_ptr rands; unsigned long maxnbits, maxdbits, nbits, dbits; mpz_t n, d, q, r, tz, junk; mp_size_t maxnn, maxdn, nn, dn, clearn, i; mp_ptr np, dup, dnp, qp, rp, junkp; mp_limb_t t; gmp_pi1_t dinv; long count = COUNT; mp_ptr scratch; mp_limb_t ran; mp_size_t alloc, itch; mp_limb_t rran0, rran1, qran0, qran1; TMP_DECL; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } maxdbits = MAX_DN; maxnbits = MAX_NN; tests_start (); rands = RANDS; mpz_init (n); mpz_init (d); mpz_init (q); mpz_init (r); mpz_init (tz); mpz_init (junk); maxnn = maxnbits / GMP_NUMB_BITS + 1; maxdn = maxdbits / GMP_NUMB_BITS + 1; TMP_MARK; qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1; dnp = TMP_ALLOC_LIMBS (maxdn); alloc = 1; scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc); for (test = -300; test < count; test++) { nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS; if (test < 0) dbits = (test + 300) % (nbits - 1) + 1; else dbits = random_word (rands) % (nbits - 1) % maxdbits + 1; #if RAND_UNIFORM #define RANDFUNC mpz_urandomb #else #define RANDFUNC mpz_rrandomb #endif do RANDFUNC (d, rands, dbits); while (mpz_sgn (d) == 0); dn = SIZ (d); dup = PTR (d); MPN_COPY (dnp, dup, dn); dnp[dn - 1] |= GMP_NUMB_HIGHBIT; if (test % 2 == 0) { RANDFUNC (n, rands, nbits); nn = SIZ (n); ASSERT_ALWAYS (nn >= dn); } else { do { RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1)); RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2)); mpz_mul (n, q, d); mpz_add (n, n, r); nn = SIZ (n); } while (nn > maxnn || nn < dn); } ASSERT_ALWAYS (nn <= maxnn); ASSERT_ALWAYS (dn <= maxdn); mpz_urandomb (junk, rands, nbits); junkp = PTR (junk); np = PTR (n); mpz_urandomb (tz, rands, 32); t = mpz_get_ui (tz); if (t % 17 == 0) { dnp[dn - 1] = GMP_NUMB_MAX; dup[dn - 1] = GMP_NUMB_MAX; } switch ((int) t % 16) { 
case 0: clearn = random_word (rands) % nn; for (i = clearn; i < nn; i++) np[i] = 0; break; case 1: mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands)); break; case 2: mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands)); break; } if (dn >= 2) invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]); rran0 = random_word (rands); rran1 = random_word (rands); qran0 = random_word (rands); qran1 = random_word (rands); qp[-1] = qran0; qp[nn - dn + 1] = qran1; rp[-1] = rran0; ran = random_word (rands); if ((double) (nn - dn) * dn < 1e5) { /* Test mpn_sbpi1_div_qr */ if (dn > 2) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32); check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0); } /* Test mpn_sbpi1_divappr_q */ if (dn > 2) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32); check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1); } /* Test mpn_sbpi1_div_q */ if (dn > 2) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32); check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0); } /* Test mpn_sb_div_qr_sec */ itch = 3 * nn + 4; if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (rp, np, nn); if (nn >= dn) MPN_COPY (qp, junkp, nn - dn + 1); mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0); /* Test mpn_sb_div_r_sec */ itch = nn + 2 * dn + 2; if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (rp, np, nn); mpn_sb_div_r_sec (rp, nn, dup, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); /* Note: Since check_one cannot cope with random-only 
functions, we pass qp[] from the previous function, mpn_sb_div_qr_sec. */ check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0); } /* Test mpn_dcpi1_div_qr */ if (dn >= 6 && nn - dn >= 3) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0); } /* Test mpn_dcpi1_divappr_q */ if (dn >= 6 && nn - dn >= 3) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1); } /* Test mpn_dcpi1_div_q */ if (dn >= 6 && nn - dn >= 3) { MPN_COPY (rp, np, nn); if (nn > dn) MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0); } /* Test mpn_mu_div_qr */ if (nn - dn > 2 && dn >= 2) { itch = mpn_mu_div_qr_itch (nn, dn, 0); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (qp, junkp, nn - dn); MPN_ZERO (rp, dn); rp[dn] = rran1; qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1); check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0); } /* Test mpn_mu_divappr_q */ if (nn - dn > 2 && dn >= 2) { itch = mpn_mu_divappr_q_itch (nn, dn, 0); if (itch + 1 > alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); 
alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4); } /* Test mpn_mu_div_q */ if (nn - dn > 2 && dn >= 2) { itch = mpn_mu_div_q_itch (nn, dn, 0); if (itch + 1> alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; MPN_COPY (qp, junkp, nn - dn); qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0); } if (1) { itch = nn + 1; if (itch + 1> alloc) { scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1); alloc = itch + 1; } scratch[itch] = ran; mpn_div_q (qp, np, nn, dup, dn, scratch); ASSERT_ALWAYS (ran == scratch[itch]); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1); check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0); } if (dn >= 2 && nn >= 2) { mp_limb_t qh; /* mpn_divrem_2 */ MPN_COPY (rp, np, nn); qp[nn - 2] = qp[nn-1] = qran1; qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2); ASSERT_ALWAYS (qp[nn - 2] == qran1); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1); qp[nn - 2] = qh; check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0); /* Missing: divrem_2 with fraction limbs. 
*/ /* mpn_div_qr_2 */ qp[nn - 2] = qran1; qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2); ASSERT_ALWAYS (qp[nn - 2] == qran1); ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1); qp[nn - 2] = qh; check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0); } } __GMP_FREE_FUNC_LIMBS (scratch, alloc); TMP_FREE; mpz_clear (n); mpz_clear (d); mpz_clear (q); mpz_clear (r); mpz_clear (tz); mpz_clear (junk); tests_end (); return 0; }
/* Multiply {up, un} by {vp, vn} and write the result to {rp, un + vn},
   assuming vn > 2*ceil(un/5).

   u is split into five pieces and v into three pieces of
   sn = ceil(un/5) limbs each (the top pieces may be shorter).  Seven
   products of combinations of the pieces are formed and then
   interpolated.  rp always gets un + vn limbs stored, even if the actual
   result only needs un + vn - 1.

   The product areas r1..r7 are macros defined elsewhere in the file; the
   piece and temporary names are defined below in terms of up, vp, tp, sn
   and t4.  The order of the steps matters because the temporaries and
   product areas are reused.  */
void
mpn_toom53_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,
                mp_srcptr vp, mp_size_t vn)
{
  mp_size_t ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, a4n, b0n, b1n, b2n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, rpn, t4;
  mp_size_t tp_limbs;

  sn = (un + 4) / 5;

  ASSERT (vn > 2*sn);

#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define a4 (up + 4*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)

  TC4_NORM(a0, a0n, sn);
  TC4_NORM(a1, a1n, sn);
  TC4_NORM(a2, a2n, sn);
  TC4_NORM(a3, a3n, sn);
  TC4_NORM(a4, a4n, un - 4*sn);
  TC4_NORM(b0, b0n, sn);
  TC4_NORM(b1, b1n, sn);
  TC4_NORM(b2, b2n, vn - 2*sn);

  t4 = 2*sn+2;  /* allows mult of 2 integers of sn + 1 limbs */

  /* Four product areas of t4 limbs plus four temporaries of sn + 1.  */
  tp_limbs = 4*t4 + 4*(sn + 1);
  tp = __GMP_ALLOCATE_FUNC_LIMBS(tp_limbs);

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))

  /* u3 = u(1), u4 = u(-1): even-index pieces plus/minus odd-index ones.  */
  tc4_add_unsigned(u2, &n2, a3, a3n, a1, a1n);
  tc4_add_unsigned(u5, &n5, a2, a2n, a0, a0n);
  tc4_add_unsigned(u5, &n5, u5, n5, a4, a4n);
  tc4_add_unsigned(u3, &n3, u5, n5, u2, n2);
  tc4_sub(u4, &n4, u5, n5, u2, n2);

  /* r2 = v(1), u5 = v(-1).  */
  tc4_add_unsigned(u5, &n5, b2, b2n, b0, b0n);
  tc4_add_unsigned(r2, &n8, u5, n5, b1, b1n);
  tc4_sub(u5, &n5, u5, n5, b1, b1n);

  MUL_TC4_UNSIGNED(r3, n3, u3, n3, r2, n8); /* 1 */
  MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */

  /* r1 = 16*a0 + 4*a2 + a4, r2 = 8*a1 + 2*a3;
     u5 = r1 + r2, u3 = r1 - r2.  */
  tc4_lshift(r1, &n1, a0, a0n, 4);
  tc4_lshift(u3, &n9, a2, a2n, 2);
  tc4_add_unsigned(r1, &n1, r1, n1, u3, n9);
  tc4_add_unsigned(r1, &n1, r1, n1, a4, a4n);
  tc4_lshift(r2, &n8, a1, a1n, 3);
  tc4_addlsh1_unsigned(r2, &n8, a3, a3n);
  tc4_add_unsigned(u5, &n5, r1, n1, r2, n8);
  tc4_sub(u3, &n9, r1, n1, r2, n8);

  /* r1 = 4*b0 + b2, u4 = 2*b1; u2 = r1 + u4, r2 = r1 - u4.  */
  tc4_lshift(r1, &n1, b0, b0n, 2);
  tc4_add_unsigned(r1, &n1, r1, n1, b2, b2n);
  tc4_lshift(u4, &n10, b1, b1n, 1);
  tc4_add_unsigned(u2, &n2, r1, n1, u4, n10);
  tc4_sub(r2, &n8, r1, n1, u4, n10);

  /* Save the two low limbs of r3: r3[1] is put back after the next two
     products (presumably one of them overlaps the start of r3), and
     r3[0] is handed to the interpolation as r30.  */
  r30 = r3[0];
  if (!n3)
    r30 = CNST_LIMB(0);
  r31 = r3[1];

  MUL_TC4_UNSIGNED(r5, n5, u5, n5, u2, n2); /* 1/2 */
  MUL_TC4(r6, n6, u3, n9, r2, n8); /* -1/2 */

  r3[1] = r31;

  /* u2 = 16*a4 + 8*a3 + 4*a2 + 2*a1 + a0, r1 = 4*b2 + 2*b1 + b0.  */
  tc4_lshift(u2, &n2, a4, a4n, 4);
  tc4_addmul_1(u2, &n2, a3, a3n, 8);
  tc4_addmul_1(u2, &n2, a2, a2n, 4);
  tc4_addlsh1_unsigned(u2, &n2, a1, a1n);
  tc4_add(u2, &n2, u2, n2, a0, a0n);

  tc4_lshift(r1, &n1, b2, b2n, 2);
  tc4_addlsh1_unsigned(r1, &n1, b1, b1n);
  tc4_add(r1, &n1, r1, n1, b0, b0n);

  MUL_TC4_UNSIGNED(r2, n2, u2, n2, r1, n1); /* 2 */

  MUL_TC4_UNSIGNED(r1, n1, a4, a4n, b2, b2n); /* oo */
  MUL_TC4_UNSIGNED(r7, n7, a0, a0n, b0, b0n); /* 0 */

  TC4_DENORM(r1, n1, t4 - 1);
  TC4_DENORM(r2, n2, t4 - 1);
  if (n3)
    {
      TC4_DENORM(r3, n3, t4 - 1);
    }
  else
    {
      /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
      for (ind = 1 ; ind < t4 - 1; ind++)
        (r3)[ind] = CNST_LIMB(0);
    }
  TC4_DENORM(r4, n4, t4 - 1);
  TC4_DENORM(r5, n5, t4 - 1);
  TC4_DENORM(r6, n6, t4 - 1);
  TC4_DENORM(r7, n7, t4 - 2); /* we treat r7 differently (it cannot exceed t4-2 in length) */

  /* Sketch of where the partial products land relative to rp:

       rp        rp1        rp2        rp3        rp4        rp5        rp6        rp7
       <--------- r7 --------><--------- r5 --------->
                                                   <--------- r3 --------->
                  <--------- r6 --------->                    <-------- r2 -------->{   }
                                        <--------- r4 --------->        <------ r1 ---->
  */

  mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

  /* Pad with zeros up to the full un + vn limbs.  */
  if (rpn != un + vn)
    {
      MPN_ZERO((rp + rpn), un + vn - rpn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tp_limbs);
}