void
my__gmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                  mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
  ASSERT_ALWAYS (qxn == 0);
  ASSERT (nn >= 0);
  ASSERT (dn >= 0);
  ASSERT (dn == 0 || dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));

  int adjust;
  gmp_pi1_t dinv;
  TMP_DECL;
  TMP_MARK;

  /* conservative tests for quotient size */
  adjust = np[nn - 1] >= dp[dn - 1];

  mp_ptr n2p, d2p;
  mp_limb_t cy;
  int cnt;

  qp[nn - dn] = 0;			/* zero high quotient limb */
  count_leading_zeros (cnt, dp[dn - 1]);
  cnt -= GMP_NAIL_BITS;

  /* normalize the divisor: shift so its most significant bit is set */
  d2p = TMP_ALLOC_LIMBS (dn);
  mpn_lshift (d2p, dp, dn, cnt);
  for (mp_size_t i = 0; i < dn; i++)
    printf ("d2p %0*lx\n", (int) (GMP_LIMB_BITS / 4), (unsigned long) d2p[i]);

  /* shift the dividend by the same amount */
  n2p = TMP_ALLOC_LIMBS (nn + 1);
  cy = mpn_lshift (n2p, np, nn, cnt);
  for (mp_size_t i = 0; i < nn; i++)
    printf ("n2p %0*lx\n", (int) (GMP_LIMB_BITS / 4), (unsigned long) n2p[i]);
  n2p[nn] = cy;
  nn += adjust;

  printf ("d2p[dn-1] = %08lx\nd2p[dn-2] = %08lx\n",
          (unsigned long) d2p[dn - 1], (unsigned long) d2p[dn - 2]);
  invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
  printf ("dinv %08lx\n", (unsigned long) dinv.inv32);

  my_mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
  for (mp_size_t i = 0; i < nn; i++)
    printf ("inside qp %0*lx\n", (int) (GMP_LIMB_BITS / 4), (unsigned long) qp[i]);

  n2p[nn] = cy;
  /* un-normalize the remainder left in the low dn limbs of n2p */
  mpn_rshift (rp, n2p, dn, cnt);
  TMP_FREE;
  return;
}
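A quick note on why the trailing mpn_rshift recovers the remainder (this reasoning is mine, not taken from the GMP sources): normalization multiplies both operands by the same power of two, which leaves the quotient unchanged and merely scales the remainder.

\[
N = QD + R,\ 0 \le R < D \;\Longrightarrow\; 2^{c}N = Q\,(2^{c}D) + 2^{c}R,\ 0 \le 2^{c}R < 2^{c}D,
\]

so dividing the shifted operands yields the true quotient directly, and the true remainder is the computed one shifted back right by \(c\) bits, which is exactly what `mpn_rshift (rp, n2p, dn, cnt)` does at the end.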
/* Small standalone probe for invert_pi1; assumed to be built inside the GMP
   source tree so that gmp-impl.h / longlong.h are available. */
#include <stdio.h>
#include "gmp-impl.h"
#include "longlong.h"

int
main (void)
{
  gmp_pi1_t a;
  unsigned long in[] = { 0x7c2e09b7, 0x847c9b5d };

  invert_pi1 (a, in[1], in[0]);
  printf ("%08lx\n", a.inv32);
  return 0;
}
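For reference, the quantity this probe prints is the 3-limb-by-2-limb reciprocal described in the divhigh comments further down: with \(\beta\) the limb base (\(2^{64}\) on a typical 64-bit build, assuming no nail bits) and \(d_1 = \mathtt{in[1]}\), \(d_0 = \mathtt{in[0]}\) the two high divisor limbs,

\[
\mathtt{inv32} \;=\; \left\lfloor \frac{\beta^{3}-1}{d_{1}\beta + d_{0}} \right\rfloor - \beta .
\]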
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
      /* Maximum scratch needed by this branch: 2*n */
      mp_size_t i;
      mp_ptr xp;

      xp = scratch;			/* 2 * n limbs */
      /* n > 1 here */
      i = n;
      do
	xp[--i] = GMP_NUMB_MAX;
      while (i);
      mpn_com (xp + n, dp, n);
      if (n == 2)
	{
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	}
      else
	{
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
    }
  else
    { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) /* Assume the error can only be "0" (no error) or "1". */
	{ /* Code to detect and correct the "off by one" approximation. */
	  mpn_mul_n (scratch, ip, dp, n);
	  e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
	  if (LIKELY (e)) /* The high part can not give a carry by itself. */
	    e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
	  /* If the value was wrong (no carry), correct it (increment). */
	  e ^= CNST_LIMB (1);
	  MPN_INCR_U (ip, n, e);
	}
    }
}
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else
    {
      TMP_DECL;

      TMP_MARK;
      if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
	{
	  /* Maximum scratch needed by this branch: 2*n */
	  mp_size_t i;
	  mp_ptr xp;

	  xp = scratch;			/* 2 * n limbs */
	  for (i = n - 1; i >= 0; i--)
	    xp[i] = GMP_NUMB_MAX;
	  mpn_com (xp + n, dp, n);
	  if (n == 2)
	    {
	      mpn_divrem_2 (ip, 0, xp, 4, dp);
	    }
	  else
	    {
	      gmp_pi1_t inv;
	      invert_pi1 (inv, dp[n-1], dp[n-2]);
	      /* FIXME: should we use dcpi1_div_q, for big sizes? */
	      mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	    }
	}
      else
	{ /* Use approximated inverse; correct the result if needed. */
	  mp_limb_t e; /* The possible error in the approximate inverse */

	  ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
	  e = mpn_ni_invertappr (ip, dp, n, scratch);

	  if (UNLIKELY (e)) /* Assume the error can only be "0" (no error) or "1". */
	    {
	      /* Code to detect and correct the "off by one" approximation. */
	      mpn_mul_n (scratch, ip, dp, n);
	      ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	      if (! mpn_add (scratch, scratch, 2*n, dp, n))
		MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it. */
	    }
	}
      TMP_FREE;
    }
}
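Both variants build the same base-case dividend: the low n limbs of xp are filled with GMP_NUMB_MAX and the high n limbs hold the limb complement of D. Assuming no nail bits (so GMP_NUMB_MAX = \(\beta - 1\)), that dividend is

\[
X \;=\; (\beta^{n}-1-D)\,\beta^{n} + (\beta^{n}-1) \;=\; \beta^{2n} - D\,\beta^{n} - 1,
\]

and since \(D\beta^{n}\) is an exact multiple of \(D\),

\[
\left\lfloor \frac{X}{D} \right\rfloor \;=\; \left\lfloor \frac{\beta^{2n}-1}{D} \right\rfloor - \beta^{n},
\]

which is the n-limb reciprocal that the mpn_divrem_2 / mpn_sbpi1_div_q call then writes into {ip, n}.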
mp_limb_t
mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
                mp_size_t n)
{
  mp_size_t k, l;
  mp_limb_t qh, cy;
  mpfr_limb_ptr tp;
  MPFR_TMP_DECL(marker);

  MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 15); /* so that 2*(n/3) >= (n+4)/2 */
  k = MPFR_LIKELY (n < MPFR_DIVHIGH_TAB_SIZE) ? divhigh_ktab[n] : 2*(n/3);

  if (k == 0)
#if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
    {
      mpfr_pi1_t dinv2;
      invert_pi1 (dinv2, dp[n - 1], dp[n - 2]);
      return __gmpn_sbpi1_divappr_q (qp, np, n + n, dp, n, dinv2.inv32);
    }
#else /* use our own code for base-case short division */
    return mpfr_divhigh_n_basecase (qp, np, dp, n);
#endif
  else if (k == n)
/* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
   with the most significant limb of the quotient as return value (0 or 1).
   Assumes the most significant bit of D is set.  Clobbers N.

   The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
*/
static mp_limb_t
mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
                         mpfr_limb_srcptr dp, mp_size_t n)
{
  mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
  mpfr_pi1_t dinv2;

  np += n;

  if ((qh = (mpn_cmp (np, dp, n) >= 0)))
    mpn_sub_n (np, np, dp, n);

  /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */

  d1 = dp[n - 1];

  if (n == 1)
    {
      invert_limb (dinv, d1);
      umul_ppmm (q1, q0, np[0], dinv);
      qp[0] = np[0] + q1;
      return qh;
    }

  /* now n >= 2 */
  d0 = dp[n - 2];
  invert_pi1 (dinv2, d1, d0);
  /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */

  while (n > 1)
    {
      /* Invariant: it remains to reduce n limbs from N (in addition to the
         initial low n limbs).
         Since n >= 2 here, necessarily we had n >= 2 initially, which means
         that in addition to the limb np[n-1] to reduce, we have at least 2
         extra limbs, thus accessing np[n-3] is valid. */

      /* warning: we can have np[n-1]=d1 and np[n-2]=d0, but since {np,n} < D,
         the largest possible partial quotient is B-1 */
      if (MPFR_UNLIKELY(np[n - 1] == d1 && np[n - 2] == d0))
        q2 = ~ (mp_limb_t) 0;
      else
        udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
                      d1, d0, dinv2.inv32);
      /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)), we have
         q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
         thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
         and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
         thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
         which proves that at most one correction is needed */
      q0 = mpn_submul_1 (np - 1, dp, n, q2);
      if (MPFR_UNLIKELY(q0 > np[n - 1]))
        {
          mpn_add_n (np - 1, np - 1, dp, n);
          q2 --;
        }
      qp[--n] = q2;
      dp ++;
    }

  /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
     q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
        <= floor((np[0]*B+np[1])/d1)
     thus q1 is not larger than the true quotient.
     q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
     For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
     thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
     (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
     d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
     thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.

     For d1*B+d0 = B^2/2, dinv2 = B-1 thus
     q1 > np[0]*(2B-1)/B - 1 > np[0]*B/d1 - 2.

     In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
     q - 4 <= q1 <= q */
  umul_ppmm (q1, q0, np[0], dinv2.inv32);
  qp[0] = np[0] + q1;

  return qh;
}
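Restating the guarantees from those comments in more conventional notation: the returned approximation \(Q\) of \(N/D\) satisfies

\[
-2(n-1) \;<\; \frac{N}{D} - Q \;\le\; 4,
\]

and in the final step, with \(q = \lfloor (np[0]\,\beta + np[1]) / d_1 \rfloor\) the exact two-limb-by-one-limb quotient, the low-limb estimate computed from the reciprocal obeys \(q - 4 \le q_1 \le q\).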
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, q, r, tz, junk;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dup, dnp, qp, rp, junkp;
  mp_limb_t t;
  gmp_pi1_t dinv;
  long count = COUNT;
  long test;			/* loop counter; declaration was missing */
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }

  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (q);
  mpz_init (r);
  mpz_init (tz);
  mpz_init (junk);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  dnp = TMP_ALLOC_LIMBS (maxdn);

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = -300; test < count; test++)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;

      if (test < 0)
	dbits = (test + 300) % (nbits - 1) + 1;
      else
	dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	RANDFUNC (d, rands, dbits);
      while (mpz_sgn (d) == 0);
      dn = SIZ (d);
      dup = PTR (d);
      MPN_COPY (dnp, dup, dn);
      dnp[dn - 1] |= GMP_NUMB_HIGHBIT;

      if (test % 2 == 0)
	{
	  RANDFUNC (n, rands, nbits);
	  nn = SIZ (n);
	  ASSERT_ALWAYS (nn >= dn);
	}
      else
	{
	  do
	    {
	      RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1));
	      RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2));
	      mpz_mul (n, q, d);
	      mpz_add (n, n, r);
	      nn = SIZ (n);
	    }
	  while (nn > maxnn || nn < dn);
	}

      ASSERT_ALWAYS (nn <= maxnn);
      ASSERT_ALWAYS (dn <= maxdn);

      mpz_urandomb (junk, rands, nbits);
      junkp = PTR (junk);

      np = PTR (n);

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	{
	  dnp[dn - 1] = GMP_NUMB_MAX;
	  dup[dn - 1] = GMP_NUMB_MAX;
	}

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = clearn; i < nn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	}

      if (dn >= 2)
	invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  /* Test mpn_sbpi1_div_qr */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
	    }

	  /* Test mpn_sbpi1_divappr_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
	    }

	  /* Test mpn_sbpi1_div_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
	    }

	  /* Test mpn_sb_div_qr_sec */
	  itch = 3 * nn + 4;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  if (nn >= dn)
	    MPN_COPY (qp, junkp, nn - dn + 1);
	  mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0);

	  /* Test mpn_sb_div_r_sec */
	  itch = nn + 2 * dn + 2;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  mpn_sb_div_r_sec (rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  /* Note: Since check_one cannot cope with random-only functions, we
	     pass qp[] from the previous function, mpn_sb_div_qr_sec.  */
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0);
	}

      /* Test mpn_dcpi1_div_qr */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
	}

      /* Test mpn_dcpi1_divappr_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
	}

      /* Test mpn_dcpi1_div_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
	}

      /* Test mpn_mu_div_qr */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_qr_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  ASSERT_ALWAYS (rp[dn] == rran1);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
	}

      /* Test mpn_mu_divappr_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_divappr_q_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
	}

      /* Test mpn_mu_div_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_q_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
	}

      /* Test mpn_div_q */
      if (1)
	{
	  itch = nn + 1;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  mpn_div_q (qp, np, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
	}

      if (dn >= 2 && nn >= 2)
	{
	  mp_limb_t qh;

	  /* mpn_divrem_2 */
	  MPN_COPY (rp, np, nn);
	  qp[nn - 2] = qp[nn - 1] = qran1;
	  qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);

	  /* Missing: divrem_2 with fraction limbs.  */

	  /* mpn_div_qr_2 */
	  qp[nn - 2] = qran1;
	  qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);
	  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
	}
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (q);
  mpz_clear (r);
  mpz_clear (tz);
  mpz_clear (junk);

  tests_end ();
  return 0;
}
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  if (el < 20)
    {
      mp_ptr xp, tp, mp, bp, scratch;
      mp_size_t xn, tn, mn, bn;
      int m_zero_cnt;
      int c;
      mp_limb_t e, m2;
      gmp_pi1_t dinv;
      TMP_DECL;

      mp = PTR(m);
      mn = ABSIZ(m);
      if (UNLIKELY (mn == 0))
	DIVIDE_BY_ZERO;

      if (el == 0)
	{
	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
	     M equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  return;
	}

      TMP_MARK;

      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}

      m2 = mn == 1 ? 0 : mp[mn - 2];
      invert_pi1 (dinv, mp[mn - 1], m2);

      bn = ABSIZ(b);
      bp = PTR(b);
      if (bn > mn)
	{
	  /* Reduce possibly huge base.  Use a function call to reduce, since
	     we don't want the quotient allocation to live until function
	     return.  */
	  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
	  reduce (new_bp, bp, bn, mp, mn, &dinv);
	  bp = new_bp;
	  bn = mn;
	  /* Canonicalize the base, since we are potentially going to multiply
	     with it quite a few times.  */
	  MPN_NORMALIZE (bp, bn);
	}

      if (bn == 0)
	{
	  SIZ(r) = 0;
	  TMP_FREE;
	  return;
	}

      tp = TMP_ALLOC_LIMBS (2 * mn + 1);
      xp = TMP_ALLOC_LIMBS (mn);
      scratch = TMP_ALLOC_LIMBS (mn + 1);

      MPN_COPY (xp, bp, bn);
      xn = bn;

      e = el;
      count_leading_zeros (c, e);
      e = (e << c) << 1;	/* shift the exp bits to the left, lose msb */
      c = GMP_LIMB_BITS - 1 - c;

      if (c == 0)
	{
	  /* If m is already normalized (high bit of high limb set), and b is
	     the same size, but a bigger value, and e==1, then there's no
	     modular reductions done and we can end up with a result out of
	     range at the end. */
	  if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
	    mpn_sub_n (xp, xp, mp, mn);
	}
      else
	{
	  /* Main loop. */
	  do
	    {
	      mpn_sqr (tp, xp, xn);
	      tn = 2 * xn;
	      tn -= tp[tn - 1] == 0;
	      if (tn < mn)
		{
		  MPN_COPY (xp, tp, tn);
		  xn = tn;
		}
	      else
		{
		  mod (tp, tn, mp, mn, &dinv, scratch);
		  MPN_COPY (xp, tp, mn);
		  xn = mn;
		}

	      if ((mp_limb_signed_t) e < 0)
		{
		  mpn_mul (tp, xp, xn, bp, bn);
		  tn = xn + bn;
		  tn -= tp[tn - 1] == 0;
		  if (tn < mn)
		    {
		      MPN_COPY (xp, tp, tn);
		      xn = tn;
		    }
		  else
		    {
		      mod (tp, tn, mp, mn, &dinv, scratch);
		      MPN_COPY (xp, tp, mn);
		      xn = mn;
		    }
		}
	      e <<= 1;
	      c--;
	    }
	  while (c != 0);
	}

      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
	 with the original M.  */
      if (m_zero_cnt != 0)
	{
	  mp_limb_t cy;
	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
	  tp[xn] = cy;
	  xn += cy != 0;

	  if (xn < mn)
	    {
	      MPN_COPY (xp, tp, xn);
	    }
	  else
	    {
	      mod (tp, xn, mp, mn, &dinv, scratch);
	      MPN_COPY (xp, tp, mn);
	      xn = mn;
	    }
	  mpn_rshift (xp, xp, xn, m_zero_cnt);
	}
      MPN_NORMALIZE (xp, xn);

      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
	{
	  mp = PTR(m);		/* want original, unnormalized m */
	  mpn_sub (xp, mp, mn, xp, xn);
	  xn = mn;
	  MPN_NORMALIZE (xp, xn);
	}
      MPZ_REALLOC (r, xn);
      SIZ (r) = xn;
      MPN_COPY (PTR(r), xp, xn);

      TMP_FREE;
    }
  else
    {
      /* For large exponents, fake a mpz_t exponent and deflect to the more
	 sophisticated mpz_powm.  */
      mpz_t e;
      mp_limb_t ep[LIMBS_PER_ULONG];
      MPZ_FAKE_UI (e, ep, el);
      mpz_powm (r, b, e, m);
    }
}
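The exponent handling in the `el < 20` branch (shift the exponent so its most significant set bit is discarded, then read the remaining bits off the top of the word) is ordinary left-to-right binary exponentiation. Here is a minimal standalone sketch of the same scan on 64-bit words; the names are hypothetical, it is not part of the GMP API, and it assumes a compiler that provides `unsigned __int128`:

#include <stdint.h>
#include <stdio.h>

/* Left-to-right binary exponentiation: compute b^e mod m (m > 0). */
static uint64_t
powm_u64 (uint64_t b, uint64_t e, uint64_t m)
{
  if (e == 0)
    return 1 % m;

  int c = 63;
  while (((e >> c) & 1) == 0)	/* find the most significant set bit */
    c--;

  uint64_t x = b % m;		/* consume the msb: x = b^1 */
  e = (e << (63 - c)) << 1;	/* drop the msb; next bit is now the top bit */

  while (c-- > 0)
    {
      x = (uint64_t) ((unsigned __int128) x * x % m);		/* square */
      if (e >> 63)						/* top bit set? */
	x = (uint64_t) ((unsigned __int128) x * (b % m) % m);	/* multiply */
      e <<= 1;
    }
  return x;
}

int
main (void)
{
  /* 3^19 mod 1000003 */
  printf ("%llu\n", (unsigned long long) powm_u64 (3, 19, 1000003));
  return 0;
}

Each iteration squares, and multiplies by the base exactly when the current top bit of the shifted exponent is set, mirroring the `(mp_limb_signed_t) e < 0` test in mpz_powm_ui above.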
void
mpn_div_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
	    }
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else			/* divisor is already normalised */
	{
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
	    }
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* |________________________|
                 |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;
	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else			/* divisor is already normalised */
	{