/* Reduce the arf_t x modulo log(2): writes a fixed-point fraction to {w, wn}
   and the integer quotient to q, so that x = q*log(2) + w (with w in [0,log(2))
   scaled to wn limbs).  *error receives an error bound in ulp on w.
   Returns 1 on success, 0 if wn (or the required table slice) exceeds the
   precomputed log(2) table ARB_LOG_TAB2_LIMBS.  */
int _arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error, const arf_t x, mp_size_t wn)
{
    mp_srcptr xp;
    mp_size_t xn;
    int negative;
    slong exp;

    ARF_GET_MPN_READONLY(xp, xn, x);
    exp = ARF_EXP(x);
    negative = ARF_SGNBIT(x);

    if (exp <= -1)
    {
        /* x < 1/2 in magnitude: no division needed, q is 0 or -1. */
        /* todo: just zero top */
        flint_mpn_zero(w, wn);
        *error = _arf_get_integer_mpn(w, xp, xn, exp + wn * FLINT_BITS);

        if (!negative)
        {
            fmpz_zero(q);
        }
        else
        {
            /* negative x: w <- log(2) - |x|, q <- -1 */
            if (wn > ARB_LOG_TAB2_LIMBS)
                return 0;
            mpn_sub_n(w, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - wn, w, wn);
            *error += 1; /* log(2) has 1 ulp error */
            fmpz_set_si(q, -1);
        }
        return 1; /* success */
    }
    else
    {
        mp_ptr qp, rp, np;
        mp_srcptr dp;
        mp_size_t qn, rn, nn, dn, tn, alloc;
        TMP_INIT;

        /* tn = extra limbs needed to absorb the exponent (exp + 2 bits) */
        tn = ((exp + 2) + FLINT_BITS - 1) / FLINT_BITS;
        dn = wn + tn;     /* denominator */
        nn = wn + 2 * tn; /* numerator */
        qn = nn - dn + 1; /* quotient */
        rn = dn;          /* remainder */

        if (dn > ARB_LOG_TAB2_LIMBS)
            return 0;

        TMP_START;
        alloc = qn + rn + nn;
        qp = TMP_ALLOC_LIMBS(alloc);
        rp = qp + qn;
        np = rp + rn;

        /* denominator = top dn limbs of the fixed-point log(2) table */
        dp = arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - dn;

        /* todo: prove that zeroing is unnecessary */
        flint_mpn_zero(np, nn);
        _arf_get_integer_mpn(np, xp, xn, exp + dn * FLINT_BITS);
        mpn_tdiv_qr(qp, rp, 0, np, nn, dp, dn);

        if (!negative)
        {
            /* keep the top wn limbs of the remainder as the fraction */
            flint_mpn_copyi(w, rp + tn, wn);
            *error = 2;
        }
        else
        {
            /* negative x: round the quotient up and take log(2) - r */
            if (mpn_add_1(qp, qp, qn, 1))
            {
                /* I believe this cannot happen (should prove it) */
                flint_printf("mod log(2): unexpected carry\n");
                abort();
            }
            mpn_sub_n(w, dp + tn, rp + tn, wn);
            *error = 3;
        }

        /* read the exponent: normalise the quotient and store it in q */
        while (qn > 1 && qp[qn-1] == 0)
            qn--;
        if (qn == 1)
        {
            if (!negative)
                fmpz_set_ui(q, qp[0]);
            else
                fmpz_neg_ui(q, qp[0]);
        }
        else
        {
            fmpz_set_mpn_large(q, qp, qn, negative);
        }

        TMP_END;
        return 1;
    }
}
/* x <- n!^(m), the m-multifactorial of n: the product n*(n-m)*(n-2m)*...
   Strategy: pull out g = gcd(n, m); the remaining factors are coprime to m
   and are accumulated as a limb product list, then multiplied by g^sn where
   sn is the number of factors (the g^sn correction is applied at the end).  */
void mpz_mfac_uiui (mpz_ptr x, unsigned long n, unsigned long m)
{
  ASSERT (n <= GMP_NUMB_MAX);
  ASSERT (m != 0);

  if (n < 3 || n - 3 < m - 1) { /* (n < 3 || n - 1 <= m || m == 0) */
    /* At most one factor: result is n (or 1 when n == 0).
       An mpz always has at least one limb allocated, so PTR(x)[0] is safe. */
    PTR (x)[0] = n + (n == 0);
    SIZ (x) = 1;
  } else { /* m < n - 1 < GMP_NUMB_MAX */
    mp_limb_t g, sn;
    mpz_t t;

    sn = n;
    g = mpn_gcd_1 (&sn, 1, m);
    if (g != 1) { n/=g; m/=g; }

    if (m <= 2) { /* fac or 2fac */
      if (m == 1) {
        if (g > 2) {
          /* need the g^sn correction below: compute n! into temporary t */
          mpz_init (t);
          mpz_fac_ui (t, n);
          sn = n;
        } else {
          /* g == 1 or 2: result computable directly, no correction */
          if (g == 2)
            mpz_2fac_ui (x, n << 1);
          else
            mpz_fac_ui (x, n);
          return;
        }
      } else { /* m == 2 */
        if (g != 1) {
          mpz_init (t);
          mpz_2fac_ui (t, n);
          sn = n / 2 + 1; /* number of factors in n!! */
        } else {
          mpz_2fac_ui (x, n);
          return;
        }
      }
    } else { /* m >= 3, gcd(n,m) = 1 */
      mp_limb_t *factors;
      mp_limb_t prod, max_prod, j;
      TMP_DECL;

      sn = n / m + 1; /* number of factors */
      j = 0;
      prod = n;
      n -= m;
      max_prod = GMP_NUMB_MAX / n; /* limit below which prod*next fits a limb */

      TMP_MARK;
      factors = TMP_ALLOC_LIMBS (sn / log_n_max (n) + 2);

      /* Accumulate factors n, n-m, n-2m, ... packing as many as fit per limb. */
      for (; n > m; n -= m)
        FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
      factors[j++] = n;
      factors[j++] = prod;

      if (g > 1) {
        mpz_init (t);
        mpz_prodlimbs (t, factors, j);
      } else
        mpz_prodlimbs (x, factors, j);

      TMP_FREE;
    }

    if (g > 1) {
      /* Re-apply the extracted gcd: x = g^sn * t. */
      mpz_t p;
      mpz_init (p);
      mpz_ui_pow_ui (p, g, sn); /* g^sn */
      mpz_mul (x, p, t);
      mpz_clear (p);
      mpz_clear (t);
    }
  }
}
mp_limb_t mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t q_orig, qn, sh, sl, i; mp_limb_t qh, cy, cy2; mp_ptr tp; TMP_DECL; ASSERT (dn >= 6); ASSERT (nn >= dn + 3); ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); qn = nn - dn; if (qn + 1 < dn) { dp += dn - (qn + 1); dn = qn + 1; } q_orig = qn; qh = mpn_cmp(np + nn - dn, dp, dn) >= 0; if (qh != 0) mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn); np += nn - dn - qn; nn = dn + qn; /* Reduce until dn - 1 >= qn */ while (dn - 1 < qn) { sh = MIN(dn, qn - dn + 1); if (sh <= DC_DIV_QR_THRESHOLD) cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv); else cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv); qn -= sh; nn -= sh; } cy = np[nn - 1]; /* split into two parts */ sh = qn/2; sl = qn - sh; /* Rare case where truncation ruins normalisation */ if (cy > dp[dn - 1] || (cy == dp[dn - 1] && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0)) { __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn); return qh; } if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0) __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh); else { if (sh < SB_DIVAPPR_Q_CUTOFF) mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv); else mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv); } cy = np[nn - sh]; TMP_MARK; tp = TMP_ALLOC_LIMBS(sl + 2); mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh); cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2); TMP_FREE; while ((mp_limb_signed_t) cy < 0) { qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */ /* correct remainder, noting that "digits" of quotient aren't base B but in base varying with truncation, thus correction needs fixup */ cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2); for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++) cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 
2, sl + 2, dp[dn - sl - 3 - i]); } if (cy != 0) /* special case: unable to canonicalise */ __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl); else { if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0) __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl); else { if (sl < SB_DIVAPPR_Q_CUTOFF) mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv); else mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv); } } return qh; }
/* Sets root (if non-NULL) to trunc(u^(1/nth)) and rem to u - root^nth.
   Negative u is only allowed for odd nth; nth == 0 provokes a division by
   zero.  root and/or rem may alias u; root == NULL discards the root.  */
void mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
{
  mp_ptr rootp, up, remp;
  mp_size_t us, un, rootn, remn;
  TMP_DECL;

  us = SIZ(u);

  /* even roots of negatives provoke an exception */
  if (us < 0 && (nth & 1) == 0)
    SQRT_OF_NEGATIVE;

  /* root extraction interpreted as c^(1/nth) means a zeroth root should
     provoke a divide by zero, do this even if c==0 */
  if (nth == 0)
    DIVIDE_BY_ZERO;

  if (us == 0)
    {
      if (root != NULL)
        SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  un = ABS (us);
  rootn = (un - 1) / nth + 1;  /* root has ceil(un/nth) limbs */

  TMP_MARK;

  /* FIXME: Perhaps disallow root == NULL */
  /* Compute into temporary space when the destination aliases u (or is
     discarded), so u's limbs stay intact while mpn_rootrem reads them. */
  if (root != NULL && u != root)
    rootp = MPZ_REALLOC (root, rootn);
  else
    rootp = TMP_ALLOC_LIMBS (rootn);

  if (u != rem)
    remp = MPZ_REALLOC (rem, un);
  else
    remp = TMP_ALLOC_LIMBS (un);

  up = PTR(u);

  if (nth == 1)
    {
      MPN_COPY (rootp, up, un);
      remn = 0;
    }
  else
    {
      remn = mpn_rootrem (rootp, remp, up, un, (mp_limb_t) nth);
    }

  if (root != NULL)
    {
      SIZ(root) = us >= 0 ? rootn : -rootn;
      if (u == root)
        MPN_COPY (up, rootp, rootn);
    }

  /* Copy the remainder back when it was computed into temporary space
     because rem aliases u.  BUG FIX: previously this copy only ran inside
     the root != NULL branch (as an else-if on u != root), so calling with
     root == NULL and rem == u set SIZ(rem) but left stale limbs in rem.
     When u == root the root copy above already owns u's limbs, so skip.  */
  if (u == rem && u != root)
    MPN_COPY (up, remp, remn);

  SIZ(rem) = us < 0 && remn > 0 ? -remn : remn;
  TMP_FREE;
}
/* Evaluate N terms of the sin (and optionally cos) Taylor series of the
   fixed-point number {x, xn} using rectangular splitting, writing the
   results to {ysin, xn} and {ycos, xn}.  error[0] receives an error bound
   in ulp.  If sinonly is set only ysin is produced; if alternating is set
   the series terms alternate in sign (the sin/cos case, as opposed to
   sinh/cosh).  */
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly, int alternating)
{
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;
    int cosorsin;
    TMP_INIT;
    TMP_START;

    if (2 * N >= FACTORIAL_TAB_SIZE - 1)
    {
        flint_printf("_arb_sin_cos_taylor_rs: N too large!\n");
        abort();
    }

    if (N <= 1)
    {
        /* Degenerate series: 0 terms -> 0; 1 term -> sin ~ x, cos ~ 1. */
        if (N == 0)
        {
            flint_mpn_zero(ysin, xn);
            if (!sinonly)
                flint_mpn_zero(ycos, xn);
            error[0] = 0;
        }
        else if (N == 1)
        {
            flint_mpn_copyi(ysin, x, xn);
            if (!sinonly)
                flint_mpn_store(ycos, xn, LIMB_ONES);
            error[0] = 1;
        }
    }
    else
    {
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2); /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- |                                      */
        /* xpow = | <temp> | x^m | x^(m-1) | ... | x^2 | x |        */
#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        /* Build the power table x, x^2, ..., x^m by repeated squaring. */
        mpn_sqr(XPOW_WRITE(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);
        for (k = 4; k <= m; k += 2)
        {
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);
        }

        /* cosorsin = 0 evaluates the cos-type series, 1 the sin-type. */
        for (cosorsin = sinonly; cosorsin < 2; cosorsin++)
        {
            flint_mpn_zero(s, xn + 1);

            /* todo: skip one nonscalar multiplication (use x^m)
               when starting on x^0 */
            power = (N - 1) % m;

            for (k = N - 1; k >= 0; k--)
            {
                c = factorial_tab_numer[2 * k + cosorsin];
                new_denom = factorial_tab_denom[2 * k + cosorsin];
                old_denom = factorial_tab_denom[2 * k + cosorsin + 2];

                /* change denominators */
                if (new_denom != old_denom && k < N - 1)
                {
                    /* top-limb bias keeps the running value nonnegative
                       through the division when terms alternate */
                    if (alternating && (k % 2 == 0))
                        s[xn] += old_denom;
                    mpn_divrem_1(s, 0, s, xn + 1, old_denom);
                    if (alternating && (k % 2 == 0))
                        s[xn] -= 1;
                }

                if (power == 0)
                {
                    /* add c * x^0 -- only top limb is affected */
                    /* (alternating & k) selects odd k when alternating == 1 */
                    if (alternating & k)
                        s[xn] -= c;
                    else
                        s[xn] += c;

                    /* Outer polynomial evaluation: multiply by x^m */
                    if (k != 0)
                    {
                        mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                        flint_mpn_copyi(s, t + xn, xn + 1);
                    }

                    power = m - 1;
                }
                else
                {
                    if (alternating & k)
                        s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c);
                    else
                        s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);

                    power--;
                }
            }

            /* finally divide by denominator */
            if (cosorsin == 0)
            {
                mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]);

                /* perturb down to a number < 1 if necessary. note that this
                   does not invalidate the error bound: 1 - ulp is either
                   1 ulp too small or must be closer to the exact value */
                if (t[xn] == 0)
                    flint_mpn_copyi(ycos, t, xn);
                else
                    flint_mpn_store(ycos, xn, LIMB_ONES);
            }
            else
            {
                mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]);
                /* sin series was evaluated in x^2; multiply once by x */
                mpn_mul(t, s, xn + 1, x, xn);
                flint_mpn_copyi(ysin, t + xn, xn);
            }
        }

        /* error bound (ulp) */
        error[0] = 2;
    }

    TMP_END;
}
/* Read x from fp in the mpz "raw" format: a 4-byte big-endian signed byte
   count followed by the magnitude bytes, most significant first.  Returns
   the number of bytes consumed, or 0 on read error.
   NOTE(review): the visible portion of this function is truncated in this
   view — the closing of the abs_xsize branch, the final size assignment and
   the return value are not shown here; confirm against the full source.  */
size_t mpz_inp_raw (mpz_ptr x, FILE *fp)
{
  unsigned char csize_bytes[4];
  mp_size_t csize, abs_xsize, i;
  size_t abs_csize;
  char *cp;
  mp_ptr xp, sp, ep;
  mp_limb_t slimb, elimb;

  if (fp == 0)
    fp = stdin;

  /* 4 bytes for size */
  if (fread (csize_bytes, sizeof (csize_bytes), 1, fp) != 1)
    return 0;

  /* big-endian signed byte count; negative means a negative number */
  csize = ( (mp_size_t) csize_bytes[0] << 24)
        + ((mp_size_t) csize_bytes[1] << 16)
        + ((mp_size_t) csize_bytes[2] << 8)
        + ((mp_size_t) csize_bytes[3]);

  /* Sign extend if necessary. Could write "csize -= ((csize & 0x80000000L) << 1)",
     but that tickles a bug in gcc 3.0 for powerpc64 on AIX. */
  if (sizeof (csize) > 4 && csize & 0x80000000L)
    csize -= 0x80000000L << 1;

  abs_csize = ABS (csize);

  /* round up to a multiple of limbs */
  abs_xsize = BITS_TO_LIMBS (abs_csize*8);

  if (abs_xsize != 0)
    {
      xp = MPZ_NEWALLOC (x, abs_xsize);

      /* Get limb boundaries right in the read, for the benefit of the
         non-nails case. */
      xp[0] = 0;
      cp = (char *) (xp + abs_xsize) - abs_csize;
      if (fread (cp, abs_csize, 1, fp) != 1)
        return 0;

      if (GMP_NAIL_BITS == 0)
        {
          /* Reverse limbs to least significant first, and byte swap. If
             abs_xsize is odd then on the last iteration elimb and slimb are
             the same. It doesn't seem extra code to handle that case
             separately, to save an NTOH. */
          sp = xp;
          ep = xp + abs_xsize-1;
          for (i = 0; i < (abs_xsize+1)/2; i++)
            {
              NTOH_LIMB_FETCH (elimb, ep);
              NTOH_LIMB_FETCH (slimb, sp);
              *sp++ = elimb;
              *ep-- = slimb;
            }
        }
      else
        {
          /* It ought to be possible to do the transformation in-place, but
             for now it's easier to use an extra temporary area. */
          mp_limb_t byte, limb;
          int bits;
          mp_size_t tpos;
          mp_ptr tp;
          TMP_DECL;

          TMP_MARK;
          tp = TMP_ALLOC_LIMBS (abs_xsize);
          limb = 0;
          bits = 0;
          tpos = 0;
          /* Repack the big-endian byte stream into GMP_NUMB_BITS-wide limbs,
             least significant limb first. */
          for (i = abs_csize-1; i >= 0; i--)
            {
              byte = (unsigned char) cp[i];
              limb |= (byte << bits);
              bits += 8;
              if (bits >= GMP_NUMB_BITS)
                {
                  ASSERT (tpos < abs_xsize);
                  tp[tpos++] = limb & GMP_NUMB_MASK;
                  bits -= GMP_NUMB_BITS;
                  ASSERT (bits < 8);
                  limb = byte >> (8 - bits);
                }
            }
          if (bits != 0)
            {
              ASSERT (tpos < abs_xsize);
              tp[tpos++] = limb;
            }
          ASSERT (tpos == abs_xsize);
          MPN_COPY (xp, tp, abs_xsize);
          TMP_FREE;
        }
/* Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is
   either correct or one too large. We require dp to be normalised and inv
   to be a precomputed inverse given by mpn_invert. */
mp_limb_t mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);
   ASSERT(mpn_is_invert(inv, dp, dn));

   /* ret2 records the high quotient limb from an initial subtraction. */
   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }

   /* q ~= high(N) + high(N' * inv), computed from the top dn+1 limbs of N. */
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy + 1);

   /* Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + n - 1, n + 1 }
      N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the
      latter case the left hand bound is either an integer or < 2/B below
      one. */

   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }

   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1))))
   {
      /* Special case, multiply out to get accurate quotient */
      ret -= mpn_sub_1(qp, qp, dn, 1);
      if (UNLIKELY(ret == ~CNST_LIMB(0)))
         ret += mpn_add_1(qp, qp, dn, 1);
      /* ret is now guaranteed to be 0 */
      ASSERT(ret == 0);
      mpn_mul_n(tp, qp, dp, dn);
      mpn_sub_n(tp, np, tp, dn+1);
      /* adjust quotient upward while the remainder is still >= d */
      while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)
      {
         ret += mpn_add_1(qp, qp, dn, 1);
         tp[dn] -= mpn_sub_n(tp, tp, dp, dn);
      }
      /* Not possible for ret == 2 as we have qp*dp <= np */
      ASSERT(ret + ret2 < 2);
   }

   TMP_FREE;
   return ret + ret2;
}
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.

   Iterates r' <-- r - r * (a^{k-1} r^k - 1) / n

   If a^{k-1} r^k = 1 (mod 2^m), then a^{k-1} r'^k = 1 (mod 2^{2m}),

   Compute the update term as r' = r - (a^{k-1} r^{k+1} - r) / k
   where we still have cancelation of low limbs. */
void mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_size_t sizes[GMP_LIMB_BITS * 2];
  mp_ptr akm1, tp, rnp, ep, scratch;
  mp_limb_t a0, r0, km1, kp1h, kinv;
  mp_size_t rn;
  unsigned i;
  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);
  ASSERT (k >= 3);

  TMP_MARK;

  akm1 = TMP_ALLOC_LIMBS (4*n);
  tp = akm1 + n;

  km1 = k-1;
  /* FIXME: Could arrange the iteration so we don't need to compute this
     up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note that we
     can use wraparound also for a*r, since the low half is unchanged from
     the previous iteration. Or possibly mulmid. Also, a r = a^{1/k}, so
     we get that value too, for free? */
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */

  a0 = ap[0];
  binvert_limb (kinv, k); /* kinv = 1/k mod B */

  /* Seed: 4 bits of a^{1/k - 1} (mod 16) via the table below, then double
     the precision with Newton steps on a single limb.

	a % 8
	1 3 5 7
   k%4 +-------
     1 |1 1 1 1
     3 |1 9 9 1
  */
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f));   /* 8 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));          /* 32 bits */
#if GMP_NUMB_BITS > 32
  {
    unsigned prec = 32;
    do
      {
        r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
        prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (n == 1)
    {
      TMP_FREE;
      return;
    }

  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
  kp1h = k/2 + 1;

  /* FIXME: Special case for two limb iteration. */
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
  ep = rnp + n;

  /* Precompute the halving schedule of limb sizes for the lift from 1 limb
     up to n limbs.  FIXME: Possible to this on the fly with some bit
     fiddling. */
  for (i = 0; n > 1; n = (n + 1)/2)
    sizes[i++] = n;

  rn = 1;
  while (i-- > 0)
    {
      /* Newton lift from rn to sizes[i] limbs. */
      /* Compute x^{k+1}. */
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the final iteration.*/
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);

      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
      ASSERT (mpn_cmp (ep, rp, rn) == 0);

      ASSERT (sizes[i] <= 2*rn);
      /* High part of the update: divide the cancelled difference by k. */
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
      rn = sizes[i];
    }
  TMP_FREE;
}
/* Sets z to a ball containing log(x) for the exact floating-point number x,
   computed to prec bits.  Uses two table-based argument reductions plus an
   atanh Taylor series at moderate precision; falls back to MPFR above the
   table precision, and to direct series for x extremely close to 1.  */
void arb_log_arf(arb_t z, const arf_t x, slong prec)
{
    if (arf_is_special(x))
    {
        if (arf_is_pos_inf(x))
            arb_pos_inf(z);
        else
            arb_indeterminate(z);
    }
    else if (ARF_SGNBIT(x))
    {
        /* log of a negative number */
        arb_indeterminate(z);
    }
    else if (ARF_IS_POW2(x))
    {
        /* x = 2^(e-1): log(x) = (e-1)*log(2), exact zero when x = 1 */
        if (fmpz_is_one(ARF_EXPREF(x)))
        {
            arb_zero(z);
        }
        else
        {
            fmpz_t exp;
            fmpz_init(exp);
            _fmpz_add_fast(exp, ARF_EXPREF(x), -1);

            arb_const_log2(z, prec + 2);
            arb_mul_fmpz(z, z, exp, prec);

            fmpz_clear(exp);
        }
    }
    else if (COEFF_IS_MPZ(*ARF_EXPREF(x)))
    {
        /* exponent does not fit a word */
        arb_log_arf_huge(z, x, prec);
    }
    else
    {
        slong exp, wp, wn, N, r, closeness_to_one;
        mp_srcptr xp;
        mp_size_t xn, tn;
        mp_ptr tmp, w, t, u;
        mp_limb_t p1, q1bits, p2, q2bits, error, error2, cy;
        int negative, inexact, used_taylor_series;
        TMP_INIT;

        exp = ARF_EXP(x);
        negative = 0;

        ARF_GET_MPN_READONLY(xp, xn, x);

        /* compute a c >= 0 such that |x-1| <= 2^(-c) if c > 0 */
        closeness_to_one = 0;

        if (exp == 0)
        {
            /* x slightly below 1: count leading one bits of the mantissa */
            slong i;

            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(~xp[xn - 1]);

            if (closeness_to_one == FLINT_BITS)
            {
                for (i = xn - 2; i > 0 && xp[i] == LIMB_ONES; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(~xp[i]));
            }
        }
        else if (exp == 1)
        {
            /* x slightly above 1: count leading zero bits below the top bit */
            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(xp[xn - 1] & (~LIMB_TOP));

            if (closeness_to_one == FLINT_BITS)
            {
                slong i;

                for (i = xn - 2; xp[i] == 0; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(xp[i]));
            }

            closeness_to_one--;
        }

        /* if |t-1| <= 0.5               */
        /* |log(1+t) - t| <= t^2         */
        /* |log(1+t) - (t-t^2/2)| <= t^3 */
        if (closeness_to_one > prec + 1)
        {
            /* one-term series: log(x) ~= x - 1 */
            inexact = arf_sub_ui(arb_midref(z), x, 1, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -2 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
            return;
        }
        else if (2 * closeness_to_one > prec + 1)
        {
            /* two-term series: log(x) ~= (x-1) - (x-1)^2/2 */
            arf_t t, u;
            arf_init(t);
            arf_init(u);
            arf_sub_ui(t, x, 1, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul(u, t, t, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul_2exp_si(u, u, -1);
            inexact = arf_sub(arb_midref(z), t, u, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -3 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
            arf_clear(t);
            arf_clear(u);
            return;
        }

        /* Absolute working precision (NOT rounded to a limb multiple) */
        wp = prec + closeness_to_one + 5;

        /* Too high precision to use table */
        if (wp > ARB_LOG_TAB2_PREC)
        {
            arf_log_via_mpfr(arb_midref(z), x, prec, ARB_RND);
            arf_mag_set_ulp(arb_radref(z), arb_midref(z), prec);
            return;
        }

        /* Working precision in limbs */
        wn = (wp + FLINT_BITS - 1) / FLINT_BITS;

        TMP_START;

        tmp = TMP_ALLOC_LIMBS(4 * wn + 3);
        w = tmp;          /* requires wn+1 limbs */
        t = w + wn + 1;   /* requires wn+1 limbs */
        u = t + wn + 1;   /* requires 2wn+1 limbs */

        /* read x-1: shift the mantissa so the leading bit drops out */
        if (xn <= wn)
        {
            flint_mpn_zero(w, wn - xn);
            mpn_lshift(w + wn - xn, xp, xn, 1);
            error = 0;
        }
        else
        {
            mpn_lshift(w, xp + xn - wn, wn, 1);
            error = 1; /* truncation of low limbs */
        }

        /* First table-based argument reduction */
        if (wp <= ARB_LOG_TAB1_PREC)
            q1bits = ARB_LOG_TAB11_BITS;
        else
            q1bits = ARB_LOG_TAB21_BITS;

        p1 = w[wn-1] >> (FLINT_BITS - q1bits);

        /* Special case: covers logarithms of small integers */
        if (xn == 1 && (w[wn-1] == (p1 << (FLINT_BITS - q1bits))))
        {
            p2 = 0;
            flint_mpn_zero(t, wn);
            used_taylor_series = 0;
            N = r = 0; /* silence compiler warning */
        }
        else
        {
            /* log(1+w) = log(1+p/q) + log(1 + (qw-p)/(p+q)) */
            w[wn] = mpn_mul_1(w, w, wn, UWORD(1) << q1bits) - p1;
            mpn_divrem_1(w, 0, w, wn + 1, p1 + (UWORD(1) << q1bits));
            error += 1;

            /* Second table-based argument reduction (fused with
               log->atanh conversion) */
            if (wp <= ARB_LOG_TAB1_PREC)
                q2bits = ARB_LOG_TAB11_BITS + ARB_LOG_TAB12_BITS;
            else
                q2bits = ARB_LOG_TAB21_BITS + ARB_LOG_TAB22_BITS;

            p2 = w[wn-1] >> (FLINT_BITS - q2bits);

            u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits);
            flint_mpn_zero(u, wn);
            flint_mpn_copyi(t, u + wn, wn + 1);
            t[wn] += p2 + (UWORD(1) << (q2bits + 1));
            u[2 * wn] -= p2;
            mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1);

            /* propagated error from 1 ulp error: 2 atanh'(1/3) = 2.25 */
            error += 3;

            /* |w| <= 2^-r */
            r = _arb_mpn_leading_zeros(w, wn);

            /* N >= (wp-r)/(2r) */
            N = (wp - r + (2*r-1)) / (2*r);
            N = FLINT_MAX(N, 0);

            /* Evaluate Taylor series */
            _arb_atan_taylor_rs(t, &error2, w, wn, N, 0);
            /* Multiply by 2 */
            mpn_lshift(t, t, wn, 1);
            /* Taylor series evaluation error (multiply by 2) */
            error += error2 * 2;

            used_taylor_series = 1;
        }

        /* Size of output number */
        tn = wn;

        /* First table lookup: add log(1 + p1/2^q1bits) */
        if (p1 != 0)
        {
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab11[p1] + ARB_LOG_TAB1_LIMBS - tn, tn);
            else
                mpn_add_n(t, t, arb_log_tab21[p1] + ARB_LOG_TAB2_LIMBS - tn, tn);
            error++;
        }

        /* Second table lookup */
        if (p2 != 0)
        {
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab12[p2] + ARB_LOG_TAB1_LIMBS - tn, tn);
            else
                mpn_add_n(t, t, arb_log_tab22[p2] + ARB_LOG_TAB2_LIMBS - tn, tn);
            error++;
        }

        /* add exp * log(2) */
        exp--;

        if (exp > 0)
        {
            cy = mpn_addmul_1(t, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, exp);
            t[tn] = cy;
            tn += (cy != 0);
            error += exp;
        }
        else if (exp < 0)
        {
            /* subtract |exp|*log(2); the result may change sign */
            t[tn] = 0;
            u[tn] = mpn_mul_1(u, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, -exp);

            if (mpn_cmp(t, u, tn + 1) >= 0)
            {
                mpn_sub_n(t, t, u, tn + 1);
            }
            else
            {
                mpn_sub_n(t, u, t, tn + 1);
                negative = 1;
            }

            error += (-exp);

            tn += (t[tn] != 0);
        }

        /* The accumulated arithmetic error */
        mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS);

        /* Truncation error from the Taylor series */
        if (used_taylor_series)
            mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1) + 1);

        /* Set the midpoint */
        inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec);
        if (inexact)
            arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);

        TMP_END;
    }
}
/* Remove the largest power of {vp, vn} dividing {up, un} (but at most V^cap),
   writing the deflated value to {wp, *wn} and returning the exponent removed.
   V must be odd and > 1.  Works by repeatedly squaring V (dividing out
   V^1, V^2, V^4, ... while exact), then binary-descending over the saved
   powers to pick up the remaining factors.  */
mp_bitcnt_t mpn_remove (mp_ptr wp, mp_size_t *wn, mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn, mp_bitcnt_t cap)
{
  mp_ptr pwpsp[LOG];
  mp_size_t pwpsn[LOG];
  mp_size_t npowers;
  mp_ptr tp, qp, np, pp, qp2;
  mp_size_t pn, nn, qn, i;
  mp_bitcnt_t pwr;
  TMP_DECL;

  ASSERT (un > 0);
  ASSERT (vn > 0);
  ASSERT (vp[0] % 2 != 0);      /* 2-adic division wants odd numbers */
  ASSERT (vn > 1 || vp[0] > 1); /* else we would loop indefinitely */

  TMP_MARK;

  tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
  qp = TMP_ALLOC_LIMBS (un + 1);            /* quotient, alternating */
  qp2 = TMP_ALLOC_LIMBS (un + 1);           /* quotient, alternating */
  pp = vp;
  pn = vn;

  MPN_COPY (qp, up, un);
  qn = un;

  /* Phase 1: divide out V, V^2, V^4, ... while the division is exact. */
  npowers = 0;
  while (qn >= pn)
    {
      qp[qn] = 0;
      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
      if (!mpn_zero_p (tp, pn))
        break; /* could not divide by V^npowers */

      MP_PTR_SWAP (qp, qp2);
      qn = qn - pn;
      qn += qp[qn] != 0;

      /* remember this power for the descent in phase 2 */
      pwpsp[npowers] = pp;
      pwpsn[npowers] = pn;
      npowers++;

      if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
        break;

      nn = 2 * pn - 1; /* next power will be at least this large */
      if (nn > qn)
        break; /* next power would be overlarge */

      if (npowers == 1) /* Alloc once, but only if it's needed */
        np = TMP_ALLOC_LIMBS (qn + LOG); /* powers of V */
      else
        np += pn;

      mpn_sqr (np, pp, pn);
      pn = nn + (np[nn] != 0);
      pp = np;
    }

  /* Exponent removed so far: 1 + 2 + 4 + ... = 2^npowers - 1. */
  pwr = ((mp_bitcnt_t) 1 << npowers) - 1;

  /* Phase 2: binary descent over the saved powers, largest first. */
  for (i = npowers - 1; i >= 0; i--)
    {
      pn = pwpsn[i];
      if (qn < pn)
        continue;

      if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
        continue; /* V^i would bring us past cap */

      qp[qn] = 0;
      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
      if (!mpn_zero_p (tp, pn))
        continue; /* could not divide by V^i */

      MP_PTR_SWAP (qp, qp2);
      qn = qn - pn;
      qn += qp[qn] != 0;

      pwr += (mp_bitcnt_t) 1 << i;
    }

  MPN_COPY (wp, qp, qn);
  *wn = qn;

  TMP_FREE;

  return pwr;
}
/* rem <- num - trunc(num/den) * den, i.e. the remainder of truncating
   division.  The remainder takes the sign of num.  den must be nonzero.
   rem may alias num and/or den.  */
void mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
{
  mp_size_t nsign, dsign, nlimbs, dlimbs, qlimbs;
  mp_ptr nptr, dptr, quot, rptr;
  TMP_DECL;

  nsign = SIZ (num);
  dsign = SIZ (den);
  nlimbs = ABS (nsign);
  dlimbs = ABS (dsign);
  qlimbs = nlimbs - dlimbs + 1;

  if (UNLIKELY (dlimbs == 0))
    DIVIDE_BY_ZERO;

  /* Make room for the remainder up front (this may move rem's limbs). */
  rptr = MPZ_REALLOC (rem, dlimbs);

  if (qlimbs <= 0)
    {
      /* |num| < |den|: the remainder is num itself. */
      if (num != rem)
        {
          nptr = PTR (num);
          MPN_COPY (rptr, nptr, nlimbs);
          SIZ (rem) = SIZ (num);
        }
      return;
    }

  TMP_MARK;
  quot = TMP_ALLOC_LIMBS (qlimbs);
  nptr = PTR (num);
  dptr = PTR (den);

  /* FIXME: We should think about how to handle the temporary allocation.
     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
     allocate temp space.  */

  /* mpn_tdiv_qr writes the remainder through rptr, so any operand that
     shares rem's storage must first be moved to scratch space. */
  if (dptr == rptr)
    {
      mp_ptr save;
      save = TMP_ALLOC_LIMBS (dlimbs);
      MPN_COPY (save, dptr, dlimbs);
      dptr = save;
    }
  if (nptr == rptr)
    {
      mp_ptr save;
      save = TMP_ALLOC_LIMBS (nlimbs);
      MPN_COPY (save, nptr, nlimbs);
      nptr = save;
    }

  mpn_tdiv_qr (quot, rptr, 0L, nptr, nlimbs, dptr, dlimbs);

  MPN_NORMALIZE (rptr, dlimbs);
  SIZ (rem) = nsign >= 0 ? dlimbs : -dlimbs;

  TMP_FREE;
}
/* Set the mpf r to the value of the rational q, rounded (truncated) to r's
   precision.  Computes a quotient of PREC(r)+1 limbs by padding or
   shortening the numerator so the division yields exactly that many limbs.  */
void mpf_set_q (mpf_t r, mpq_srcptr q)
{
  mp_srcptr np, dp;
  mp_size_t prec, nsize, dsize, qsize, prospective_qsize, tsize, zeros;
  mp_size_t sign_quotient, high_zero;
  mp_ptr qp, tp;
  mp_exp_t exp;
  TMP_DECL;

  ASSERT (SIZ(&q->_mp_den) > 0); /* canonical q */

  nsize = SIZ (&q->_mp_num);
  dsize = SIZ (&q->_mp_den);

  if (UNLIKELY (nsize == 0))
    {
      SIZ (r) = 0;
      EXP (r) = 0;
      return;
    }

  TMP_MARK;

  prec = PREC (r);
  qp = PTR (r);

  sign_quotient = nsize;
  nsize = ABS (nsize);
  np = PTR (&q->_mp_num);
  dp = PTR (&q->_mp_den);

  prospective_qsize = nsize - dsize + 1; /* q from using given n,d sizes */
  exp = prospective_qsize;               /* ie. number of integer limbs */
  qsize = prec + 1;                      /* desired q */

  zeros = qsize - prospective_qsize; /* n zeros to get desired qsize */
  tsize = nsize + zeros;             /* size of intermediate numerator */
  tp = TMP_ALLOC_LIMBS (tsize + 1);  /* +1 for mpn_div_q's scratch */

  if (zeros > 0)
    {
      /* pad n with zeros into temporary space */
      MPN_ZERO (tp, zeros);
      MPN_COPY (tp+zeros, np, nsize);
      np = tp; /* mpn_div_q allows this overlap */
    }
  else
    {
      /* shorten n to get desired qsize; zeros <= 0 here, so this
         advances np past the low (-zeros) limbs */
      np -= zeros;
    }

  ASSERT (tsize-dsize+1 == qsize);
  mpn_div_q (qp, np, tsize, dp, dsize, tp);

  /* strip possible zero high limb */
  high_zero = (qp[qsize-1] == 0);
  qsize -= high_zero;
  exp -= high_zero;

  EXP (r) = exp;
  SIZ (r) = sign_quotient >= 0 ? qsize : -qsize;

  TMP_FREE;
}
/* r <- b^e mod m, using k-ary sliding-window exponentiation with either
   REDC (Montgomery) reduction for small odd m, or plain division otherwise.
   NOTE(review): the visible portion of this function is truncated in this
   view — the main square-and-multiply loop and the final result extraction
   are not shown here; confirm against the full source.  */
void mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
{
  mp_ptr xp, tp, qp, gp, this_gp;
  mp_srcptr bp, ep, mp;
  mp_size_t bn, es, en, mn, xn;
  mp_limb_t invm, c;
  mpir_ui enb;
  mp_size_t i, K, j, l, k;
  int m_zero_cnt, e_zero_cnt;
  int sh;
  int use_redc;
#if HANDLE_NEGATIVE_EXPONENT
  mpz_t new_b;
#endif
#if REDUCE_EXPONENT
  mpz_t new_e;
#endif
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ (m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  TMP_MARK;

  es = SIZ (e);
  if (es <= 0)
    {
      if (es == 0)
        {
          /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on
             if m equals 1.  */
          SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
          PTR(r)[0] = 1;
          TMP_FREE; /* we haven't really allocated anything here */
          return;
        }
#if HANDLE_NEGATIVE_EXPONENT
      /* negative exponent: replace b by its inverse mod m */
      MPZ_TMP_INIT (new_b, mn + 1);
      if (! mpz_invert (new_b, b, m))
        DIVIDE_BY_ZERO;
      b = new_b;
      es = -es;
#else
      DIVIDE_BY_ZERO;
#endif
    }
  en = es;

#if REDUCE_EXPONENT
  /* Reduce exponent by dividing it by phi(m) when m small.  */
  if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
    {
      MPZ_TMP_INIT (new_e, 2);
      mpz_mod_ui (new_e, e, phi (mp[0]));
      e = new_e;
    }
#endif

  use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
  if (use_redc)
    {
      /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
      modlimb_invert (invm, mp[0]);
      invm = -invm;
    }
  else
    {
      /* Normalize m (i.e. make its most significant bit set) as required by
         division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
        {
          mp_ptr new_mp;
          new_mp = TMP_ALLOC_LIMBS (mn);
          mpn_lshift (new_mp, mp, mn, m_zero_cnt);
          mp = new_mp;
        }
    }

  /* Determine optimal value of k, the number of exponent bits we look at
     at a time.  */
  count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
  e_zero_cnt -= GMP_NAIL_BITS;
  enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
  k = 1;
  K = 2;
  while (2 * enb > K * (2 + k * (3 + k)))
    {
      k++;
      K *= 2;
      if (k == 10) /* cap allocation */
        break;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn);
  qp = TMP_ALLOC_LIMBS (mn + 1);

  gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);

  /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
  bn = ABSIZ (b);
  bp = PTR(b);
  /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly
     necessary for speed or correctness to do this when b and m have the
     same number of limbs, perhaps remove mpn_cmp call.  */
  if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
    {
      /* Reduce possibly huge base while moving it to gp[0]. Use a function
         call to reduce, since we don't want the quotient allocation to
         live until function return.  */
      if (use_redc)
        {
          reduce (tp + mn, bp, bn, mp, mn); /* b mod m */
          MPN_ZERO (tp, mn);
          mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
        }
      else
        {
          reduce (gp, bp, bn, mp, mn);
        }
    }
  else
    {
      /* |b| < m. We pad out operands to become mn limbs, which simplifies
         the rest of the function, but slows things down when the |b| << m.  */
      if (use_redc)
        {
          MPN_ZERO (tp, mn);
          MPN_COPY (tp + mn, bp, bn);
          MPN_ZERO (tp + mn + bn, mn - bn);
          mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
        }
      else
        {
          MPN_COPY (gp, bp, bn);
          MPN_ZERO (gp + bn, mn - bn);
        }
    }

  /* Compute xx^i for odd g < 2^i.  (Precompute the odd powers used by the
     k-bit window.)  */
  xp = TMP_ALLOC_LIMBS (mn);
  mpn_sqr (tp, gp, mn);
  if (use_redc)
    mpn_redc_1 (xp, tp, mp, mn, invm); /* xx = x^2*R^n */
  else
    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
  this_gp = gp;
  for (i = 1; i < K / 2; i++)
    {
      mpn_mul_n (tp, this_gp, xp, mn);
      this_gp += mn;
      if (use_redc)
        mpn_redc_1 (this_gp,tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */
      else
        mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
    }

  /* Start the real stuff.  */
  ep = PTR (e);
  i = en - 1;                       /* current index */
  c = ep[i];                        /* current limb */
  sh = GMP_NUMB_BITS - e_zero_cnt;  /* significant bits in ep[i] */
  sh -= k;                          /* index of lower bit of ep[i] to take into account */
  if (sh < 0)
    { /* k-sh extra bits are needed */
      if (i > 0)
        {
          i--;
          c <<= (-sh);
          sh += GMP_NUMB_BITS;
          c |= ep[i] >> sh;
        }
    }
/* Multiply {up, un} by {vp, vn} and write the result to {prodp, un + vn}
   assuming vn > 3*ceil(un/4). Note that prodp gets un + vn limbs stored,
   even if the actual result only needs un + vn - 1.

   Toom-4 multiplication: splits each operand into 4 parts of sn limbs
   (with short tails h1, h2), evaluates the corresponding polynomials at
   the points 1, -1, 2, 1/2, -1/2, 0 and infinity, multiplies pointwise
   (via MUL_TC4 / MUL_TC4_UNSIGNED, which track signed lengths n1..n8),
   and recovers the product with mpn_toom4_interpolate.
   NOTE(review): the evaluation buffers r1..r7 are macros defined outside
   this view (presumably offsets into tp/rp) — confirm against the full
   source.  */
void mpn_toom4_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
   mp_size_t ind;
   mp_limb_t cy, cy2, r30, r31;
   mp_ptr tp;
   mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1, h2;
   TMP_DECL;

   sn = (un + 3) / 4;
   h1 = un - 3*sn; /* tail limbs of u */
   h2 = vn - 3*sn; /* tail limbs of v */
   ASSERT (vn > 3*sn);

#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
#define b3 (vp + 3*sn)

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   TMP_MARK;

   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
#define u6 (tp + 4*t4 + 4*(sn+1))

   /* Evaluate u at 1 and -1: u3 = (a0+a2) + (a1+a3), u4 = +-((a0+a2)-(a1+a3)) */
   u6[sn] = mpn_add(u6, a1, sn, a3, h1);
   u5[sn] = mpn_add_n(u5, a2, a0, sn);
   mpn_add_n(u3, u5, u6, sn + 1);
   n4 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u4, u5, u6, sn + 1);
   else
   {
      mpn_sub_n(u4, u6, u5, sn + 1);
      n4 = -n4; /* negative sign tracked in the length */
   }

   /* Same evaluations for v. */
   u6[sn] = mpn_add(u6, b1, sn, b3, h2);
   u5[sn] = mpn_add_n(u5, b2, b0, sn);
   mpn_add_n(r2, u5, u6, sn + 1);
   n5 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u5, u5, u6, sn + 1);
   else
   {
      mpn_sub_n(u5, u6, u5, sn + 1);
      n5 = -n5;
   }

   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */

   /* Evaluate u at 2 (scaled): r1 = 8*a0 + 2*a2 (shifted), r2 = 4*a1 + a3 */
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, a2, sn, 1);
   MPN_COPY(r2, a3, h1);
   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);
   cy = mpn_addmul_1(r2, a1, h1, 4);
#endif
   if (sn > h1)
   {
      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u5, r1, r2, sn + 1);
   n6 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(u6, r1, r2, sn + 1);
   else
   {
      mpn_sub_n(u6, r2, r1, sn + 1);
      n6 = -n6;
   }

   /* Same scaled evaluations for v. */
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, b3, b1, h2, 2);
#else
   r1[sn] = mpn_lshift(r1, b2, sn, 1);
   MPN_COPY(r2, b3, h2);
   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);
   cy = mpn_addmul_1(r2, b1, h2, 4);
#endif
   if (sn > h2)
   {
      cy2 = mpn_lshift(r2 + h2, b1 + h2, sn - h2, 2);
      cy = cy2 + mpn_add_1(r2 + h2, r2 + h2, sn - h2, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u2, r1, r2, sn + 1);
   n8 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(r2, r1, r2, sn + 1);
   else
   {
      mpn_sub_n(r2, r2, r1, sn + 1);
      n8 = -n8;
   }

   /* save the two low limbs of r3 clobbered by the next products */
   r30 = r3[0];
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */
   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */
   r3[1] = r31;

   /* Evaluate u at 2: u2 = a0 + 2*a1 + 4*a2 + 8*a3 (Horner form). */
#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(u2, a2, a3, h1);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy);
   u2[sn] = cy;
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);
#else
   MPN_COPY(u2, a0, sn);
   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);
   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);
   cy = mpn_addmul_1(u2, a3, h1, 8);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);
   u2[sn] += cy;
#endif

   /* Evaluate v at 2: r1 = b0 + 2*b1 + 4*b2 + 8*b3. */
#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(r1, b2, b3, h2);
   if (sn > h2)
      cy = mpn_add_1(r1 + h2, b2 + h2, sn - h2, cy);
   r1[sn] = cy;
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);
#else
   MPN_COPY(r1, b0, sn);
   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);
   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);
   cy = mpn_addmul_1(r1, b3, h2, 8);
   if (sn > h2)
      cy = mpn_add_1(r1 + h2, r1 + h2, sn - h2, cy);
   r1[sn] += cy;
#endif

   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */
   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h2); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */

   TC4_DENORM(r1, n1, t4 - 1);

/* Layout of the evaluation products across the result area:
   rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
   <----------- r7-----------><------------r5-------------->
                <-------------r3------------->
                <-------------r6------------->                     < -----------r2------------>{ }
                             <-------------r4-------------->      <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != un + vn)
   {
      MPN_ZERO((rp + rpn), un + vn - rpn);
   }

   TMP_FREE;
}
/* mpz_xor -- bitwise exclusive-or, res = op1 ^ op2, with two's-complement
   semantics for negative operands (a negative mpz behaves as the infinite
   two's-complement bit pattern ~(|x| - 1)).  Handles res aliasing either
   operand: pointers are refreshed after every reallocation.  */
void
mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
{
  mp_srcptr op1_ptr, op2_ptr;
  mp_size_t op1_size, op2_size;   /* signed limb counts (sign = number sign) */
  mp_ptr res_ptr;
  mp_size_t res_size, res_alloc;
  TMP_DECL;

  TMP_MARK;
  op1_size = SIZ(op1);
  op2_size = SIZ(op2);

  op1_ptr = PTR(op1);
  op2_ptr = PTR(op2);
  res_ptr = PTR(res);

  if (op1_size >= 0)
    {
      if (op2_size >= 0)
	{
	  /* Both operands non-negative: plain limb-wise xor over the common
	     length, then copy the excess high limbs of the longer operand.  */
	  if (op1_size >= op2_size)
	    {
	      if (ALLOC(res) < op1_size)
		{
		  _mpz_realloc (res, op1_size);
		  /* No overlapping possible: op1_ptr = PTR(op1); */
		  op2_ptr = PTR(op2);
		  res_ptr = PTR(res);
		}
	      /* High limbs of op1 pass through unchanged (xor with 0).
		 Skip the copy when res already aliases op1.  */
	      if (res_ptr != op1_ptr)
		MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
			  op1_size - op2_size);
	      if (LIKELY (op2_size != 0))
		mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
	      res_size = op1_size;
	    }
	  else
	    {
	      /* Mirror of the branch above with op2 the longer operand.  */
	      if (ALLOC(res) < op2_size)
		{
		  _mpz_realloc (res, op2_size);
		  op1_ptr = PTR(op1);
		  /* No overlapping possible: op2_ptr = PTR(op2); */
		  res_ptr = PTR(res);
		}
	      if (res_ptr != op2_ptr)
		MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
			  op2_size - op1_size);
	      if (LIKELY (op1_size != 0))
		mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	      res_size = op2_size;
	    }

	  /* Xor can cancel high limbs; strip them before storing the size.  */
	  MPN_NORMALIZE (res_ptr, res_size);
	  SIZ(res) = (int) res_size; // (int) added by PM
	  return;
	}
      else /* op2_size < 0 */
	{
	  /* Fall through to the code at the end of the function.  */
	}
    }
  else
    {
      if (op2_size < 0)
	{
	  mp_ptr opx, opy;

	  /* Both operands are negative, the result will be positive.
	      (-OP1) ^ (-OP2) =
	      = ~(OP1 - 1) ^ ~(OP2 - 1) =
	      = (OP1 - 1) ^ (OP2 - 1)  */

	  op1_size = -op1_size;
	  op2_size = -op2_size;

	  /* Possible optimization: Decrease mpn_sub precision,
	     as we won't use the entire res of both.  */
	  TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
	  mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
	  op1_ptr = opx;

	  mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
	  op2_ptr = opy;

	  /* Canonicalize so op1 is the shorter operand.  */
	  if (op1_size > op2_size)
	    MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);

	  res_alloc = op2_size;
	  res_ptr = MPZ_REALLOC (res, res_alloc);
	  MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
		    op2_size - op1_size);
	  mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	  res_size = op2_size;

	  MPN_NORMALIZE (res_ptr, res_size);
	  SIZ(res) = (int) res_size; // (int) added by PM
	  TMP_FREE;
	  return;
	}
      else
	{
	  /* We should compute -OP1 ^ OP2.  Swap OP1 and OP2 and fall
	     through to the code that handles OP1 ^ -OP2.  */
	  MPZ_SRCPTR_SWAP (op1, op2);
	  MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
	}
    }

  {
    mp_ptr opx;
    mp_limb_t cy;

    /* Operand 2 negative, so will be the result.
       -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
       = ~(OP1 ^ ~(OP2 - 1)) + 1 =
       = (OP1 ^ (OP2 - 1)) + 1      */

    op2_size = -op2_size;

    opx = TMP_ALLOC_LIMBS (op2_size);
    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
    op2_ptr = opx;

    /* +1 for the possible carry limb of the final increment below.  */
    res_alloc = MAX (op1_size, op2_size) + 1;
    if (ALLOC(res) < res_alloc)
      {
	_mpz_realloc (res, res_alloc);
	op1_ptr = PTR(op1);
	/* op2_ptr points to temporary space.  */
	res_ptr = PTR(res);
      }

    if (op1_size > op2_size)
      {
	MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
		  op1_size - op2_size);
	mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
	res_size = op1_size;
      }
    else
      {
	MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
		  op2_size - op1_size);
	if (LIKELY (op1_size != 0))
	  mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	res_size = op2_size;
      }

    /* Final "+ 1" of the identity above; may carry into a new top limb.  */
    cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
    res_ptr[res_size] = cy;
    res_size += (cy != 0);
    MPN_NORMALIZE (res_ptr, res_size);
    SIZ(res) = (int) -res_size; // (int) added by PM
    TMP_FREE;
  }
}
/* mpz_gcd -- g = gcd(|u|, |v|).  Strips common factors of two first
   (gcd(2^j a, 2^k b) = 2^min(j,k) gcd(a, b) for odd a, b), reduces the odd
   parts with mpn_gcd, then shifts the result back in.  */
void
mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
{
  mpir_ui g_zero_bits, u_zero_bits, v_zero_bits;   /* trailing zero bits */
  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs; /* trailing zero limbs */
  mp_ptr tp;
  mp_ptr up;
  mp_size_t usize;
  mp_ptr vp;
  mp_size_t vsize;
  mp_size_t gsize;
  TMP_DECL;

  up = PTR(u);
  usize = ABSIZ (u);
  vp = PTR(v);
  vsize = ABSIZ (v);

  /* GCD(0, V) == V.  */
  if (usize == 0)
    {
      SIZ (g) = vsize;
      if (g == v)
	return;
      MPZ_REALLOC (g, vsize);
      MPN_COPY (PTR (g), vp, vsize);
      return;
    }

  /* GCD(U, 0) == U.  */
  if (vsize == 0)
    {
      SIZ (g) = usize;
      if (g == u)
	return;
      MPZ_REALLOC (g, usize);
      MPN_COPY (PTR (g), up, usize);
      return;
    }

  /* Single-limb operand: mpn_gcd_1 handles the whole job.  */
  if (usize == 1)
    {
      SIZ (g) = 1;
      PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]);
      return;
    }

  if (vsize == 1)
    {
      SIZ(g) = 1;
      PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]);
      return;
    }

  TMP_MARK;

  /* Eliminate low zero bits from U and V and move to temporary storage.  */
  while (*up == 0)
    up++;
  u_zero_limbs = up - PTR(u);
  usize -= u_zero_limbs;
  count_trailing_zeros (u_zero_bits, *up);
  tp = up;
  up = TMP_ALLOC_LIMBS (usize);
  if (u_zero_bits != 0)
    {
      mpn_rshift (up, tp, usize, u_zero_bits);
      /* The shift may have emptied the top limb.  */
      usize -= up[usize - 1] == 0;
    }
  else
    MPN_COPY (up, tp, usize);

  while (*vp == 0)
    vp++;
  v_zero_limbs = vp - PTR (v);
  vsize -= v_zero_limbs;
  count_trailing_zeros (v_zero_bits, *vp);
  tp = vp;
  vp = TMP_ALLOC_LIMBS (vsize);
  if (v_zero_bits != 0)
    {
      mpn_rshift (vp, tp, vsize, v_zero_bits);
      vsize -= vp[vsize - 1] == 0;
    }
  else
    MPN_COPY (vp, tp, vsize);

  /* The power of two dividing the gcd is min(2^j, 2^k): compare the
     limb counts first, and only on a tie compare the bit counts.  */
  if (u_zero_limbs > v_zero_limbs)
    {
      g_zero_limbs = v_zero_limbs;
      g_zero_bits = v_zero_bits;
    }
  else if (u_zero_limbs < v_zero_limbs)
    {
      g_zero_limbs = u_zero_limbs;
      g_zero_bits = u_zero_bits;
    }
  else  /* Equal.  */
    {
      g_zero_limbs = u_zero_limbs;
      g_zero_bits = MIN (u_zero_bits, v_zero_bits);
    }

  /* Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */
  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
    ? mpn_gcd (vp, vp, vsize, up, usize)
    : mpn_gcd (vp, up, usize, vp, vsize);

  /* Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits).  */
  gsize = vsize + g_zero_limbs;
  if (g_zero_bits != 0)
    {
      mp_limb_t cy_limb;
      /* One extra limb if the left shift spills out of the top limb.  */
      gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;
      MPZ_REALLOC (g, gsize);
      MPN_ZERO (PTR (g), g_zero_limbs);
      tp = PTR(g) + g_zero_limbs;
      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
      if (cy_limb != 0)
	tp[vsize] = cy_limb;
    }
  /* NOTE(review): the g_zero_bits == 0 store path, the final SIZ(g)
     assignment and TMP_FREE are not visible in this chunk -- they
     presumably follow immediately; verify against the original file.  */
/* mpz_aorsmul -- w += x*y (sub == 0) or w -= x*y (sub == -1).
   Only the SIGN of `sub` is ever consulted below, so xor-ing it with a
   signed limb count flips the effective operation exactly when that
   count is negative; this folds the signs of x, y and w into `sub`
   without any explicit branching on them.  */
REGPARM_ATTR (1) static void
mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
{
  mp_size_t  xsize, ysize, tsize, wsize, wsize_signed;
  mp_ptr     wp, tp;
  mp_limb_t  c, high;
  TMP_DECL (marker);

  /* w unaffected if x==0 or y==0 */
  xsize = SIZ(x);
  ysize = SIZ(y);
  if (xsize == 0 || ysize == 0)
    return;

  /* make x the bigger of the two */
  if (ABS(ysize) > ABS(xsize))
    {
      MPZ_SRCPTR_SWAP (x, y);
      MP_SIZE_T_SWAP (xsize, ysize);
    }

  /* Fold y's sign into the operation.  */
  sub ^= ysize;
  ysize = ABS(ysize);

  /* use mpn_addmul_1/mpn_submul_1 if possible */
  if (ysize == 1)
    {
      mpz_aorsmul_1 (w, x, PTR(y)[0], sub);
      return;
    }

  /* Fold x's sign into the operation.  */
  sub ^= xsize;
  xsize = ABS(xsize);

  /* Fold w's sign in too: sub >= 0 now means |w| + |x*y|-style magnitude
     addition, sub < 0 means magnitude subtraction.  */
  wsize_signed = SIZ(w);
  sub ^= wsize_signed;
  wsize = ABS(wsize_signed);

  /* +1 for a possible carry limb from the addition below.  */
  tsize = xsize + ysize;
  MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
  wp = PTR(w);

  if (wsize_signed == 0)
    {
      /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,
	 since we know x,y!=0 but w==0.  */
      high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
      tsize -= (high == 0);
      SIZ(w) = (sub >= 0 ? tsize : -tsize);
      return;
    }

  TMP_MARK (marker);
  tp = TMP_ALLOC_LIMBS (tsize);

  high = mpn_mul (tp, PTR(x),xsize, PTR(y),ysize);
  tsize -= (high == 0);
  ASSERT (tp[tsize-1] != 0);
  if (sub >= 0)
    {
      /* Magnitude addition: make `up` point at the longer operand so
	 mpn_add's size requirement (first >= second) holds.  */
      mp_srcptr up    = wp;
      mp_size_t usize = wsize;
      if (usize < tsize)
	{
	  up    = tp;
	  usize = tsize;
	  tp    = wp;
	  tsize = wsize;
	  wsize = usize;
	}
      c = mpn_add (wp, up,usize, tp,tsize);
      wp[wsize] = c;
      wsize += (c != 0);
    }
  else
    {
      /* Magnitude subtraction: subtract the smaller magnitude from the
	 larger; if the product dominates, the result's sign flips.  */
      mp_srcptr up    = wp;
      mp_size_t usize = wsize;
      if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
	{
	  up    = tp;
	  usize = tsize;
	  tp    = wp;
	  tsize = wsize;
	  wsize = usize;
	  wsize_signed = -wsize_signed;
	}
      ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
      wsize = usize;
      /* Cancellation may clear high limbs.  */
      MPN_NORMALIZE (wp, wsize);
    }

  SIZ(w) = (wsize_signed >= 0 ? wsize : -wsize);
  TMP_FREE (marker);
}
/* mpn_jacobi_n -- Jacobi symbol computed by gcd-style reduction of the
   n-limb operands {ap,n} and {bp,n} (both clobbered; at least one odd,
   per the asserts).  `bits` is the packed sign/reciprocity state threaded
   through the mpn_jacobi_* helpers -- presumably produced by
   mpn_jacobi_init; verify against callers.  Returns -1, 0 or 1.  */
int
mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
{
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_ptr tp;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
  ASSERT ( (bp[0] | ap[0]) & 1);

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);

  /* Budget extra scratch for the divide-and-conquer hgcd path.  */
  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
    {
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p = CHOOSE_P (n);
      mp_size_t dc_scratch;

      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      hgcd_scratch = mpn_hgcd_itch (n - p);
      update_scratch = p + n - 1;
      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (dc_scratch > scratch)
	scratch = dc_scratch;
    }

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(scratch);

  /* Large operands: reduce with subquadratic hgcd steps applied to the
     top third of the numbers.  */
  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = 2*n/3;
      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      mp_size_t nn;
      mpn_hgcd_matrix_init (&M, n - p, tp);

      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
			    tp + matrix_scratch);
      if (nn > 0)
	{
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);
	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p,
				      tp + matrix_scratch);
	}
      else
	{
	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
	  if (!n)
	    {
	      TMP_FREE;
	      /* BITS_FAIL signals gcd != 1, i.e. Jacobi symbol 0.  */
	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
	    }
	}
    }

  /* Medium operands: Lehmer-style reduction using the top two limbs.  */
  while (n > 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      if (mask & GMP_NUMB_HIGHBIT)
	{
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else
	{
	  /* Normalize: extract the top GMP_NUMB_BITS*2 significant bits.  */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_nhgcd2 step */
      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
	{
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
	  if (!n)
	    {
	      TMP_FREE;
	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
	    }
	}
    }

  /* Base case: the bits state tells us whether a and b were swapped an
     odd number of times.  */
  if (bits >= 16)
    MP_PTR_SWAP (ap, bp);

  ASSERT (bp[0] & 1);

  if (n == 1)
    {
      mp_limb_t al, bl;
      al = ap[0];
      bl = bp[0];

      TMP_FREE;
      if (bl == 1)
	return 1 - 2*(bits & 1);
      else
	return mpn_jacobi_base (al, bl, bits << 1);
    }
  else
    {
      int res = mpn_jacobi_2 (ap, bp, bits & 1);
      TMP_FREE;
      return res;
    }
}
/* mpn_gcdext -- extended gcd of {ap,an} and {bp,n} (an >= n > 0, bp
   normalized).  Stores g = gcd(a,b) at gp and a cofactor u with
   g = u a + v b at up, writing u's signed limb count to *usizep.
   Returns the limb count of g.  Both inputs are clobbered.
   Strategy: one initial division, then Lehmer for small n, otherwise a
   loop of subquadratic hgcd reductions that simultaneously maintains the
   cofactor pair (u0, u1), finished off by mpn_gcdext_lehmer_n.  */
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_size_t ualloc = n + 1;   /* limbs reserved for each cofactor */

  struct gcdext_ctx ctx;
  mp_size_t un;               /* current significant length of u0/u1 */
  mp_ptr u0;
  mp_ptr u1;

  mp_ptr tp;

  TMP_DECL;

  ASSERT (an >= n);
  ASSERT (n > 0);
  ASSERT (bp[n-1] > 0);

  TMP_MARK;

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);

  /* For initial division */
  scratch = an - n + 1;
  if (scratch > talloc)
    talloc = scratch;

  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      /* For hgcd loop. */
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p1 = CHOOSE_P_1 (n);
      mp_size_t p2 = CHOOSE_P_2 (n);
      /* Two different split points are used below; budget for the
	 worse of the two.  */
      mp_size_t min_p = MIN(p1, p2);
      mp_size_t max_p = MAX(p1, p2);
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
      update_scratch = max_p + n - 1;

      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
	talloc = scratch;

      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
	 copies of a and b. */
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
	+ 3*GCDEXT_DC_THRESHOLD;

      if (scratch > talloc)
	talloc = scratch;

      /* Cofactors u0 and u1 */
      talloc += 2*(n+1);
    }

  tp = TMP_ALLOC_LIMBS(talloc);

  /* Reduce a mod b so both operands have n limbs.  */
  if (an > n)
    {
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);

      if (mpn_zero_p (ap, n))
	{
	  /* b divides a: gcd = b, cofactor u = 0.  */
	  MPN_COPY (gp, bp, n);
	  *usizep = 0;
	  TMP_FREE;
	  return n;
	}
    }

  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }

  /* Carve the cofactor vectors off the front of the scratch area.  */
  MPN_ZERO (tp, 2*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usizep;

  {
    /* For the first hgcd call, there are no u updates, and it makes
       some sense to use a different choice for p. */

    /* FIXME: We could trim use of temporary storage, since u0 and u1
       are not used yet. For the hgcd call, we could swap in the u0
       and u1 pointers for the relevant matrix elements. */

    struct hgcd_matrix M;
    mp_size_t p = CHOOSE_P_1 (n);
    mp_size_t nn;

    mpn_hgcd_matrix_init (&M, n - p, tp);
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
    if (nn > 0)
      {
	ASSERT (M.n <= (n - p - 1)/2);
	ASSERT (M.n + p <= (p + n - 1) / 2);

	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p,
				    tp + matrix_scratch);

	/* Seed the cofactors from the second row of M.  */
	MPN_COPY (u0, M.p[1][0], M.n);
	MPN_COPY (u1, M.p[1][1], M.n);
	un = M.n;
	while ( (u0[un-1] | u1[un-1] ) == 0)
	  un--;
      }
    else
      {
	/* mpn_hgcd has failed. Then either one of a or b is very
	   small, or the difference is very small. Perform one
	   subtraction followed by one division. */
	u1[0] = 1;

	ctx.u0 = u0;
	ctx.u1 = u1;
	ctx.tp = tp + n; /* ualloc */
	ctx.un = 1;

	/* Temporary storage n */
	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	if (n == 0)
	  {
	    /* Hook already stored g and u; it recorded g's size in ctx.  */
	    TMP_FREE;
	    return ctx.gn;
	  }

	un = ctx.un;
	ASSERT (un < ualloc);
      }
  }

  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P_2 (n);
      mp_size_t nn;

      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
	{
	  mp_ptr t0;

	  t0 = tp + matrix_scratch;
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);

	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);

	  /* By the same analysis as for mpn_hgcd_matrix_mul */
	  ASSERT (M.n + un <= ualloc);

	  /* FIXME: This copying could be avoided by some swapping of
	   * pointers. May need more temporary storage, though. */
	  MPN_COPY (t0, u0, un);

	  /* Temporary storage ualloc */
	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);

	  ASSERT (un < ualloc);
	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
	}
      else
	{
	  /* mpn_hgcd has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = tp + n; /* ualloc */
	  ctx.un = un;

	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  if (n == 0)
	    {
	      TMP_FREE;
	      return ctx.gn;
	    }

	  un = ctx.un;
	  ASSERT (un < ualloc);
	}
    }

  /* We have A = ... a + ... b
	     B =  u0 a +  u1 b

	     a = u1  A + ... B
	     b = -u0 A + ... B

     with bounds

       |u0|, |u1| <= B / min(a, b)

     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
     in which case the only reduction done so far is a = A - k B for
     some k.

     Compute g = u a + v b = (u u1 - v u0) A + (...) B
     Here, u, v are bounded by

     |u| <= b,
     |v| <= a
  */

  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
    {
      /* Must return the smallest cofactor, +u1 or -u0 */
      int c;

      MPN_COPY (gp, ap, n);

      MPN_CMP (c, u0, u1, un);
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
	 this case we choose the cofactor + 1, corresponding to G = A
	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usizep = -un;      /* negative size encodes a negative cofactor */
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usizep = un;
	}

      TMP_FREE;
      return n;
    }
  else if (UNLIKELY (u0[0] == 0) && un == 1)
    {
      mp_size_t gn;
      ASSERT (u1[0] == 1);

      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }
  else
    {
      /* General case: finish with Lehmer on copies of a and b, then
	 combine its cofactors (u, v) with (u0, u1) to express g in
	 terms of the ORIGINAL inputs A, B.  */
      mp_size_t u0n;
      mp_size_t u1n;
      mp_size_t lehmer_un;
      mp_size_t lehmer_vn;
      mp_size_t gn;

      mp_ptr lehmer_up;
      mp_ptr lehmer_vp;
      int negate;

      lehmer_up = tp; tp += n;

      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
      MPN_COPY (tp, ap, n);
      MPN_COPY (tp + n, bp, n);
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n,
				tp + 2*n);

      u0n = un;
      MPN_NORMALIZE (u0, u0n);
      ASSERT (u0n > 0);

      if (lehmer_un == 0)
	{
	  /* u == 0  ==>  v = g / b == 1  ==>  g = - u0 A + (...) B */
	  MPN_COPY (up, u0, u0n);
	  *usizep = -u0n;

	  TMP_FREE;
	  return gn;
	}

      lehmer_vp = tp;
      /* Compute v = (g - u a) / b */
      lehmer_vn = compute_v (lehmer_vp,
			     ap, bp, n, gp, gn, lehmer_up, lehmer_un,
			     tp + n + 1);

      if (lehmer_un > 0)
	negate = 0;
      else
	{
	  lehmer_un = -lehmer_un;
	  negate = 1;
	}

      u1n = un;
      MPN_NORMALIZE (u1, u1n);
      ASSERT (u1n > 0);

      ASSERT (lehmer_un + u1n <= ualloc);
      ASSERT (lehmer_vn + u0n <= ualloc);

      /* We may still have v == 0 */

      /* Compute u u0 */
      if (lehmer_un <= u1n)
	/* Should be the common case */
	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
      else
	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);

      un = u1n + lehmer_un;
      un -= (up[un - 1] == 0);

      if (lehmer_vn > 0)
	{
	  mp_limb_t cy;

	  /* Overwrites old u1 value */
	  if (lehmer_vn <= u0n)
	    /* Should be the common case */
	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
	  else
	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);

	  u1n = u0n + lehmer_vn;
	  u1n -= (u1[u1n - 1] == 0);

	  /* Cofactor of A is u u1 - v u0; since u and v have opposite
	     signs here, the magnitudes add (mpn_add wants first >= second).  */
	  if (u1n <= un)
	    {
	      cy = mpn_add (up, up, un, u1, u1n);
	    }
	  else
	    {
	      cy = mpn_add (up, u1, u1n, up, un);
	      un = u1n;
	    }
	  up[un] = cy;
	  un += (cy != 0);

	  ASSERT (un < ualloc);
	}
      *usizep = negate ? -un : un;

      TMP_FREE;
      return gn;
    }
}
y = y1 2^(n2 GMP_NUMB_BITS) + y0 */ /* x0 * y0 */ mpn_mul_n (rp, xp, yp, n2); if (n1 != n2) rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]); /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */ mpn_mullow_n (tp, xp + n1, yp, n2); mpn_add_n (rp + n1, rp + n1, tp, n2); /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */ mpn_mullow_n (tp, yp + n2, xp, n1); mpn_add_n (rp + n2, rp + n2, tp, n1); TMP_SFREE; } else { /* For really large operands, use plain mpn_mul_n but throw away upper n limbs of result. */ mp_ptr tp; TMP_DECL; TMP_MARK; tp = TMP_ALLOC_LIMBS (2 * n); mpn_mul_n (tp, xp, yp, n); MPN_COPY (rp, tp, n); TMP_FREE; } }
/* mpz_mul -- Multiply two integers. Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include <stdio.h> /* for NULL */ #include "gmp.h" #include "gmp-impl.h" #ifdef BERKELEY_MP #include "mp.h" #endif void #ifndef BERKELEY_MP mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v) #else /* BERKELEY_MP */ mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w) #endif /* BERKELEY_MP */ { mp_size_t usize; mp_size_t vsize; mp_size_t wsize; mp_size_t sign_product; mp_ptr up, vp; mp_ptr wp; mp_ptr free_me; size_t free_me_size; mp_limb_t cy_limb; TMP_DECL; usize = SIZ (u); vsize = SIZ (v); sign_product = usize ^ vsize; usize = ABS (usize); vsize = ABS (vsize); if (usize < vsize) { MPZ_SRCPTR_SWAP (u, v); MP_SIZE_T_SWAP (usize, vsize); } if (vsize == 0) { SIZ(w) = 0; return; } #if HAVE_NATIVE_mpn_mul_2 if (vsize <= 2) { MPZ_REALLOC (w, usize+vsize); wp = PTR(w); if (vsize == 1) cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]); else { cy_limb = mpn_mul_2 (wp, PTR(u), usize, PTR(v)); usize++; } wp[usize] = cy_limb; usize += (cy_limb != 0); SIZ(w) = (sign_product >= 0 ? 
usize : -usize); return; } #else if (vsize == 1) { MPZ_REALLOC (w, usize+1); wp = PTR(w); cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]); wp[usize] = cy_limb; usize += (cy_limb != 0); SIZ(w) = (sign_product >= 0 ? usize : -usize); return; } #endif TMP_MARK; free_me = NULL; up = PTR(u); vp = PTR(v); wp = PTR(w); /* Ensure W has space enough to store the result. */ wsize = usize + vsize; if (ALLOC(w) < wsize) { if (wp == up || wp == vp) { free_me = wp; free_me_size = ALLOC(w); } else (*__gmp_free_func) (wp, ALLOC(w) * BYTES_PER_MP_LIMB); ALLOC(w) = wsize; wp = (mp_ptr) (*__gmp_allocate_func) (wsize * BYTES_PER_MP_LIMB); PTR(w) = wp; } else { /* Make U and V not overlap with W. */ if (wp == up) { /* W and U are identical. Allocate temporary space for U. */ up = TMP_ALLOC_LIMBS (usize); /* Is V identical too? Keep it identical with U. */ if (wp == vp) vp = up; /* Copy to the temporary space. */ MPN_COPY (up, wp, usize); } else if (wp == vp) { /* W and V are identical. Allocate temporary space for V. */ vp = TMP_ALLOC_LIMBS (vsize); /* Copy to the temporary space. */ MPN_COPY (vp, wp, vsize); } } if (up == vp) { mpn_sqr (wp, up, usize); cy_limb = wp[wsize - 1]; } else { cy_limb = mpn_mul (wp, up, usize, vp, vsize); } wsize -= cy_limb == 0; SIZ(w) = sign_product < 0 ? -wsize : wsize; if (free_me != NULL) (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB); TMP_FREE; }