void fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly, const fmpz_t f, mp_bitcnt_t bit_size) { slong len; mpz_t tmp; if (fmpz_sgn(f) < 0) { flint_printf("Exception (fmpz_poly_bit_unpack_unsigned). Expected an unsigned value.\n"); abort(); } if (bit_size == 0 || fmpz_is_zero(f)) { fmpz_poly_zero(poly); return; } len = (fmpz_bits(f) + bit_size - 1) / bit_size; mpz_init2(tmp, bit_size*len); flint_mpn_zero(tmp->_mp_d, tmp->_mp_alloc); fmpz_get_mpz(tmp, f); fmpz_poly_fit_length(poly, len); _fmpz_poly_bit_unpack_unsigned(poly->coeffs, len, tmp->_mp_d, bit_size); _fmpz_poly_set_length(poly, len); _fmpz_poly_normalise(poly); mpz_clear(tmp); }
void _nmod_poly_div_newton(mp_ptr Q, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod) { const slong lenQ = lenA - lenB + 1; mp_ptr Arev, Brev; Arev = _nmod_vec_init(2 * lenQ); Brev = Arev + lenQ; _nmod_poly_reverse(Arev, A + (lenA - lenQ), lenQ, lenQ); if (lenB >= lenQ) { _nmod_poly_reverse(Brev, B + (lenB - lenQ), lenQ, lenQ); } else { _nmod_poly_reverse(Brev, B, lenB, lenB); flint_mpn_zero(Brev + lenB, lenQ - lenB); } _nmod_poly_div_series(Q, Arev, Brev, lenQ, mod); _nmod_poly_reverse(Q, Q, lenQ, lenQ); _nmod_vec_clear(Arev); }
/* truncates, returns inexact */ int _arf_get_integer_mpn(mp_ptr y, mp_srcptr x, mp_size_t xn, slong exp) { slong bot_exp = exp - xn * FLINT_BITS; if (bot_exp >= 0) { mp_size_t bot_limbs; mp_bitcnt_t bot_bits; bot_limbs = bot_exp / FLINT_BITS; bot_bits = bot_exp % FLINT_BITS; flint_mpn_zero(y, bot_limbs); if (bot_bits == 0) flint_mpn_copyi(y + bot_limbs, x, xn); else y[bot_limbs + xn] = mpn_lshift(y + bot_limbs, x, xn, bot_bits); /* exact */ return 0; } else if (exp <= 0) { /* inexact */ return 1; } else { mp_size_t top_limbs; mp_bitcnt_t top_bits; mp_limb_t cy; top_limbs = exp / FLINT_BITS; top_bits = exp % FLINT_BITS; if (top_bits == 0) { flint_mpn_copyi(y, x + xn - top_limbs, top_limbs); /* inexact */ return 1; } else { /* can be inexact */ cy = mpn_rshift(y, x + xn - top_limbs - 1, top_limbs + 1, FLINT_BITS - top_bits); return (cy != 0) || (top_limbs + 1 != xn); } } }
/* computes x + y * 2^shift (optionally negated) */ slong _fmpr_add_mpn(fmpr_t z, mp_srcptr xman, mp_size_t xn, int xsign, const fmpz_t xexp, mp_srcptr yman, mp_size_t yn, int ysign, const fmpz_t yexp, slong shift, slong prec, fmpr_rnd_t rnd) { slong tn, zn, alloc, ret, shift_bits, shift_limbs; int negative; mp_limb_t tmp_stack[ADD_STACK_ALLOC]; mp_limb_t cy; mp_ptr tmp, tmp2; shift_limbs = shift / FLINT_BITS; shift_bits = shift % FLINT_BITS; /* x does not overlap with y or the result -- outcome is equivalent to adding/subtracting a small number to/from y and rounding */ if (shift > xn * FLINT_BITS && prec != FMPR_PREC_EXACT && xn * FLINT_BITS + prec - (FLINT_BITS * (yn - 1)) < shift) { zn = (prec + FLINT_BITS - 1) / FLINT_BITS; zn = FLINT_MAX(zn, yn) + 2; shift_limbs = zn - yn; alloc = zn; ADD_TMP_ALLOC flint_mpn_zero(tmp, shift_limbs); flint_mpn_copyi(tmp + shift_limbs, yman, yn); if (xsign == ysign) { tmp[0] = 1; } else { mpn_sub_1(tmp, tmp, zn, 1); while (tmp[zn-1] == 0) zn--; } ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn, ysign, prec, rnd); shift -= shift_limbs * FLINT_BITS; fmpz_add_si_inline(fmpr_expref(z), yexp, shift); ADD_TMP_FREE return ret; }
void fmpz_poly_bit_unpack(fmpz_poly_t poly, const fmpz_t f, mp_bitcnt_t bit_size) { slong len; mpz_t tmp; int negate, borrow; if (bit_size == 0 || fmpz_is_zero(f)) { fmpz_poly_zero(poly); return; } /* Round up */ len = (fmpz_bits(f) + bit_size - 1) / bit_size; negate = (fmpz_sgn(f) < 0) ? -1 : 0; mpz_init2(tmp, bit_size*len); /* TODO: avoid all this wastefulness */ flint_mpn_zero(tmp->_mp_d, tmp->_mp_alloc); fmpz_get_mpz(tmp, f); fmpz_poly_fit_length(poly, len + 1); borrow = _fmpz_poly_bit_unpack(poly->coeffs, len, tmp->_mp_d, bit_size, negate); if (borrow) { fmpz_set_si(poly->coeffs + len, negate ? WORD(-1) : WORD(1)); _fmpz_poly_set_length(poly, len + 1); } else { _fmpz_poly_set_length(poly, len); _fmpz_poly_normalise(poly); } mpz_clear(tmp); }
void nmod_poly_asin_series(nmod_poly_t g, const nmod_poly_t h, slong n) { mp_ptr h_coeffs; slong h_len = h->length; if (h_len > 0 && h->coeffs[0] != UWORD(0)) { flint_printf("Exception (nmod_poly_asin_series). Constant term != 0.\n"); abort(); } if (h_len == 1 || n < 2) { nmod_poly_zero(g); return; } nmod_poly_fit_length(g, n); if (h_len < n) { h_coeffs = _nmod_vec_init(n); flint_mpn_copyi(h_coeffs, h->coeffs, h_len); flint_mpn_zero(h_coeffs + h_len, n - h_len); } else h_coeffs = h->coeffs; _nmod_poly_asin_series(g->coeffs, h_coeffs, n, h->mod); if (h_len < n) _nmod_vec_clear(h_coeffs); g->length = n; _nmod_poly_normalise(g); }
slong _nmod_poly_xgcd_hgcd(mp_ptr G, mp_ptr S, mp_ptr T, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod) { const slong cutoff = FLINT_BIT_COUNT(mod.n) <= 8 ? NMOD_POLY_SMALL_GCD_CUTOFF : NMOD_POLY_GCD_CUTOFF; slong lenG, lenS, lenT; if (lenB == 1) { G[0] = B[0]; T[0] = 1; lenG = 1; lenS = 0; lenT = 1; } else { mp_ptr q = _nmod_vec_init(lenA + lenB); mp_ptr r = q + lenA; slong lenq, lenr; __divrem(q, lenq, r, lenr, A, lenA, B, lenB); if (lenr == 0) { __set(G, lenG, B, lenB); T[0] = 1; lenS = 0; lenT = 1; } else { mp_ptr h, j, v, w, R[4], X; slong lenh, lenj, lenv, lenw, lenR[4]; int sgnR; lenh = lenj = lenB; lenv = lenw = lenA + lenB - 2; lenR[0] = lenR[1] = lenR[2] = lenR[3] = (lenB + 1) / 2; X = _nmod_vec_init(2 * lenh + 2 * lenv + 4 * lenR[0]); h = X; j = h + lenh; v = j + lenj; w = v + lenv; R[0] = w + lenw; R[1] = R[0] + lenR[0]; R[2] = R[1] + lenR[1]; R[3] = R[2] + lenR[2]; sgnR = _nmod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, B, lenB, r, lenr, mod); if (sgnR > 0) { _nmod_vec_neg(S, R[1], lenR[1], mod); _nmod_vec_set(T, R[0], lenR[0]); } else { _nmod_vec_set(S, R[1], lenR[1]); _nmod_vec_neg(T, R[0], lenR[0], mod); } lenS = lenR[1]; lenT = lenR[0]; while (lenj != 0) { __divrem(q, lenq, r, lenr, h, lenh, j, lenj); __mul(v, lenv, q, lenq, T, lenT); { slong l; _nmod_vec_swap(S, T, FLINT_MAX(lenS, lenT)); l = lenS; lenS = lenT; lenT = l; } __sub(T, lenT, T, lenT, v, lenv); if (lenr == 0) { __set(G, lenG, j, lenj); goto cofactor; } if (lenj < cutoff) { mp_ptr u0 = R[0], u1 = R[1]; slong lenu0 = lenr - 1, lenu1 = lenj - 1; lenG = _nmod_poly_xgcd_euclidean(G, u0, u1, j, lenj, r, lenr, mod); MPN_NORM(u0, lenu0); MPN_NORM(u1, lenu1); __mul(v, lenv, S, lenS, u0, lenu0); __mul(w, lenw, T, lenT, u1, lenu1); __add(S, lenS, v, lenv, w, lenw); goto cofactor; } sgnR = _nmod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, j,lenj, r, lenr, mod); __mul(v, lenv, R[1], lenR[1], T, lenT); __mul(w, lenw, R[2], lenR[2], S, lenS); __mul(q, lenq, S, lenS, R[3], lenR[3]); if (sgnR > 0) __sub(S, lenS, q, lenq, v, lenv); else __sub(S, lenS, v, lenv, q, lenq); __mul(q, lenq, T, lenT, R[0], lenR[0]); if (sgnR > WORD(0)) __sub(T, lenT, q, lenq, w, lenw); else __sub(T, lenT, w, lenw, q, lenq); } __set(G, lenG, h, lenh); cofactor: __mul(v, lenv, S, lenS, A, lenA); __sub(w, lenw, G, lenG, v, lenv); __div(T, lenT, w, lenw, B, lenB); _nmod_vec_clear(X); } _nmod_vec_clear(q); } flint_mpn_zero(S + lenS, lenB - 1 - lenS); flint_mpn_zero(T + lenT, lenA - 1 - lenT); return lenG; }
void _nmod_poly_divrem_divconquer_recursive(mp_ptr Q, mp_ptr BQ, mp_ptr W, mp_ptr V, mp_srcptr A, mp_srcptr B, slong lenB, nmod_t mod) { if (lenB <= NMOD_DIVREM_DIVCONQUER_CUTOFF) { mp_ptr t = V; mp_ptr w = t + 2*lenB - 1; flint_mpn_copyi(t + lenB - 1, A + lenB - 1, lenB); flint_mpn_zero(t, lenB - 1); _nmod_poly_divrem_basecase(Q, BQ, w, t, 2 * lenB - 1, B, lenB, mod); /* BQ = A - R */ _nmod_vec_neg(BQ, BQ, lenB - 1, mod); } else { const slong n2 = lenB / 2; const slong n1 = lenB - n2; mp_ptr W1 = W; mp_ptr W2 = W + n2; mp_srcptr p1 = A + 2 * n2; mp_srcptr p2; mp_srcptr d1 = B + n2; mp_srcptr d2 = B; mp_srcptr d3 = B + n1; mp_srcptr d4 = B; mp_ptr q1 = Q + n2; mp_ptr q2 = Q; mp_ptr dq1 = BQ + n2; mp_ptr d1q1 = BQ + n2 - (n1 - 1); mp_ptr d2q1, d3q2, d4q2, t; /* Set q1 to p1 div d1, a 2 n1 - 1 by n1 division so q1 ends up being of length n1; low(d1q1) = d1 q1 is of length n1 - 1 */ _nmod_poly_divrem_divconquer_recursive(q1, d1q1, W1, V, p1, d1, n1, mod); /* Compute bottom n1 + n2 - 1 coeffs of d2q1 = d2 q1 */ d2q1 = W1; _nmod_poly_mullow(d2q1, q1, n1, d2, n2, n1 + n2 - 1, mod); /* Compute dq1 = d1 q1 x^n2 + d2 q1, of length n1 + n2 - 1 Split it into a segment of length n1 - 1 at dq1 and a piece of length n2 at BQ. */ flint_mpn_copyi(dq1, d2q1, n1 - 1); if (n2 > n1 - 1) BQ[0] = d2q1[n1 - 1]; _nmod_vec_add(d1q1, d1q1, d2q1 + n2, n1 - 1, mod); /* Compute t = A/x^n2 - dq1, which has length 2 n1 + n2 - 1, but we are not interested in the top n1 coeffs as they will be zero, so this has effective length n1 + n2 - 1 For the following division, we want to set {p2, 2 n2 - 1} to the top 2 n2 - 1 coeffs of this Since the bottom n2 - 1 coeffs of p2 are irrelevant for the division, we in fact set {t, n2} to the relevant coeffs */ t = W1; _nmod_vec_sub(t, A + n2 + (n1 - 1), BQ, n2, mod); p2 = t - (n2 - 1); /* Compute q2 = t div d3, a 2 n2 - 1 by n2 division, so q2 will have length n2; let low(d3q2) = d3 q2, of length n2 - 1 */ d3q2 = BQ; _nmod_poly_divrem_divconquer_recursive(q2, d3q2, W2, V, p2, d3, n2, mod); /* Compute d4q2 = d4 q2, of length n1 + n2 - 1 */ d4q2 = W1; _nmod_poly_mullow(d4q2, d4, n1, q2, n2, n1 + n2 - 1, mod); /* Compute dq2 = d3q2 x^n1 + d4q2, of length n1 + n2 - 1 */ _nmod_vec_add(BQ + n1, BQ + n1, d3q2, n2 - 1, mod); flint_mpn_copyi(BQ, d4q2, n2); _nmod_vec_add(BQ + n2, BQ + n2, d4q2 + n2, n1 - 1, mod); /* Note Q = q1 x^n2 + q2, and BQ = dq1 x^n2 + dq2 */ } }
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly, int alternating) { mp_ptr s, t, xpow; mp_limb_t new_denom, old_denom, c; slong power, k, m; int cosorsin; TMP_INIT; TMP_START; if (2 * N >= FACTORIAL_TAB_SIZE - 1) { flint_printf("_arb_sin_cos_taylor_rs: N too large!\n"); abort(); } if (N <= 1) { if (N == 0) { flint_mpn_zero(ysin, xn); if (!sinonly) flint_mpn_zero(ycos, xn); error[0] = 0; } else if (N == 1) { flint_mpn_copyi(ysin, x, xn); if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES); error[0] = 1; } } else { /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */ m = 2; while (m * m < N) m += 2; /* todo: merge allocations */ xpow = TMP_ALLOC_LIMBS((m + 1) * xn); s = TMP_ALLOC_LIMBS(xn + 2); t = TMP_ALLOC_LIMBS(2 * xn + 2); /* todo: 1 limb too much? */ /* higher index ---> */ /* | ---xn--- | */ /* xpow = | <temp> | x^m | x^(m-1) | ... | x^2 | x | */ #define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn) #define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn) mpn_sqr(XPOW_WRITE(1), x, xn); mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn); for (k = 4; k <= m; k += 2) { mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn); mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn); } for (cosorsin = sinonly; cosorsin < 2; cosorsin++) { flint_mpn_zero(s, xn + 1); /* todo: skip one nonscalar multiplication (use x^m) when starting on x^0 */ power = (N - 1) % m; for (k = N - 1; k >= 0; k--) { c = factorial_tab_numer[2 * k + cosorsin]; new_denom = factorial_tab_denom[2 * k + cosorsin]; old_denom = factorial_tab_denom[2 * k + cosorsin + 2]; /* change denominators */ if (new_denom != old_denom && k < N - 1) { if (alternating && (k % 2 == 0)) s[xn] += old_denom; mpn_divrem_1(s, 0, s, xn + 1, old_denom); if (alternating && (k % 2 == 0)) s[xn] -= 1; } if (power == 0) { /* add c * x^0 -- only top limb is affected */ if (alternating & k) s[xn] -= c; else s[xn] += c; /* Outer polynomial evaluation: multiply by x^m */ if (k != 0) { mpn_mul(t, s, xn + 1, XPOW_READ(m), xn); flint_mpn_copyi(s, t + xn, xn + 1); } power = m - 1; } else { if (alternating & k) s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c); else s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c); power--; } } /* finally divide by denominator */ if (cosorsin == 0) { mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]); /* perturb down to a number < 1 if necessary. note that this does not invalidate the error bound: 1 - ulp is either 1 ulp too small or must be closer to the exact value */ if (t[xn] == 0) flint_mpn_copyi(ycos, t, xn); else flint_mpn_store(ycos, xn, LIMB_ONES); } else { mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]); mpn_mul(t, s, xn + 1, x, xn); flint_mpn_copyi(ysin, t + xn, xn); } } /* error bound (ulp) */ error[0] = 2; } TMP_END; }
void _arb_exp_taylor_rs(mp_ptr y, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N) { mp_ptr s, t, xpow; mp_limb_t new_denom, old_denom, c; slong power, k, m; TMP_INIT; TMP_START; if (N >= FACTORIAL_TAB_SIZE - 1) { flint_printf("_arb_exp_taylor_rs: N too large!\n"); abort(); } if (N <= 3) { if (N <= 1) { flint_mpn_zero(y, xn); y[xn] = N; error[0] = 0; } else if (N == 2) { flint_mpn_copyi(y, x, xn); y[xn] = 1; error[0] = 0; } else { /* 1 + x + x^2 / 2 */ t = TMP_ALLOC_LIMBS(2 * xn); mpn_sqr(t, x, xn); mpn_rshift(t + xn, t + xn, xn, 1); y[xn] = mpn_add_n(y, x, t + xn, xn) + 1; error[0] = 2; } } else { /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */ /* TODO: drop evenness assumption since we don't have sign issues here? */ /* TODO: then just need to fix power construction below... */ m = 2; while (m * m < N) m += 2; /* todo: merge allocations */ xpow = TMP_ALLOC_LIMBS((m + 1) * xn); s = TMP_ALLOC_LIMBS(xn + 2); t = TMP_ALLOC_LIMBS(2 * xn + 2); /* todo: 1 limb too much? */ /* higher index ---> */ /* | ---xn--- | */ /* xpow = | <temp> | x^m | x^(m-1) | ... | x^2 | x | */ #define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn) #define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn) flint_mpn_copyi(XPOW_READ(1), x, xn); mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn); for (k = 4; k <= m; k += 2) { mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn); mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn); } flint_mpn_zero(s, xn + 1); /* todo: skip one nonscalar multiplication (use x^m) when starting on x^0 */ power = (N - 1) % m; for (k = N - 1; k >= 0; k--) { c = factorial_tab_numer[k]; new_denom = factorial_tab_denom[k]; old_denom = factorial_tab_denom[k+1]; /* change denominators */ if (new_denom != old_denom && k < N - 1) { mpn_divrem_1(s, 0, s, xn + 1, old_denom); } if (power == 0) { /* add c * x^0 -- only top limb is affected */ s[xn] += c; /* Outer polynomial evaluation: multiply by x^m */ if (k != 0) { mpn_mul(t, s, xn + 1, XPOW_READ(m), xn); flint_mpn_copyi(s, t + xn, xn + 1); } power = m - 1; } else { s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c); power--; } } /* finally divide by denominator */ mpn_divrem_1(y, 0, s, xn + 1, factorial_tab_denom[0]); /* error bound (ulp) */ error[0] = 2; } TMP_END; }
void _arb_atan_taylor_naive(mp_ptr y, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int alternating) { ulong k; mp_ptr s, t, x1, x2, u; mp_size_t nn = xn + 1; if (N == 0) { flint_mpn_zero(y, xn); error[0] = 0; return; } if (N == 1) { flint_mpn_copyi(y, x, xn); error[0] = 0; } s = flint_malloc(sizeof(mp_limb_t) * nn); t = flint_malloc(sizeof(mp_limb_t) * nn); u = flint_malloc(sizeof(mp_limb_t) * 2 * nn); x1 = flint_malloc(sizeof(mp_limb_t) * nn); x2 = flint_malloc(sizeof(mp_limb_t) * nn); flint_mpn_zero(s, nn); flint_mpn_zero(t, nn); flint_mpn_zero(u, 2 * nn); flint_mpn_zero(x1, nn); flint_mpn_zero(x2, nn); /* x1 = x */ flint_mpn_copyi(x1 + 1, x, xn); /* x2 = x * x */ mpn_mul_n(u, x1, x1, nn); flint_mpn_copyi(x2, u + nn, nn); /* s = t = x */ flint_mpn_copyi(s, x1, nn); flint_mpn_copyi(t, x1, nn); for (k = 1; k < N; k++) { /* t = t * x2 */ mpn_mul_n(u, t, x2, nn); flint_mpn_copyi(t, u + nn, nn); /* u = t / (2k+1) */ mpn_divrem_1(u, 0, t, nn, 2 * k + 1); if (alternating & k) mpn_sub_n(s, s, u, nn); else mpn_add_n(s, s, u, nn); } flint_mpn_copyi(y, s + 1, xn); error[0] = 2; flint_free(s); flint_free(t); flint_free(u); flint_free(x1); flint_free(x2); }
int _arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error, const arf_t x, mp_size_t wn) { mp_srcptr xp; mp_size_t xn; int negative; long exp; ARF_GET_MPN_READONLY(xp, xn, x); exp = ARF_EXP(x); negative = ARF_SGNBIT(x); if (exp <= -1) { /* todo: just zero top */ flint_mpn_zero(w, wn); *error = _arf_get_integer_mpn(w, xp, xn, exp + wn * FLINT_BITS); if (!negative) { fmpz_zero(q); } else { if (wn > ARB_LOG_TAB2_LIMBS) return 0; mpn_sub_n(w, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - wn, w, wn); *error += 1; /* log(2) has 1 ulp error */ fmpz_set_si(q, -1); } return 1; /* success */ } else { mp_ptr qp, rp, np; mp_srcptr dp; mp_size_t qn, rn, nn, dn, tn, alloc; TMP_INIT; tn = ((exp + 2) + FLINT_BITS - 1) / FLINT_BITS; dn = wn + tn; /* denominator */ nn = wn + 2 * tn; /* numerator */ qn = nn - dn + 1; /* quotient */ rn = dn; /* remainder */ if (dn > ARB_LOG_TAB2_LIMBS) return 0; TMP_START; alloc = qn + rn + nn; qp = TMP_ALLOC_LIMBS(alloc); rp = qp + qn; np = rp + rn; dp = arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - dn; /* todo: prove that zeroing is unnecessary */ flint_mpn_zero(np, nn); _arf_get_integer_mpn(np, xp, xn, exp + dn * FLINT_BITS); mpn_tdiv_qr(qp, rp, 0, np, nn, dp, dn); if (!negative) { flint_mpn_copyi(w, rp + tn, wn); *error = 2; } else { if (mpn_add_1(qp, qp, qn, 1)) { /* I believe this cannot happen (should prove it) */ printf("mod log(2): unexpected carry\n"); abort(); } mpn_sub_n(w, dp + tn, rp + tn, wn); *error = 3; } /* read the exponent */ while (qn > 1 && qp[qn-1] == 0) qn--; if (qn == 1) { if (!negative) fmpz_set_ui(q, qp[0]); else fmpz_neg_ui(q, qp[0]); } else { fmpz_set_mpn_large(q, qp, qn, negative); } TMP_END; return 1; } }
void arb_atan_arf(arb_t z, const arf_t x, slong prec) { if (arf_is_special(x)) { if (arf_is_zero(x)) { arb_zero(z); } else if (arf_is_pos_inf(x)) { arb_const_pi(z, prec); arb_mul_2exp_si(z, z, -1); } else if (arf_is_neg_inf(x)) { arb_const_pi(z, prec); arb_mul_2exp_si(z, z, -1); arb_neg(z, z); } else { arb_indeterminate(z); } } else if (COEFF_IS_MPZ(*ARF_EXPREF(x))) { if (fmpz_sgn(ARF_EXPREF(x)) < 0) arb_atan_eps(z, x, prec); else arb_atan_inf_eps(z, x, prec); } else { slong exp, wp, wn, N, r; mp_srcptr xp; mp_size_t xn, tn; mp_ptr tmp, w, t, u; mp_limb_t p1, q1bits, p2, q2bits, error, error2; int negative, inexact, reciprocal; TMP_INIT; exp = ARF_EXP(x); negative = ARF_SGNBIT(x); if (exp < -(prec/2) - 2 || exp > prec + 2) { if (exp < 0) arb_atan_eps(z, x, prec); else arb_atan_inf_eps(z, x, prec); return; } ARF_GET_MPN_READONLY(xp, xn, x); /* Special case: +/- 1 (we require |x| != 1 later on) */ if (exp == 1 && xn == 1 && xp[xn-1] == LIMB_TOP) { arb_const_pi(z, prec); arb_mul_2exp_si(z, z, -2); if (negative) arb_neg(z, z); return; } /* Absolute working precision (NOT rounded to a limb multiple) */ wp = prec - FLINT_MIN(0, exp) + 4; /* Too high precision to use table */ if (wp > ARB_ATAN_TAB2_PREC) { arb_atan_arf_bb(z, x, prec); return; } /* Working precision in limbs */ wn = (wp + FLINT_BITS - 1) / FLINT_BITS; TMP_START; tmp = TMP_ALLOC_LIMBS(4 * wn + 3); w = tmp; /* requires wn+1 limbs */ t = w + wn + 1; /* requires wn+1 limbs */ u = t + wn + 1; /* requires 2wn+1 limbs */ /* ----------------------------------------------------------------- */ /* Convert x or 1/x to a fixed-point number |w| < 1 */ /* ----------------------------------------------------------------- */ if (exp <= 0) /* |x| < 1 */ { reciprocal = 0; /* todo: just zero top */ flint_mpn_zero(w, wn); /* w = x as a fixed-point number */ error = _arf_get_integer_mpn(w, xp, xn, exp + wn * FLINT_BITS); } else /* |x| > 1 */ { slong one_exp, one_limbs, one_bits; mp_ptr one; reciprocal = 1; one_exp = xn * FLINT_BITS + wn * FLINT_BITS - exp; flint_mpn_zero(w, wn); /* 1/x becomes zero */ if (one_exp >= FLINT_BITS - 1) { /* w = 1/x */ one_limbs = one_exp / FLINT_BITS; one_bits = one_exp % FLINT_BITS; if (one_limbs + 1 >= xn) { one = TMP_ALLOC_LIMBS(one_limbs + 1); flint_mpn_zero(one, one_limbs); one[one_limbs] = UWORD(1) << one_bits; /* todo: only zero necessary part */ flint_mpn_zero(w, wn); mpn_tdiv_q(w, one, one_limbs + 1, xp, xn); /* Now w must be < 1 since x > 1 and we rounded down; thus w[wn] must be zero */ } } /* todo: moderate powers of two would be exact... */ error = 1; } /* ----------------------------------------------------------------- */ /* Table-based argument reduction */ /* ----------------------------------------------------------------- */ /* choose p such that p/q <= x < (p+1)/q */ if (wp <= ARB_ATAN_TAB1_PREC) q1bits = ARB_ATAN_TAB1_BITS; else q1bits = ARB_ATAN_TAB21_BITS; p1 = w[wn-1] >> (FLINT_BITS - q1bits); /* atan(w) = atan(p/q) + atan(w2) */ /* where w2 = (q*w-p)/(q+p*w) */ if (p1 != 0) { t[wn] = (UWORD(1) << q1bits) + mpn_mul_1(t, w, wn, p1); flint_mpn_zero(u, wn); u[2 * wn] = mpn_lshift(u + wn, w, wn, q1bits) - p1; mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1); error++; /* w2 is computed with 1 ulp error */ } /* Do a second round of argument reduction */ if (wp <= ARB_ATAN_TAB1_PREC) { p2 = 0; } else { q2bits = ARB_ATAN_TAB21_BITS + ARB_ATAN_TAB22_BITS; p2 = w[wn-1] >> (FLINT_BITS - q2bits); if (p2 != 0) { t[wn] = (UWORD(1) << q2bits) + mpn_mul_1(t, w, wn, p2); flint_mpn_zero(u, wn); u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits) - p2; mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1); error++; } } /* |w| <= 2^-r */ r = _arb_mpn_leading_zeros(w, wn); /* N >= (wp-r)/(2r) */ N = (wp - r + (2*r-1)) / (2*r); /* Evaluate Taylor series */ _arb_atan_taylor_rs(t, &error2, w, wn, N, 1); /* Taylor series evaluation error */ error += error2; /* Size of output number */ tn = wn; /* First table lookup */ if (p1 != 0) { if (wp <= ARB_ATAN_TAB1_PREC) mpn_add_n(t, t, arb_atan_tab1[p1] + ARB_ATAN_TAB1_LIMBS - tn, tn); else mpn_add_n(t, t, arb_atan_tab21[p1] + ARB_ATAN_TAB2_LIMBS - tn, tn); error++; } /* Second table lookup */ if (p2 != 0) { mpn_add_n(t, t, arb_atan_tab22[p2] + ARB_ATAN_TAB2_LIMBS - tn, tn); error++; } /* pi/2 - atan(1/x) */ if (reciprocal) { t[tn] = LIMB_ONE - mpn_sub_n(t, arb_atan_pi2_minus_one + ARB_ATAN_TAB2_LIMBS - tn, t, tn); /* result can be >= 1 */ tn += (t[tn] != 0); /* error of pi/2 */ error++; } /* The accumulated arithmetic error */ mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS); /* Truncation error from the Taylor series */ mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1)); /* Set the midpoint */ inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec, ARB_RND); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); TMP_END; } }
void mul_mfa_truncate_sqrt2(mp_ptr r1, mp_srcptr i1, mp_size_t n1, mp_srcptr i2, mp_size_t n2, mp_bitcnt_t depth, mp_bitcnt_t w) { mp_size_t n = (UWORD(1)<<depth); mp_bitcnt_t bits1 = (n*w - (depth+1))/2; mp_size_t sqrt = (UWORD(1)<<(depth/2)); mp_size_t r_limbs = n1 + n2; mp_size_t limbs = (n*w)/FLINT_BITS; mp_size_t size = limbs + 1; mp_size_t j1 = (n1*FLINT_BITS - 1)/bits1 + 1; mp_size_t j2 = (n2*FLINT_BITS - 1)/bits1 + 1; mp_size_t i, j, trunc; mp_limb_t ** ii, ** jj, * t1, * t2, * s1, * ptr; mp_limb_t * tt; ii = flint_malloc((4*(n + n*size) + 5*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) { ii[i] = ptr; } t1 = ptr; t2 = t1 + size; s1 = t2 + size; tt = s1 + size; if (i1 != i2) { jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) { jj[i] = ptr; } } else jj = ii; trunc = j1 + j2 - 1; if (trunc <= 2*n) trunc = 2*n + 1; trunc = 2*sqrt*((trunc + 2*sqrt - 1)/(2*sqrt)); /* trunc must be divisible by 2*sqrt */ j1 = fft_split_bits(ii, i1, n1, bits1, limbs); for (j = j1 ; j < 4*n; j++) flint_mpn_zero(ii[j], limbs + 1); fft_mfa_truncate_sqrt2_outer(ii, n, w, &t1, &t2, &s1, sqrt, trunc); if (i1 != i2) { j2 = fft_split_bits(jj, i2, n2, bits1, limbs); for (j = j2 ; j < 4*n; j++) flint_mpn_zero(jj[j], limbs + 1); fft_mfa_truncate_sqrt2_outer(jj, n, w, &t1, &t2, &s1, sqrt, trunc); } else j2 = j1; fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, &t1, &t2, &s1, sqrt, trunc, tt); ifft_mfa_truncate_sqrt2_outer(ii, n, w, &t1, &t2, &s1, sqrt, trunc); flint_mpn_zero(r1, r_limbs); fft_combine_bits(r1, ii, j1 + j2 - 1, bits1, limbs, r_limbs); flint_free(ii); if (i1 != i2) flint_free(jj); }
void arb_log_arf(arb_t z, const arf_t x, slong prec) { if (arf_is_special(x)) { if (arf_is_pos_inf(x)) arb_pos_inf(z); else arb_indeterminate(z); } else if (ARF_SGNBIT(x)) { arb_indeterminate(z); } else if (ARF_IS_POW2(x)) { if (fmpz_is_one(ARF_EXPREF(x))) { arb_zero(z); } else { fmpz_t exp; fmpz_init(exp); _fmpz_add_fast(exp, ARF_EXPREF(x), -1); arb_const_log2(z, prec + 2); arb_mul_fmpz(z, z, exp, prec); fmpz_clear(exp); } } else if (COEFF_IS_MPZ(*ARF_EXPREF(x))) { arb_log_arf_huge(z, x, prec); } else { slong exp, wp, wn, N, r, closeness_to_one; mp_srcptr xp; mp_size_t xn, tn; mp_ptr tmp, w, t, u; mp_limb_t p1, q1bits, p2, q2bits, error, error2, cy; int negative, inexact, used_taylor_series; TMP_INIT; exp = ARF_EXP(x); negative = 0; ARF_GET_MPN_READONLY(xp, xn, x); /* compute a c >= 0 such that |x-1| <= 2^(-c) if c > 0 */ closeness_to_one = 0; if (exp == 0) { slong i; closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(~xp[xn - 1]); if (closeness_to_one == FLINT_BITS) { for (i = xn - 2; i > 0 && xp[i] == LIMB_ONES; i--) closeness_to_one += FLINT_BITS; closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(~xp[i])); } } else if (exp == 1) { closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(xp[xn - 1] & (~LIMB_TOP)); if (closeness_to_one == FLINT_BITS) { slong i; for (i = xn - 2; xp[i] == 0; i--) closeness_to_one += FLINT_BITS; closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(xp[i])); } closeness_to_one--; } /* if |t-1| <= 0.5 */ /* |log(1+t) - t| <= t^2 */ /* |log(1+t) - (t-t^2/2)| <= t^3 */ if (closeness_to_one > prec + 1) { inexact = arf_sub_ui(arb_midref(z), x, 1, prec, ARB_RND); mag_set_ui_2exp_si(arb_radref(z), 1, -2 * closeness_to_one); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); return; } else if (2 * closeness_to_one > prec + 1) { arf_t t, u; arf_init(t); arf_init(u); arf_sub_ui(t, x, 1, ARF_PREC_EXACT, ARF_RND_DOWN); arf_mul(u, t, t, ARF_PREC_EXACT, ARF_RND_DOWN); arf_mul_2exp_si(u, u, -1); inexact = arf_sub(arb_midref(z), t, u, prec, ARB_RND); mag_set_ui_2exp_si(arb_radref(z), 1, -3 * closeness_to_one); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); arf_clear(t); arf_clear(u); return; } /* Absolute working precision (NOT rounded to a limb multiple) */ wp = prec + closeness_to_one + 5; /* Too high precision to use table */ if (wp > ARB_LOG_TAB2_PREC) { arf_log_via_mpfr(arb_midref(z), x, prec, ARB_RND); arf_mag_set_ulp(arb_radref(z), arb_midref(z), prec); return; } /* Working precision in limbs */ wn = (wp + FLINT_BITS - 1) / FLINT_BITS; TMP_START; tmp = TMP_ALLOC_LIMBS(4 * wn + 3); w = tmp; /* requires wn+1 limbs */ t = w + wn + 1; /* requires wn+1 limbs */ u = t + wn + 1; /* requires 2wn+1 limbs */ /* read x-1 */ if (xn <= wn) { flint_mpn_zero(w, wn - xn); mpn_lshift(w + wn - xn, xp, xn, 1); error = 0; } else { mpn_lshift(w, xp + xn - wn, wn, 1); error = 1; } /* First table-based argument reduction */ if (wp <= ARB_LOG_TAB1_PREC) q1bits = ARB_LOG_TAB11_BITS; else q1bits = ARB_LOG_TAB21_BITS; p1 = w[wn-1] >> (FLINT_BITS - q1bits); /* Special case: covers logarithms of small integers */ if (xn == 1 && (w[wn-1] == (p1 << (FLINT_BITS - q1bits)))) { p2 = 0; flint_mpn_zero(t, wn); used_taylor_series = 0; N = r = 0; /* silence compiler warning */ } else { /* log(1+w) = log(1+p/q) + log(1 + (qw-p)/(p+q)) */ w[wn] = mpn_mul_1(w, w, wn, UWORD(1) << q1bits) - p1; mpn_divrem_1(w, 0, w, wn + 1, p1 + (UWORD(1) << q1bits)); error += 1; /* Second table-based argument reduction (fused with log->atanh conversion) */ if (wp <= ARB_LOG_TAB1_PREC) q2bits = ARB_LOG_TAB11_BITS + ARB_LOG_TAB12_BITS; else q2bits = ARB_LOG_TAB21_BITS + ARB_LOG_TAB22_BITS; p2 = w[wn-1] >> (FLINT_BITS - q2bits); u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits); flint_mpn_zero(u, wn); flint_mpn_copyi(t, u + wn, wn + 1); t[wn] += p2 + (UWORD(1) << (q2bits + 1)); u[2 * wn] -= p2; mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1); /* propagated error from 1 ulp error: 2 atanh'(1/3) = 2.25 */ error += 3; /* |w| <= 2^-r */ r = _arb_mpn_leading_zeros(w, wn); /* N >= (wp-r)/(2r) */ N = (wp - r + (2*r-1)) / (2*r); N = FLINT_MAX(N, 0); /* Evaluate Taylor series */ _arb_atan_taylor_rs(t, &error2, w, wn, N, 0); /* Multiply by 2 */ mpn_lshift(t, t, wn, 1); /* Taylor series evaluation error (multiply by 2) */ error += error2 * 2; used_taylor_series = 1; } /* Size of output number */ tn = wn; /* First table lookup */ if (p1 != 0) { if (wp <= ARB_LOG_TAB1_PREC) mpn_add_n(t, t, arb_log_tab11[p1] + ARB_LOG_TAB1_LIMBS - tn, tn); else mpn_add_n(t, t, arb_log_tab21[p1] + ARB_LOG_TAB2_LIMBS - tn, tn); error++; } /* Second table lookup */ if (p2 != 0) { if (wp <= ARB_LOG_TAB1_PREC) mpn_add_n(t, t, arb_log_tab12[p2] + ARB_LOG_TAB1_LIMBS - tn, tn); else mpn_add_n(t, t, arb_log_tab22[p2] + ARB_LOG_TAB2_LIMBS - tn, tn); error++; } /* add exp * log(2) */ exp--; if (exp > 0) { cy = mpn_addmul_1(t, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, exp); t[tn] = cy; tn += (cy != 0); error += exp; } else if (exp < 0) { t[tn] = 0; u[tn] = mpn_mul_1(u, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, -exp); if (mpn_cmp(t, u, tn + 1) >= 0) { mpn_sub_n(t, t, u, tn + 1); } else { mpn_sub_n(t, u, t, tn + 1); negative = 1; } error += (-exp); tn += (t[tn] != 0); } /* The accumulated arithmetic error */ mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS); /* Truncation error from the Taylor series */ if (used_taylor_series) mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1) + 1); /* Set the midpoint */ inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); TMP_END; } }