/* truncates, returns inexact */ int _arf_get_integer_mpn(mp_ptr y, mp_srcptr x, mp_size_t xn, slong exp) { slong bot_exp = exp - xn * FLINT_BITS; if (bot_exp >= 0) { mp_size_t bot_limbs; mp_bitcnt_t bot_bits; bot_limbs = bot_exp / FLINT_BITS; bot_bits = bot_exp % FLINT_BITS; flint_mpn_zero(y, bot_limbs); if (bot_bits == 0) flint_mpn_copyi(y + bot_limbs, x, xn); else y[bot_limbs + xn] = mpn_lshift(y + bot_limbs, x, xn, bot_bits); /* exact */ return 0; } else if (exp <= 0) { /* inexact */ return 1; } else { mp_size_t top_limbs; mp_bitcnt_t top_bits; mp_limb_t cy; top_limbs = exp / FLINT_BITS; top_bits = exp % FLINT_BITS; if (top_bits == 0) { flint_mpn_copyi(y, x + xn - top_limbs, top_limbs); /* inexact */ return 1; } else { /* can be inexact */ cy = mpn_rshift(y, x + xn - top_limbs - 1, top_limbs + 1, FLINT_BITS - top_bits); return (cy != 0) || (top_limbs + 1 != xn); } } }
void nmod_mat_init_set(nmod_mat_t mat, const nmod_mat_t src) { slong rows = src->r; slong cols = src->c; if ((rows) && (cols)) { slong i; mat->entries = flint_malloc(rows * cols * sizeof(mp_limb_t)); mat->rows = flint_malloc(rows * sizeof(mp_limb_t *)); for (i = 0; i < rows; i++) { mat->rows[i] = mat->entries + i * cols; flint_mpn_copyi(mat->rows[i], src->rows[i], cols); } } else mat->entries = NULL; mat->r = rows; mat->c = cols; mat->mod = src->mod; }
void _nmod_poly_divrem_basecase_1(mp_ptr Q, mp_ptr R, mp_ptr W, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod) { const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n); slong iR; mp_ptr ptrQ = Q - lenB + 1; mp_ptr R1 = W; flint_mpn_copyi(R1, A, lenA); for (iR = lenA - 1; iR >= lenB - 1; iR--) { if (R1[iR] == 0) { ptrQ[iR] = WORD(0); } else { ptrQ[iR] = n_mulmod2_preinv(R1[iR], invL, mod.n, mod.ninv); if (lenB > 1) { const mp_limb_t c = n_negmod(ptrQ[iR], mod.n); mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c); } } } if (lenB > 1) _nmod_vec_reduce(R, R1, lenB - 1, mod); }
int arf_set_round(arf_t y, const arf_t x, slong prec, arf_rnd_t rnd) { if (arf_is_special(x)) { arf_set(y, x); return 0; } else { int inexact; slong fix; mp_size_t xn; mp_srcptr xptr; if (y == x) { mp_ptr xtmp; TMP_INIT; ARF_GET_MPN_READONLY(xptr, xn, x); /* exact */ if (xn * FLINT_BITS <= prec) return 0; if ((xn - 1) * FLINT_BITS < prec) { /* exact */ if ((xptr[0] << (prec - (xn-1) * FLINT_BITS)) == 0) return 0; } /* inexact */ TMP_START; xtmp = TMP_ALLOC(xn * sizeof(mp_limb_t)); flint_mpn_copyi(xtmp, xptr, xn); inexact = _arf_set_round_mpn(y, &fix, xtmp, xn, ARF_SGNBIT(x), prec, rnd); _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix); TMP_END; return inexact; } else { ARF_GET_MPN_READONLY(xptr, xn, x); inexact = _arf_set_round_mpn(y, &fix, xptr, xn, ARF_SGNBIT(x), prec, rnd); _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix); return inexact; } } }
/* computes x + y * 2^shift (optionally negated) */ slong _fmpr_add_mpn(fmpr_t z, mp_srcptr xman, mp_size_t xn, int xsign, const fmpz_t xexp, mp_srcptr yman, mp_size_t yn, int ysign, const fmpz_t yexp, slong shift, slong prec, fmpr_rnd_t rnd) { slong tn, zn, alloc, ret, shift_bits, shift_limbs; int negative; mp_limb_t tmp_stack[ADD_STACK_ALLOC]; mp_limb_t cy; mp_ptr tmp, tmp2; shift_limbs = shift / FLINT_BITS; shift_bits = shift % FLINT_BITS; /* x does not overlap with y or the result -- outcome is equivalent to adding/subtracting a small number to/from y and rounding */ if (shift > xn * FLINT_BITS && prec != FMPR_PREC_EXACT && xn * FLINT_BITS + prec - (FLINT_BITS * (yn - 1)) < shift) { zn = (prec + FLINT_BITS - 1) / FLINT_BITS; zn = FLINT_MAX(zn, yn) + 2; shift_limbs = zn - yn; alloc = zn; ADD_TMP_ALLOC flint_mpn_zero(tmp, shift_limbs); flint_mpn_copyi(tmp + shift_limbs, yman, yn); if (xsign == ysign) { tmp[0] = 1; } else { mpn_sub_1(tmp, tmp, zn, 1); while (tmp[zn-1] == 0) zn--; } ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn, ysign, prec, rnd); shift -= shift_limbs * FLINT_BITS; fmpz_add_si_inline(fmpr_expref(z), yexp, shift); ADD_TMP_FREE return ret; }
/* requires x != 1 */ static void arf_log_via_mpfr(arf_t z, const arf_t x, slong prec, arf_rnd_t rnd) { mpfr_t xf, zf; mp_ptr zptr, tmp; mp_srcptr xptr; mp_size_t xn, zn, val; TMP_INIT; TMP_START; zn = (prec + FLINT_BITS - 1) / FLINT_BITS; tmp = TMP_ALLOC(zn * sizeof(mp_limb_t)); ARF_GET_MPN_READONLY(xptr, xn, x); xf->_mpfr_d = (mp_ptr) xptr; xf->_mpfr_prec = xn * FLINT_BITS; xf->_mpfr_sign = ARF_SGNBIT(x) ? -1 : 1; xf->_mpfr_exp = ARF_EXP(x); zf->_mpfr_d = tmp; zf->_mpfr_prec = prec; zf->_mpfr_sign = 1; zf->_mpfr_exp = 0; mpfr_set_emin(MPFR_EMIN_MIN); mpfr_set_emax(MPFR_EMAX_MAX); mpfr_log(zf, xf, arf_rnd_to_mpfr(rnd)); val = 0; while (tmp[val] == 0) val++; ARF_GET_MPN_WRITE(zptr, zn - val, z); flint_mpn_copyi(zptr, tmp + val, zn - val); if (zf->_mpfr_sign < 0) ARF_NEG(z); fmpz_set_si(ARF_EXPREF(z), zf->_mpfr_exp); TMP_END; }
void nmod_poly_asin_series(nmod_poly_t g, const nmod_poly_t h, slong n) { mp_ptr h_coeffs; slong h_len = h->length; if (h_len > 0 && h->coeffs[0] != UWORD(0)) { flint_printf("Exception (nmod_poly_asin_series). Constant term != 0.\n"); abort(); } if (h_len == 1 || n < 2) { nmod_poly_zero(g); return; } nmod_poly_fit_length(g, n); if (h_len < n) { h_coeffs = _nmod_vec_init(n); flint_mpn_copyi(h_coeffs, h->coeffs, h_len); flint_mpn_zero(h_coeffs + h_len, n - h_len); } else h_coeffs = h->coeffs; _nmod_poly_asin_series(g->coeffs, h_coeffs, n, h->mod); if (h_len < n) _nmod_vec_clear(h_coeffs); g->length = n; _nmod_poly_normalise(g); }
void _nmod_poly_divrem_divconquer_recursive(mp_ptr Q, mp_ptr BQ, mp_ptr W, mp_ptr V, mp_srcptr A, mp_srcptr B, slong lenB, nmod_t mod) { if (lenB <= NMOD_DIVREM_DIVCONQUER_CUTOFF) { mp_ptr t = V; mp_ptr w = t + 2*lenB - 1; flint_mpn_copyi(t + lenB - 1, A + lenB - 1, lenB); flint_mpn_zero(t, lenB - 1); _nmod_poly_divrem_basecase(Q, BQ, w, t, 2 * lenB - 1, B, lenB, mod); /* BQ = A - R */ _nmod_vec_neg(BQ, BQ, lenB - 1, mod); } else { const slong n2 = lenB / 2; const slong n1 = lenB - n2; mp_ptr W1 = W; mp_ptr W2 = W + n2; mp_srcptr p1 = A + 2 * n2; mp_srcptr p2; mp_srcptr d1 = B + n2; mp_srcptr d2 = B; mp_srcptr d3 = B + n1; mp_srcptr d4 = B; mp_ptr q1 = Q + n2; mp_ptr q2 = Q; mp_ptr dq1 = BQ + n2; mp_ptr d1q1 = BQ + n2 - (n1 - 1); mp_ptr d2q1, d3q2, d4q2, t; /* Set q1 to p1 div d1, a 2 n1 - 1 by n1 division so q1 ends up being of length n1; low(d1q1) = d1 q1 is of length n1 - 1 */ _nmod_poly_divrem_divconquer_recursive(q1, d1q1, W1, V, p1, d1, n1, mod); /* Compute bottom n1 + n2 - 1 coeffs of d2q1 = d2 q1 */ d2q1 = W1; _nmod_poly_mullow(d2q1, q1, n1, d2, n2, n1 + n2 - 1, mod); /* Compute dq1 = d1 q1 x^n2 + d2 q1, of length n1 + n2 - 1 Split it into a segment of length n1 - 1 at dq1 and a piece of length n2 at BQ. */ flint_mpn_copyi(dq1, d2q1, n1 - 1); if (n2 > n1 - 1) BQ[0] = d2q1[n1 - 1]; _nmod_vec_add(d1q1, d1q1, d2q1 + n2, n1 - 1, mod); /* Compute t = A/x^n2 - dq1, which has length 2 n1 + n2 - 1, but we are not interested in the top n1 coeffs as they will be zero, so this has effective length n1 + n2 - 1 For the following division, we want to set {p2, 2 n2 - 1} to the top 2 n2 - 1 coeffs of this Since the bottom n2 - 1 coeffs of p2 are irrelevant for the division, we in fact set {t, n2} to the relevant coeffs */ t = W1; _nmod_vec_sub(t, A + n2 + (n1 - 1), BQ, n2, mod); p2 = t - (n2 - 1); /* Compute q2 = t div d3, a 2 n2 - 1 by n2 division, so q2 will have length n2; let low(d3q2) = d3 q2, of length n2 - 1 */ d3q2 = BQ; _nmod_poly_divrem_divconquer_recursive(q2, d3q2, W2, V, p2, d3, n2, mod); /* Compute d4q2 = d4 q2, of length n1 + n2 - 1 */ d4q2 = W1; _nmod_poly_mullow(d4q2, d4, n1, q2, n2, n1 + n2 - 1, mod); /* Compute dq2 = d3q2 x^n1 + d4q2, of length n1 + n2 - 1 */ _nmod_vec_add(BQ + n1, BQ + n1, d3q2, n2 - 1, mod); flint_mpn_copyi(BQ, d4q2, n2); _nmod_vec_add(BQ + n2, BQ + n2, d4q2 + n2, n1 - 1, mod); /* Note Q = q1 x^n2 + q2, and BQ = dq1 x^n2 + dq2 */ } }
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly, int alternating) { mp_ptr s, t, xpow; mp_limb_t new_denom, old_denom, c; slong power, k, m; int cosorsin; TMP_INIT; TMP_START; if (2 * N >= FACTORIAL_TAB_SIZE - 1) { flint_printf("_arb_sin_cos_taylor_rs: N too large!\n"); abort(); } if (N <= 1) { if (N == 0) { flint_mpn_zero(ysin, xn); if (!sinonly) flint_mpn_zero(ycos, xn); error[0] = 0; } else if (N == 1) { flint_mpn_copyi(ysin, x, xn); if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES); error[0] = 1; } } else { /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */ m = 2; while (m * m < N) m += 2; /* todo: merge allocations */ xpow = TMP_ALLOC_LIMBS((m + 1) * xn); s = TMP_ALLOC_LIMBS(xn + 2); t = TMP_ALLOC_LIMBS(2 * xn + 2); /* todo: 1 limb too much? */ /* higher index ---> */ /* | ---xn--- | */ /* xpow = | <temp> | x^m | x^(m-1) | ... | x^2 | x | */ #define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn) #define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn) mpn_sqr(XPOW_WRITE(1), x, xn); mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn); for (k = 4; k <= m; k += 2) { mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn); mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn); } for (cosorsin = sinonly; cosorsin < 2; cosorsin++) { flint_mpn_zero(s, xn + 1); /* todo: skip one nonscalar multiplication (use x^m) when starting on x^0 */ power = (N - 1) % m; for (k = N - 1; k >= 0; k--) { c = factorial_tab_numer[2 * k + cosorsin]; new_denom = factorial_tab_denom[2 * k + cosorsin]; old_denom = factorial_tab_denom[2 * k + cosorsin + 2]; /* change denominators */ if (new_denom != old_denom && k < N - 1) { if (alternating && (k % 2 == 0)) s[xn] += old_denom; mpn_divrem_1(s, 0, s, xn + 1, old_denom); if (alternating && (k % 2 == 0)) s[xn] -= 1; } if (power == 0) { /* add c * x^0 -- only top limb is affected */ if (alternating & k) s[xn] -= c; else s[xn] += c; /* Outer polynomial evaluation: multiply by x^m */ if (k != 0) { mpn_mul(t, s, xn + 1, XPOW_READ(m), xn); flint_mpn_copyi(s, t + xn, xn + 1); } power = m - 1; } else { if (alternating & k) s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c); else s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c); power--; } } /* finally divide by denominator */ if (cosorsin == 0) { mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]); /* perturb down to a number < 1 if necessary. note that this does not invalidate the error bound: 1 - ulp is either 1 ulp too small or must be closer to the exact value */ if (t[xn] == 0) flint_mpn_copyi(ycos, t, xn); else flint_mpn_store(ycos, xn, LIMB_ONES); } else { mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]); mpn_mul(t, s, xn + 1, x, xn); flint_mpn_copyi(ysin, t + xn, xn); } } /* error bound (ulp) */ error[0] = 2; } TMP_END; }
int arf_root(arf_ptr z, arf_srcptr x, ulong k, slong prec, arf_rnd_t rnd) { mp_size_t xn, zn, val; mp_srcptr xptr; mp_ptr tmp, zptr; mpfr_t xf, zf; fmpz_t q, r; int inexact; if (k == 0) { arf_nan(z); return 0; } if (k == 1) return arf_set_round(z, x, prec, rnd); if (k == 2) return arf_sqrt(z, x, prec, rnd); if (arf_is_special(x)) { if (arf_is_neg_inf(x)) arf_nan(z); else arf_set(z, x); return 0; } if (ARF_SGNBIT(x)) { arf_nan(z); return 0; } fmpz_init(q); fmpz_init(r); /* x = m * 2^e where e = qk + r */ /* x^(1/k) = (m * 2^(qk+r))^(1/k) */ /* x^(1/k) = (m * 2^r)^(1/k) * 2^q */ fmpz_set_ui(r, k); fmpz_fdiv_qr(q, r, ARF_EXPREF(x), r); ARF_GET_MPN_READONLY(xptr, xn, x); zn = (prec + FLINT_BITS - 1) / FLINT_BITS; zf->_mpfr_d = tmp = flint_malloc(zn * sizeof(mp_limb_t)); zf->_mpfr_prec = prec; zf->_mpfr_sign = 1; zf->_mpfr_exp = 0; xf->_mpfr_d = (mp_ptr) xptr; xf->_mpfr_prec = xn * FLINT_BITS; xf->_mpfr_sign = 1; xf->_mpfr_exp = fmpz_get_ui(r); inexact = mpfr_root(zf, xf, k, arf_rnd_to_mpfr(rnd)); inexact = (inexact != 0); val = 0; while (tmp[val] == 0) val++; ARF_GET_MPN_WRITE(zptr, zn - val, z); flint_mpn_copyi(zptr, tmp + val, zn - val); fmpz_add_si(ARF_EXPREF(z), q, zf->_mpfr_exp); flint_free(tmp); fmpz_clear(q); fmpz_clear(r); return inexact; }
void _arb_exp_taylor_rs(mp_ptr y, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N) { mp_ptr s, t, xpow; mp_limb_t new_denom, old_denom, c; slong power, k, m; TMP_INIT; TMP_START; if (N >= FACTORIAL_TAB_SIZE - 1) { flint_printf("_arb_exp_taylor_rs: N too large!\n"); abort(); } if (N <= 3) { if (N <= 1) { flint_mpn_zero(y, xn); y[xn] = N; error[0] = 0; } else if (N == 2) { flint_mpn_copyi(y, x, xn); y[xn] = 1; error[0] = 0; } else { /* 1 + x + x^2 / 2 */ t = TMP_ALLOC_LIMBS(2 * xn); mpn_sqr(t, x, xn); mpn_rshift(t + xn, t + xn, xn, 1); y[xn] = mpn_add_n(y, x, t + xn, xn) + 1; error[0] = 2; } } else { /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */ /* TODO: drop evenness assumption since we don't have sign issues here? */ /* TODO: then just need to fix power construction below... */ m = 2; while (m * m < N) m += 2; /* todo: merge allocations */ xpow = TMP_ALLOC_LIMBS((m + 1) * xn); s = TMP_ALLOC_LIMBS(xn + 2); t = TMP_ALLOC_LIMBS(2 * xn + 2); /* todo: 1 limb too much? */ /* higher index ---> */ /* | ---xn--- | */ /* xpow = | <temp> | x^m | x^(m-1) | ... | x^2 | x | */ #define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn) #define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn) flint_mpn_copyi(XPOW_READ(1), x, xn); mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn); for (k = 4; k <= m; k += 2) { mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn); mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn); } flint_mpn_zero(s, xn + 1); /* todo: skip one nonscalar multiplication (use x^m) when starting on x^0 */ power = (N - 1) % m; for (k = N - 1; k >= 0; k--) { c = factorial_tab_numer[k]; new_denom = factorial_tab_denom[k]; old_denom = factorial_tab_denom[k+1]; /* change denominators */ if (new_denom != old_denom && k < N - 1) { mpn_divrem_1(s, 0, s, xn + 1, old_denom); } if (power == 0) { /* add c * x^0 -- only top limb is affected */ s[xn] += c; /* Outer polynomial evaluation: multiply by x^m */ if (k != 0) { mpn_mul(t, s, xn + 1, XPOW_READ(m), xn); flint_mpn_copyi(s, t + xn, xn + 1); } power = m - 1; } else { s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c); power--; } } /* finally divide by denominator */ mpn_divrem_1(y, 0, s, xn + 1, factorial_tab_denom[0]); /* error bound (ulp) */ error[0] = 2; } TMP_END; }
void _arb_atan_taylor_naive(mp_ptr y, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int alternating) { ulong k; mp_ptr s, t, x1, x2, u; mp_size_t nn = xn + 1; if (N == 0) { flint_mpn_zero(y, xn); error[0] = 0; return; } if (N == 1) { flint_mpn_copyi(y, x, xn); error[0] = 0; } s = flint_malloc(sizeof(mp_limb_t) * nn); t = flint_malloc(sizeof(mp_limb_t) * nn); u = flint_malloc(sizeof(mp_limb_t) * 2 * nn); x1 = flint_malloc(sizeof(mp_limb_t) * nn); x2 = flint_malloc(sizeof(mp_limb_t) * nn); flint_mpn_zero(s, nn); flint_mpn_zero(t, nn); flint_mpn_zero(u, 2 * nn); flint_mpn_zero(x1, nn); flint_mpn_zero(x2, nn); /* x1 = x */ flint_mpn_copyi(x1 + 1, x, xn); /* x2 = x * x */ mpn_mul_n(u, x1, x1, nn); flint_mpn_copyi(x2, u + nn, nn); /* s = t = x */ flint_mpn_copyi(s, x1, nn); flint_mpn_copyi(t, x1, nn); for (k = 1; k < N; k++) { /* t = t * x2 */ mpn_mul_n(u, t, x2, nn); flint_mpn_copyi(t, u + nn, nn); /* u = t / (2k+1) */ mpn_divrem_1(u, 0, t, nn, 2 * k + 1); if (alternating & k) mpn_sub_n(s, s, u, nn); else mpn_add_n(s, s, u, nn); } flint_mpn_copyi(y, s + 1, xn); error[0] = 2; flint_free(s); flint_free(t); flint_free(u); flint_free(x1); flint_free(x2); }
int _arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error, const arf_t x, mp_size_t wn) { mp_srcptr xp; mp_size_t xn; int negative; long exp; ARF_GET_MPN_READONLY(xp, xn, x); exp = ARF_EXP(x); negative = ARF_SGNBIT(x); if (exp <= -1) { /* todo: just zero top */ flint_mpn_zero(w, wn); *error = _arf_get_integer_mpn(w, xp, xn, exp + wn * FLINT_BITS); if (!negative) { fmpz_zero(q); } else { if (wn > ARB_LOG_TAB2_LIMBS) return 0; mpn_sub_n(w, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - wn, w, wn); *error += 1; /* log(2) has 1 ulp error */ fmpz_set_si(q, -1); } return 1; /* success */ } else { mp_ptr qp, rp, np; mp_srcptr dp; mp_size_t qn, rn, nn, dn, tn, alloc; TMP_INIT; tn = ((exp + 2) + FLINT_BITS - 1) / FLINT_BITS; dn = wn + tn; /* denominator */ nn = wn + 2 * tn; /* numerator */ qn = nn - dn + 1; /* quotient */ rn = dn; /* remainder */ if (dn > ARB_LOG_TAB2_LIMBS) return 0; TMP_START; alloc = qn + rn + nn; qp = TMP_ALLOC_LIMBS(alloc); rp = qp + qn; np = rp + rn; dp = arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - dn; /* todo: prove that zeroing is unnecessary */ flint_mpn_zero(np, nn); _arf_get_integer_mpn(np, xp, xn, exp + dn * FLINT_BITS); mpn_tdiv_qr(qp, rp, 0, np, nn, dp, dn); if (!negative) { flint_mpn_copyi(w, rp + tn, wn); *error = 2; } else { if (mpn_add_1(qp, qp, qn, 1)) { /* I believe this cannot happen (should prove it) */ printf("mod log(2): unexpected carry\n"); abort(); } mpn_sub_n(w, dp + tn, rp + tn, wn); *error = 3; } /* read the exponent */ while (qn > 1 && qp[qn-1] == 0) qn--; if (qn == 1) { if (!negative) fmpz_set_ui(q, qp[0]); else fmpz_neg_ui(q, qp[0]); } else { fmpz_set_mpn_large(q, qp, qn, negative); } TMP_END; return 1; } }
void arb_log_arf(arb_t z, const arf_t x, slong prec) { if (arf_is_special(x)) { if (arf_is_pos_inf(x)) arb_pos_inf(z); else arb_indeterminate(z); } else if (ARF_SGNBIT(x)) { arb_indeterminate(z); } else if (ARF_IS_POW2(x)) { if (fmpz_is_one(ARF_EXPREF(x))) { arb_zero(z); } else { fmpz_t exp; fmpz_init(exp); _fmpz_add_fast(exp, ARF_EXPREF(x), -1); arb_const_log2(z, prec + 2); arb_mul_fmpz(z, z, exp, prec); fmpz_clear(exp); } } else if (COEFF_IS_MPZ(*ARF_EXPREF(x))) { arb_log_arf_huge(z, x, prec); } else { slong exp, wp, wn, N, r, closeness_to_one; mp_srcptr xp; mp_size_t xn, tn; mp_ptr tmp, w, t, u; mp_limb_t p1, q1bits, p2, q2bits, error, error2, cy; int negative, inexact, used_taylor_series; TMP_INIT; exp = ARF_EXP(x); negative = 0; ARF_GET_MPN_READONLY(xp, xn, x); /* compute a c >= 0 such that |x-1| <= 2^(-c) if c > 0 */ closeness_to_one = 0; if (exp == 0) { slong i; closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(~xp[xn - 1]); if (closeness_to_one == FLINT_BITS) { for (i = xn - 2; i > 0 && xp[i] == LIMB_ONES; i--) closeness_to_one += FLINT_BITS; closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(~xp[i])); } } else if (exp == 1) { closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(xp[xn - 1] & (~LIMB_TOP)); if (closeness_to_one == FLINT_BITS) { slong i; for (i = xn - 2; xp[i] == 0; i--) closeness_to_one += FLINT_BITS; closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(xp[i])); } closeness_to_one--; } /* if |t-1| <= 0.5 */ /* |log(1+t) - t| <= t^2 */ /* |log(1+t) - (t-t^2/2)| <= t^3 */ if (closeness_to_one > prec + 1) { inexact = arf_sub_ui(arb_midref(z), x, 1, prec, ARB_RND); mag_set_ui_2exp_si(arb_radref(z), 1, -2 * closeness_to_one); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); return; } else if (2 * closeness_to_one > prec + 1) { arf_t t, u; arf_init(t); arf_init(u); arf_sub_ui(t, x, 1, ARF_PREC_EXACT, ARF_RND_DOWN); arf_mul(u, t, t, ARF_PREC_EXACT, ARF_RND_DOWN); arf_mul_2exp_si(u, u, -1); inexact = arf_sub(arb_midref(z), t, u, prec, ARB_RND); mag_set_ui_2exp_si(arb_radref(z), 1, -3 * closeness_to_one); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); arf_clear(t); arf_clear(u); return; } /* Absolute working precision (NOT rounded to a limb multiple) */ wp = prec + closeness_to_one + 5; /* Too high precision to use table */ if (wp > ARB_LOG_TAB2_PREC) { arf_log_via_mpfr(arb_midref(z), x, prec, ARB_RND); arf_mag_set_ulp(arb_radref(z), arb_midref(z), prec); return; } /* Working precision in limbs */ wn = (wp + FLINT_BITS - 1) / FLINT_BITS; TMP_START; tmp = TMP_ALLOC_LIMBS(4 * wn + 3); w = tmp; /* requires wn+1 limbs */ t = w + wn + 1; /* requires wn+1 limbs */ u = t + wn + 1; /* requires 2wn+1 limbs */ /* read x-1 */ if (xn <= wn) { flint_mpn_zero(w, wn - xn); mpn_lshift(w + wn - xn, xp, xn, 1); error = 0; } else { mpn_lshift(w, xp + xn - wn, wn, 1); error = 1; } /* First table-based argument reduction */ if (wp <= ARB_LOG_TAB1_PREC) q1bits = ARB_LOG_TAB11_BITS; else q1bits = ARB_LOG_TAB21_BITS; p1 = w[wn-1] >> (FLINT_BITS - q1bits); /* Special case: covers logarithms of small integers */ if (xn == 1 && (w[wn-1] == (p1 << (FLINT_BITS - q1bits)))) { p2 = 0; flint_mpn_zero(t, wn); used_taylor_series = 0; N = r = 0; /* silence compiler warning */ } else { /* log(1+w) = log(1+p/q) + log(1 + (qw-p)/(p+q)) */ w[wn] = mpn_mul_1(w, w, wn, UWORD(1) << q1bits) - p1; mpn_divrem_1(w, 0, w, wn + 1, p1 + (UWORD(1) << q1bits)); error += 1; /* Second table-based argument reduction (fused with log->atanh conversion) */ if (wp <= ARB_LOG_TAB1_PREC) q2bits = ARB_LOG_TAB11_BITS + ARB_LOG_TAB12_BITS; else q2bits = ARB_LOG_TAB21_BITS + ARB_LOG_TAB22_BITS; p2 = w[wn-1] >> (FLINT_BITS - q2bits); u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits); flint_mpn_zero(u, wn); flint_mpn_copyi(t, u + wn, wn + 1); t[wn] += p2 + (UWORD(1) << (q2bits + 1)); u[2 * wn] -= p2; mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1); /* propagated error from 1 ulp error: 2 atanh'(1/3) = 2.25 */ error += 3; /* |w| <= 2^-r */ r = _arb_mpn_leading_zeros(w, wn); /* N >= (wp-r)/(2r) */ N = (wp - r + (2*r-1)) / (2*r); N = FLINT_MAX(N, 0); /* Evaluate Taylor series */ _arb_atan_taylor_rs(t, &error2, w, wn, N, 0); /* Multiply by 2 */ mpn_lshift(t, t, wn, 1); /* Taylor series evaluation error (multiply by 2) */ error += error2 * 2; used_taylor_series = 1; } /* Size of output number */ tn = wn; /* First table lookup */ if (p1 != 0) { if (wp <= ARB_LOG_TAB1_PREC) mpn_add_n(t, t, arb_log_tab11[p1] + ARB_LOG_TAB1_LIMBS - tn, tn); else mpn_add_n(t, t, arb_log_tab21[p1] + ARB_LOG_TAB2_LIMBS - tn, tn); error++; } /* Second table lookup */ if (p2 != 0) { if (wp <= ARB_LOG_TAB1_PREC) mpn_add_n(t, t, arb_log_tab12[p2] + ARB_LOG_TAB1_LIMBS - tn, tn); else mpn_add_n(t, t, arb_log_tab22[p2] + ARB_LOG_TAB2_LIMBS - tn, tn); error++; } /* add exp * log(2) */ exp--; if (exp > 0) { cy = mpn_addmul_1(t, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, exp); t[tn] = cy; tn += (cy != 0); error += exp; } else if (exp < 0) { t[tn] = 0; u[tn] = mpn_mul_1(u, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, -exp); if (mpn_cmp(t, u, tn + 1) >= 0) { mpn_sub_n(t, t, u, tn + 1); } else { mpn_sub_n(t, u, t, tn + 1); negative = 1; } error += (-exp); tn += (t[tn] != 0); } /* The accumulated arithmetic error */ mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS); /* Truncation error from the Taylor series */ if (used_taylor_series) mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1) + 1); /* Set the midpoint */ inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); TMP_END; } }