/* Sets res to an upper bound for exp(-x).

   The cases are tried in this order:
     - x is zero:                      res = 1
     - x is infinite:                  res = 0
     - exponent of x is <= 0:          res = 1 (trivial bound)
     - exponent of x is > 2*MAG_BITS:  res = 2^(-369 * 2^(2*MAG_BITS - 8))
     - otherwise:                      res = c^t, where c = 395007543 / 2^30
                                       is an upper bound for 1/e and t is the
                                       integer part of man * 2^(exp - MAG_BITS).

   All reads of x happen before res is written in each branch. */
void
mag_expinv(mag_t res, const mag_t x)
{
    fmpz_t pw;

    if (mag_is_zero(x))
    {
        mag_one(res);
        return;
    }

    if (mag_is_inf(x))
    {
        mag_zero(res);
        return;
    }

    /* Nonpositive exponent: just use exp(-x) <= 1. */
    if (fmpz_sgn(MAG_EXPREF(x)) <= 0)
    {
        mag_one(res);
        return;
    }

    if (fmpz_cmp_ui(MAG_EXPREF(x), 2 * MAG_BITS) > 0)
    {
        fmpz_init(pw);

        /* For x > 2^60 we have exp(-x) < 2^(-2^60 / log(2)), and since
           -1/log(2) < -369/256 the power of two below is an upper bound. */
        fmpz_set_si(pw, -369);
        fmpz_mul_2exp(pw, pw, 2 * MAG_BITS - 8);

        mag_one(res);
        mag_mul_2exp_fmpz(res, res, pw);

        fmpz_clear(pw);
    }
    else
    {
        slong e = MAG_EXP(x);

        fmpz_init(pw);

        /* pw = mantissa shifted by (e - MAG_BITS), truncating when the
           shift is negative. */
        fmpz_set_ui(pw, MAG_MAN(x));

        if (e < MAG_BITS)
            fmpz_tdiv_q_2exp(pw, pw, MAG_BITS - e);
        else
            fmpz_mul_2exp(pw, pw, e - MAG_BITS);

        /* res = (upper bound for 1/e)^pw */
        mag_set_ui_2exp_si(res, 395007543, -30);
        mag_pow_fmpz(res, res, pw);

        fmpz_clear(pw);
    }
}
/* Sets z to a magnitude associated with the reciprocal factorial of n.

   For n < MAG_FAC_TABLE_NUM the exponent and mantissa are read directly
   from mag_rfac_tab (entries 2n and 2n+1).  For larger n the result is the
   power of two MAG_ONE_HALF * 2^(e + 1), where e is computed in double
   precision from ((n + 0.5) * log_lower(n) - n) / log(2), multiplied by
   -0.9999999 and rounded up.
   NOTE(review): this looks like a Stirling-type upper bound for 1/n!;
   confirm against the library documentation. */
void
mag_rfac_ui(mag_t z, ulong n)
{
    double x;

    if (n < MAG_FAC_TABLE_NUM)
    {
        _fmpz_demote(MAG_EXPREF(z));
        MAG_EXP(z) = mag_rfac_tab[n * 2];
        MAG_MAN(z) = mag_rfac_tab[n * 2 + 1];
        return;
    }

    x = n;

    /* The expression is kept as a single statement so that the
       floating-point evaluation is unchanged. */
    x = ceil((((x+0.5)*mag_d_log_lower_bound(x) - x) * 1.4426950408889634074) * -0.9999999);

    /* x + 1 could round down for huge x, but this is harmless as long as
       the value was already perturbed upwards by the factor above. */
    fmpz_set_d(MAG_EXPREF(z), x + 1);
    MAG_MAN(z) = MAG_ONE_HALF;
}
/* Sets y to a bound for the n-th root of x.

   Special cases, checked in this order:
     - n == 0:                 y = +inf
     - n == 1 or x special:    y = x
     - n == 2:                 y = sqrt(x)
     - n == 4:                 y = sqrt(sqrt(x))
   Otherwise the root is evaluated as exp(log(1 + 2^(k*n) x) / n) * 2^(-k),
   with k = ceil((MAG_BITS - exponent(x)) / n). */
void
mag_root(mag_t y, const mag_t x, ulong n)
{
    fmpz_t k, kn;

    if (n == 0)
    {
        mag_inf(y);
        return;
    }

    if (n == 1 || mag_is_special(x))
    {
        mag_set(y, x);
        return;
    }

    if (n == 2)
    {
        mag_sqrt(y, x);
        return;
    }

    if (n == 4)
    {
        mag_sqrt(y, x);
        mag_sqrt(y, y);
        return;
    }

    fmpz_init_set_ui(k, MAG_BITS);
    fmpz_init(kn);

    /* Choose k so that 2^(k*n) x is roughly 2^30.
       TODO: this rewriting is probably unnecessary with the new exp/log
       functions. */
    fmpz_sub(k, k, MAG_EXPREF(x));
    fmpz_cdiv_q_ui(k, k, n);
    fmpz_mul_ui(kn, k, n);

    /* y = exp(log(1 + 2^(k*n) x) / n) */
    mag_mul_2exp_fmpz(y, x, kn);
    mag_log1p(y, y);
    mag_div_ui(y, y, n);
    mag_exp(y, y);

    /* undo the scaling: y *= 2^(-k) */
    fmpz_neg(k, k);
    mag_mul_2exp_fmpz(y, y, k);

    fmpz_clear(k);
    fmpz_clear(kn);
}
/* Sets z from the absolute value of the double c, using the
   lower-bound conversion macro MAG_SET_D_2EXP_LOWER.

   A zero or NaN input gives z = 0 and an infinite input gives z = +inf. */
void
mag_set_d_lower(mag_t z, double c)
{
    /* work with |c| */
    if (c < 0.0)
        c = -c;

    /* c != c holds only for NaN */
    if (c != c || c == 0.0)
    {
        mag_zero(z);
        return;
    }

    if (c == D_INF)
    {
        mag_inf(z);
        return;
    }

    /* Finite nonzero value: make sure the exponent is a small integer
       before the macro writes to it directly. */
    _fmpz_demote(MAG_EXPREF(z));
    MAG_SET_D_2EXP_LOWER(MAG_MAN(z), MAG_EXP(z), c, 0);
}
/* Sets res to the constant log(2) at precision prec.

   When prec is below ARB_LOG_TAB2_LIMBS * FLINT_BITS - 16 the midpoint is
   obtained by rounding the precomputed table arb_log_log2_tab to nearest,
   and the radius is set to half a unit in the last place.  For larger
   precisions the work is delegated to arb_const_log2_hypgeom. */
void
arb_const_log2(arb_t res, slong prec)
{
    slong mid_exp;

    if (prec >= ARB_LOG_TAB2_LIMBS * FLINT_BITS - 16)
    {
        arb_const_log2_hypgeom(res, prec);
        return;
    }

    /* just reading the table is known to give the correct rounding */
    _arf_set_round_mpn(arb_midref(res), &mid_exp, arb_log_log2_tab,
        ARB_LOG_TAB2_LIMBS, 0, prec, ARF_RND_NEAR);
    _fmpz_set_si_small(ARF_EXPREF(arb_midref(res)), mid_exp);

    /* radius: 1/2 ulp error */
    _fmpz_set_si_small(MAG_EXPREF(arb_radref(res)), mid_exp - prec);
    MAG_MAN(arb_radref(res)) = MAG_ONE_HALF;
}
/* Converts z to a double.

   Returns 0.0 for zero and D_INF for infinity.  When the exponent lies
   outside [-1000, 1000], returns 2^-1000 if the exponent is negative and
   D_INF otherwise.  In the remaining case the result is
   man * 2^(exp - MAG_BITS), computed with ldexp. */
double
mag_get_d(const mag_t z)
{
    if (mag_is_zero(z))
        return 0.0;

    if (mag_is_inf(z))
        return D_INF;

    if (MAG_EXP(z) < -1000 || MAG_EXP(z) > 1000)
    {
        /* out of the comfortable double range: clamp by sign of exponent */
        return (fmpz_sgn(MAG_EXPREF(z)) < 0) ? ldexp(1.0, -1000) : D_INF;
    }

    return ldexp(MAG_MAN(z), MAG_EXP(z) - MAG_BITS);
}
/* Splits a vector of magnitudes into consecutive blocks of bounded exponent
   spread, and writes each block as integers with a shared exponent.

   Input:
     - the magnitudes are the radii of x[0..len-1] when x != NULL, and
       xm[0..len-1] otherwise;
     - entry i is implicitly divided by 2^(scale * i).

   Output:
     - blocks[k] is the start index of block k; the entry after the last
       block is set to len as an end marker;
     - exps[k] is the bottom exponent of block k;
     - coeffs[j] is the mantissa of entry j shifted left so that
       coeffs[j] * 2^exps[k] equals the scaled entry (k = block of j);
       special entries give 0;
     - dblcoeffs[j] is the mantissa scaled by
       2^(shift - DOUBLE_BLOCK_SHIFT) as a double; special entries give 0.0.

   Special entries are assumed to be zero (no Infs/NaNs).  The function
   aborts if the configured maximum block height exceeds
   DOUBLE_BLOCK_MAX_HEIGHT, or if one of the internal sanity checks fails.
   NOTE(review): the caller presumably sizes exps and blocks for the worst
   case number of blocks (plus the end marker); confirm at the call sites. */
static __inline__ void
_mag_vec_get_fmpz_2exp_blocks(fmpz * coeffs,
    double * dblcoeffs, fmpz * exps, slong * blocks, const fmpz_t scale,
    arb_srcptr x, mag_srcptr xm, slong len)
{
    fmpz_t top, bot, t, b, v, block_top, block_bot;
    slong i, j, s, block, bits, maxheight;
    int in_zero;
    mag_srcptr cur;

    fmpz_init(top);
    fmpz_init(bot);
    fmpz_init(t);
    fmpz_init(b);
    fmpz_init(v);
    fmpz_init(block_top);
    fmpz_init(block_bot);

    /* The first block always starts at index 0; in_zero stays set until
       the first non-special entry has been seen. */
    blocks[0] = 0;
    block = 0;
    in_zero = 1;

    /* Maximum allowed (top - bottom) exponent spread within one block. */
    maxheight = ALPHA * MAG_BITS + BETA;
    if (maxheight > DOUBLE_BLOCK_MAX_HEIGHT)
        abort();

    /* Pass 1: determine the block boundaries and each block's bottom
       exponent. */
    for (i = 0; i < len; i++)
    {
        cur = (x == NULL) ? (xm + i) : arb_radref(x + i);

        /* Skip (must be zero, since we assume there are no Infs/NaNs). */
        if (mag_is_special(cur))
            continue;

        /* Bottom and top exponent of current number */
        /* top = exponent - scale * i, bot = top - MAG_BITS */
        bits = MAG_BITS;
        fmpz_set(top, MAG_EXPREF(cur));
        fmpz_submul_ui(top, scale, i);
        fmpz_sub_ui(bot, top, bits);

        /* Extend current block. */
        if (in_zero)
        {
            /* first non-special entry: it defines the block's range */
            fmpz_swap(block_top, top);
            fmpz_swap(block_bot, bot);
        }
        else
        {
            /* v = height the block would have if this entry were added */
            fmpz_max(t, top, block_top);
            fmpz_min(b, bot, block_bot);
            fmpz_sub(v, t, b);

            /* extend current block */
            if (fmpz_cmp_ui(v, maxheight) < 0)
            {
                fmpz_swap(block_top, t);
                fmpz_swap(block_bot, b);
            }
            else /* start new block */
            {
                /* write exponent for previous block */
                fmpz_set(exps + block, block_bot);
                block++;
                blocks[block] = i;
                fmpz_swap(block_top, top);
                fmpz_swap(block_bot, bot);
            }
        }

        in_zero = 0;
    }

    /* write exponent for last block */
    fmpz_set(exps + block, block_bot);

    /* end marker */
    blocks[block + 1] = len;

    /* write the block data */
    /* Pass 2: i indexes blocks, j indexes entries within block i. */
    for (i = 0; blocks[i] != len; i++)
    {
        for (j = blocks[i]; j < blocks[i + 1]; j++)
        {
            cur = (x == NULL) ? (xm + j) : arb_radref(x + j);

            if (mag_is_special(cur))
            {
                fmpz_zero(coeffs + j);
                dblcoeffs[j] = 0.0;
            }
            else
            {
                mp_limb_t man;
                double c;

                man = MAG_MAN(cur);

                /* TODO: only write and use doubles when block is short? */

                /* Divide by 2^(scale * j) */
                fmpz_mul_ui(t, scale, j);
                fmpz_sub(t, MAG_EXPREF(cur), t);
                fmpz_sub_ui(t, t, MAG_BITS); /* bottom exponent */

                /* s = shift of this entry relative to the block's bottom
                   exponent; it cannot be negative if pass 1 was correct */
                s = _fmpz_sub_small(t, exps + i);

                if (s < 0) abort(); /* Bug catcher */

                /* integer coefficient: man * 2^s */
                fmpz_set_ui(coeffs + j, man);
                fmpz_mul_2exp(coeffs + j, coeffs + j, s);

                /* double coefficient: man * 2^(s - DOUBLE_BLOCK_SHIFT) */
                c = man;
                c = ldexp(c, s - DOUBLE_BLOCK_SHIFT);
                if (c < 1e-150 || c > 1e150) /* Bug catcher */
                    abort();
                dblcoeffs[j] = c;
            }
        }
    }

    fmpz_clear(top);
    fmpz_clear(bot);
    fmpz_clear(t);
    fmpz_clear(b);
    fmpz_clear(v);
    fmpz_clear(block_top);
    fmpz_clear(block_bot);
}
/* If the ball x contains exactly one integer, writes that integer to z and
   returns 1; otherwise returns 0 and leaves z untouched.

   Returns 0 for non-finite x.  Aborts (after printing a message) if the
   exponent returned by arb_get_interval_fmpz_2exp does not fit in a small
   fmpz.  arf_get_fmpz itself aborts on overflow. */
int
arb_get_unique_fmpz(fmpz_t z, const arb_t x)
{
    fmpz_t lo, hi, e;
    int unique;

    if (!arb_is_finite(x))
        return 0;

    if (arb_is_exact(x))
    {
        /* x = b*2^e is an integer only when e >= 0 */
        if (!arf_is_int(arb_midref(x)))
            return 0;

        /* arf_get_fmpz aborts on overflow */
        arf_get_fmpz(z, arb_midref(x), ARF_RND_DOWN);
        return 1;
    }

    /* if the radius is >= 1, there are at least two integers */
    if (mag_cmp_2exp_si(arb_radref(x), 0) >= 0)
        return 0;

    /* From here on the radius is < 1, so there are 0 or 1 integers. */

    /* if the midpoint is exactly an integer, it is what we want */
    if (arf_is_int(arb_midref(x)))
    {
        /* arf_get_fmpz aborts on overflow */
        arf_get_fmpz(z, arb_midref(x), ARF_RND_DOWN);
        return 1;
    }

    fmpz_init(lo);
    fmpz_init(hi);
    fmpz_init(e);

    /* if the radius is tiny, it can't be an integer */
    arf_bot(lo, arb_midref(x));

    if (fmpz_cmp(lo, MAG_EXPREF(arb_radref(x))) > 0)
    {
        unique = 0;
    }
    else
    {
        /* the ball is contained in [lo, hi] * 2^e */
        arb_get_interval_fmpz_2exp(lo, hi, e, x);

        if (COEFF_IS_MPZ(*e))
        {
            flint_printf("arb_get_unique_fmpz: input too large\n");
            abort();
        }

        if (*e < 0)
        {
            /* round the endpoints inwards to integers */
            fmpz_cdiv_q_2exp(lo, lo, -(*e));
            fmpz_fdiv_q_2exp(hi, hi, -(*e));
            unique = fmpz_equal(lo, hi);
        }
        else
        {
            unique = fmpz_equal(lo, hi);

            if (unique)
            {
                fmpz_mul_2exp(lo, lo, *e);
                fmpz_mul_2exp(hi, hi, *e);
            }
        }

        if (unique)
            fmpz_set(z, lo);
    }

    fmpz_clear(lo);
    fmpz_clear(hi);
    fmpz_clear(e);

    return unique;
}
/* Evaluates a partial sum of terms T(k) = k^n / k! over the index range
   delimited by a and b, using a Taylor expansion of the summand around the
   midpoint m of the range.

   Strategy:
     - if b - a < 5, delegate to arb_bell_sum_bsplit;
     - otherwise compute a bound C on the growth of the summand on a disk of
       radius R = r * 2^RADIUS_BITS around m (r = half-width of the range);
     - if C is too large (compared with 2^(tol/4 + 2)), split the range at m
       and recurse on both halves;
     - otherwise expand (m+x)^n / gamma(m+1+x) to N terms, combine with the
       power sums over [a - m, b - m], and add the truncation bound.

   Parameters:
     res   - output ball
     n     - exponent in the summand
     a, b  - range endpoints (NOTE(review): presumably the half-open range
             a <= k < b, as the recursion splits into (a, m) and (m, b);
             confirm against arb_bell_sum_bsplit)
     mmag  - optional reference exponent; may be NULL.  When given, the
             working precision is increased until the radius exponent of
             res minus mmag is <= -tol; when NULL, until res has at least
             tol bits of relative accuracy.
     tol   - accuracy goal in bits

   The local B is initialised and cleared but otherwise unused. */
void
_arb_bell_sum_taylor(arb_t res, const fmpz_t n,
        const fmpz_t a, const fmpz_t b, const fmpz_t mmag, long tol)
{
    fmpz_t m, r, R, tmp;
    mag_t B, C, D, bound;
    arb_t t, u;
    long wp, k, N;

    /* Short ranges: direct evaluation; nothing has been allocated yet. */
    if (_fmpz_sub_small(b, a) < 5)
    {
        arb_bell_sum_bsplit(res, n, a, b, mmag, tol);
        return;
    }

    fmpz_init(m);
    fmpz_init(r);
    fmpz_init(R);
    fmpz_init(tmp);

    /* r = max(m - a, b - m) */
    /* m = a + (b - a) / 2 */
    /* (the division rounds up, so r = ceil((b - a) / 2)) */
    fmpz_sub(r, b, a);
    fmpz_cdiv_q_2exp(r, r, 1);
    fmpz_add(m, a, r);

    /* R = r * 2^RADIUS_BITS: radius of the disk used for the bound */
    fmpz_mul_2exp(R, r, RADIUS_BITS);

    mag_init(B);
    mag_init(C);
    mag_init(D);
    mag_init(bound);

    arb_init(t);
    arb_init(u);

    if (fmpz_cmp(R, m) >= 0)
    {
        /* m - R would be nonpositive: no usable bound, forces a split */
        mag_inf(C);
        mag_inf(D);
    }
    else
    {
        /* C = exp(R * |F'(m)| + (1/2) R^2 * (n/(m-R)^2 + 1/(m-R))) */
        /* C = exp(R * (|F'(m)| + (1/2) R * (n/(m-R) + 1)/(m-R))) */
        /* D = (1/2) R * (n/(m-R) + 1)/(m-R) */
        fmpz_sub(tmp, m, R);
        mag_set_fmpz(D, n);
        mag_div_fmpz(D, D, tmp);
        mag_one(C);
        mag_add(D, D, C);
        mag_div_fmpz(D, D, tmp);
        mag_mul_fmpz(D, D, R);
        mag_mul_2exp_si(D, D, -1);

        /* C = |F'(m)| */
        /* computed here as |n/m - digamma(m + 1)| */
        wp = 20 + 1.05 * fmpz_bits(n);
        arb_set_fmpz(t, n);
        arb_div_fmpz(t, t, m, wp);
        fmpz_add_ui(tmp, m, 1);
        arb_set_fmpz(u, tmp);
        arb_digamma(u, u, wp);
        arb_sub(t, t, u, wp);
        arb_get_mag(C, t);

        /* C = exp(R * (C + D)) */
        mag_add(C, C, D);
        mag_mul_fmpz(C, C, R);
        mag_exp(C, C);
    }

    if (mag_cmp_2exp_si(C, tol / 4 + 2) > 0)
    {
        /* Bound too large: split at m and add the two halves. */
        _arb_bell_sum_taylor(res, n, a, m, mmag, tol);
        _arb_bell_sum_taylor(t, n, m, b, mmag, tol);
        arb_add(res, res, t, 2 * tol);
    }
    else
    {
        arb_ptr mx, ser1, ser2, ser3;

        /* D = T(m) */
        /* i.e. the magnitude of m^n / gamma(m + 1) */
        wp = 20 + 1.05 * fmpz_bits(n);
        arb_set_fmpz(t, m);
        arb_pow_fmpz(t, t, n, wp);
        fmpz_add_ui(tmp, m, 1);
        arb_gamma_fmpz(u, tmp, wp);
        arb_div(t, t, u, wp);
        arb_get_mag(D, t);

        /* error bound: (b-a) * C * D * B^N / (1 - B), B = r/R */
        /* ((b-a) * C * D * 2) * 2^(-N*RADIUS_BITS) */

        /* ((b-a) * C * D * 2) */
        mag_mul(bound, C, D);
        mag_mul_2exp_si(bound, bound, 1);
        fmpz_sub(tmp, b, a);
        mag_mul_fmpz(bound, bound, tmp);

        /* N = (tol + log2((b-a)*C*D*2) - mmag) / RADIUS_BITS */
        if (mmag == NULL)
        {
            /* estimate D ~= 2^mmag */
            fmpz_add_ui(tmp, MAG_EXPREF(C), tol);
            fmpz_cdiv_q_ui(tmp, tmp, RADIUS_BITS);
        }
        else
        {
            fmpz_sub(tmp, MAG_EXPREF(bound), mmag);
            fmpz_add_ui(tmp, tmp, tol);
            fmpz_cdiv_q_ui(tmp, tmp, RADIUS_BITS);
        }

        /* clamp the number of terms N to the range [2, 5 * tol / 4] */
        if (fmpz_cmp_ui(tmp, 5 * tol / 4) > 0)
            N = 5 * tol / 4;
        else if (fmpz_cmp_ui(tmp, 2) < 0)
            N = 2;
        else
            N = fmpz_get_ui(tmp);

        /* multiply by 2^(-N*RADIUS_BITS) */
        mag_mul_2exp_si(bound, bound, -N * RADIUS_BITS);

        /* mx holds the linear polynomial used as series argument;
           ser1..ser3 are length-N work vectors */
        mx = _arb_vec_init(2);
        ser1 = _arb_vec_init(N);
        ser2 = _arb_vec_init(N);
        ser3 = _arb_vec_init(N);

        /* estimate (this should work for moderate n and tol) */
        wp = 1.1 * tol + 1.05 * fmpz_bits(n) + 5;

        /* increase precision until convergence */
        while (1)
        {
            /* (m+x)^n / gamma(m+1+x) */
            /* = exp(n * log(m + x) - lgamma(m + 1 + x)) as a power series */
            arb_set_fmpz(mx, m);
            arb_one(mx + 1);
            _arb_poly_log_series(ser1, mx, 2, N, wp);
            for (k = 0; k < N; k++)
                arb_mul_fmpz(ser1 + k, ser1 + k, n, wp);
            arb_add_ui(mx, mx, 1, wp);
            _arb_poly_lgamma_series(ser2, mx, 2, N, wp);
            _arb_vec_sub(ser1, ser1, ser2, N, wp);
            _arb_poly_exp_series(ser3, ser1, N, N, wp);

            /* t = a - m, u = b - m */
            arb_set_fmpz(t, a);
            arb_sub_fmpz(t, t, m, wp);
            arb_set_fmpz(u, b);
            arb_sub_fmpz(u, u, m, wp);
            /* ser1 is reused for the power sums over the shifted range */
            arb_power_sum_vec(ser1, t, u, N, wp);

            /* res = sum_k (Taylor coefficient k) * (power sum k) */
            arb_zero(res);
            for (k = 0; k < N; k++)
                arb_addmul(res, ser3 + k, ser1 + k, wp);

            if (mmag != NULL)
            {
                /* absolute criterion relative to the exponent mmag */
                if (_fmpz_sub_small(MAG_EXPREF(arb_radref(res)), mmag) <= -tol)
                    break;
            }
            else
            {
                /* relative accuracy criterion */
                if (arb_rel_accuracy_bits(res) >= tol)
                    break;
            }

            wp = 2 * wp;
        }

        /* add the series truncation bound */
        arb_add_error_mag(res, bound);

        _arb_vec_clear(mx, 2);
        _arb_vec_clear(ser1, N);
        _arb_vec_clear(ser2, N);
        _arb_vec_clear(ser3, N);
    }

    mag_clear(B);
    mag_clear(C);
    mag_clear(D);
    mag_clear(bound);

    arb_clear(t);
    arb_clear(u);

    fmpz_clear(m);
    fmpz_clear(r);
    fmpz_clear(R);
    fmpz_clear(tmp);
}
/* note: z should be exact here */
/* Main evaluation routine for branch k of the Lambert W function at z.

   Steps:
     1. Choose extra working precision from the size of z (ebits), the size
        of k (kbits) and, on the branches listed below, the closeness to
        the branch point as measured by the magnitude of ez1 (mbits).
     2. Get an initial approximation from acb_lambertw_initial and refine
        it with Halley steps, treating the iterate as exact (radii zeroed)
        and tracking an accuracy estimate.
     3. Certify: if acb_lambertw_check_branch accepts w, bound the error
        of w from the residual w e^w - z and a bound on W' over the hull
        of w e^w and z; otherwise return an indeterminate result.

   Parameters:
     res    - output
     z      - argument (expected to be exact, see note above)
     ez1    - NOTE(review): presumably e*z + 1, matching how eu1 is built
              from u below; confirm with the caller
     k      - branch index
     flags  - not referenced anywhere in this function
     prec   - target precision in bits */
void acb_lambertw_main(acb_t res, const acb_t z,
                const acb_t ez1, const fmpz_t k, int flags, slong prec)
{
    acb_t w, t, oldw, ew;
    mag_t err;
    slong i, wp, accuracy, ebits, kbits, mbits, wp_initial, extraprec;
    int have_ew;

    acb_init(t);
    acb_init(w);
    acb_init(oldw);
    acb_init(ew);
    mag_init(err);

    /* We need higher precision for large k, large exponents, or very close
       to the branch point at -1/e. todo: we should be recomputing
       ez1 to higher precision when close... */
    acb_get_mag(err, z);
    if (fmpz_is_zero(k) && mag_cmp_2exp_si(err, 0) < 0)
        ebits = 0;
    else
        ebits = fmpz_bits(MAG_EXPREF(err));

    /* Branch-point proximity only matters for k = 0, for k = 1 with
       negative imaginary part, and for k = -1 with nonnegative imaginary
       part. */
    if (fmpz_is_zero(k) || (fmpz_is_one(k) && arb_is_negative(acb_imagref(z)))
                        || (fmpz_equal_si(k, -1) && arb_is_nonnegative(acb_imagref(z))))
    {
        /* mbits = minus the exponent of |ez1|, clamped to [0, prec] */
        acb_get_mag(err, ez1);
        mbits = -MAG_EXP(err);
        mbits = FLINT_MAX(mbits, 0);
        mbits = FLINT_MIN(mbits, prec);
    }
    else
    {
        mbits = 0;
    }

    kbits = fmpz_bits(k);

    /* extraprec = max(ebits, kbits, mbits) */
    extraprec = FLINT_MAX(ebits, kbits);
    extraprec = FLINT_MAX(extraprec, mbits);

    wp = wp_initial = 40 + extraprec;

    /* Initial approximation; its radius is discarded so that the iteration
       works with an exact point. */
    accuracy = acb_lambertw_initial(w, z, ez1, k, wp_initial);
    mag_zero(arb_radref(acb_realref(w)));
    mag_zero(arb_radref(acb_imagref(w)));

    /* We should be able to compute e^w for the final certification
       during the Halley iteration. */
    have_ew = 0;

    for (i = 0; i < 5 + FLINT_BIT_COUNT(prec + extraprec); i++)
    {
        /* todo: should we restart? */
        if (!acb_is_finite(w))
            break;

        /* working precision grows with the estimated accuracy, capped
           near the target precision, floored at 40 bits */
        wp = FLINT_MIN(3 * accuracy, 1.1 * prec + 10);
        wp = FLINT_MAX(wp, 40);
        wp += extraprec;

        acb_set(oldw, w);
        /* t receives the new iterate; ew is also written by the step and
           is later used as e^oldw */
        acb_lambertw_halley_step(t, ew, z, w, wp);

        /* estimate the error (conservatively) */
        /* err = |old iterate - new iterate|; w then becomes the new
           iterate and t the new iterate inflated by err */
        acb_sub(w, w, t, wp);
        acb_get_mag(err, w);
        acb_set(w, t);
        acb_add_error_mag(t, err);
        accuracy = acb_rel_accuracy_bits(t);

        if (accuracy > 2 * extraprec)
            accuracy *= 2.9; /* less conservatively */

        /* clamp accuracy to [0, wp] */
        accuracy = FLINT_MIN(accuracy, wp);
        accuracy = FLINT_MAX(accuracy, 0);

        if (accuracy > prec + extraprec)
        {
            /* e^w = e^oldw * e^(w-oldw) */
            acb_sub(t, w, oldw, wp);
            acb_exp(t, t, wp);
            acb_mul(ew, ew, t, wp);
            have_ew = 1;
            break;
        }

        /* keep iterating from an exact point */
        mag_zero(arb_radref(acb_realref(w)));
        mag_zero(arb_radref(acb_imagref(w)));
    }

    /* precision for the certification step, same formula as in the loop */
    wp = FLINT_MIN(3 * accuracy, 1.1 * prec + 10);
    wp = FLINT_MAX(wp, 40);
    wp += extraprec;

    if (acb_lambertw_check_branch(w, k, wp))
    {
        acb_t u, r, eu1;
        /* note: this err shadows the function-level err declared above */
        mag_t err, rad;

        acb_init(u);
        acb_init(r);
        acb_init(eu1);

        mag_init(err);
        mag_init(rad);

        /* reuse e^w from the iteration when available */
        if (have_ew)
            acb_set(t, ew);
        else
            acb_exp(t, w, wp);
        /* t = w e^w */
        acb_mul(t, t, w, wp);

        /* residual r = w e^w - z */
        acb_sub(r, t, z, wp);

        /* Bound W' on the straight line path between t and z */
        acb_union(u, t, z, wp);

        /* eu1 = e * u + 1 */
        arb_const_e(acb_realref(eu1), wp);
        arb_zero(acb_imagref(eu1));
        acb_mul(eu1, eu1, u, wp);
        acb_add_ui(eu1, eu1, 1, wp);

        if (acb_lambertw_branch_crossing(u, eu1, k))
        {
            /* the path may cross a branch cut: no finite bound */
            mag_inf(err);
        }
        else
        {
            /* err = (bound for |W'| on u) * |r| */
            acb_lambertw_bound_deriv(err, u, eu1, k);
            acb_get_mag(rad, r);
            mag_mul(err, err, rad);
        }

        acb_add_error_mag(w, err);

        acb_set(res, w);

        acb_clear(u);
        acb_clear(r);
        acb_clear(eu1);
        mag_clear(err);
        mag_clear(rad);
    }
    else
    {
        acb_indeterminate(res);
    }

    acb_clear(t);
    acb_clear(w);
    acb_clear(oldw);
    acb_clear(ew);
    mag_clear(err);
}
/* Sets z to an upper bound for log(1 + x).

   - x zero gives 0, x infinite gives +inf.
   - If the exponent of x is below -10 (including huge negative
     exponents), the bound log(1 + x) <= x is used directly.
   - If the exponent is below 1000, the value is computed in double
     precision with an upward perturbation.
   - Otherwise x is written as 2^(exp-1) * (2*v) and the bound
     (exp - 1) * log(2) + log(2*v) is used, with upper bounds for both
     terms.

   Everything needed from x is read before z is written, so z may alias x. */
void
mag_log1p(mag_t z, const mag_t x)
{
    fmpz xexp;
    double lg;
    fmpz_t scaled;
    mag_t corr;

    if (mag_is_special(x))
    {
        if (mag_is_zero(x))
            mag_zero(z);
        else
            mag_inf(z);
        return;
    }

    xexp = MAG_EXP(x);

    if (COEFF_IS_MPZ(xexp))
    {
        /* huge negative exponent: quick bound by x */
        if (fmpz_sgn(MAG_EXPREF(x)) < 0)
        {
            mag_set(z, x);
            return;
        }
    }
    else if (xexp < -10)
    {
        /* quick bound by x */
        mag_set(z, x);
        return;
    }
    else if (xexp < 1000)
    {
        double t;

        t = ldexp(MAG_MAN(x), xexp - MAG_BITS);
        t = (1.0 + t) * (1 + 1e-14);
        t = mag_d_log_upper_bound(t);
        mag_set_d(z, t);
        return;
    }

    /* Remaining case: the exponent is at least 1000. */
    mag_init(corr);
    fmpz_init(scaled);

    /* incrementing the mantissa gives an upper bound for x+1;
       corr bounds log(2*v) */
    lg = ldexp(MAG_MAN(x) + 1, 1 - MAG_BITS);
    lg = mag_d_log_upper_bound(lg);
    mag_set_d(corr, lg);

    /* z = (exp - 1) * 744261118 / 2^30, using log(2) < 744261118/2^30 */
    _fmpz_add_fast(scaled, MAG_EXPREF(x), -1);
    fmpz_mul_ui(scaled, scaled, 744261118);
    mag_set_fmpz(z, scaled);
    _fmpz_add_fast(MAG_EXPREF(z), MAG_EXPREF(z), -30);

    mag_add(z, z, corr);

    mag_clear(corr);
    fmpz_clear(scaled);
}