/* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n where x = n*log(2)+(2^K)*r together with Brent-Kung O(t^(1/2)) algorithm for the evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)). */ int mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode) { long n; unsigned long K, k, l, err; /* FIXME: Which type ? */ int error_r; mp_exp_t exps; mp_prec_t q, precy; int inexact; mpfr_t r, s, t; mpz_t ss; TMP_DECL(marker); precy = MPFR_PREC(y); MPFR_TRACE ( printf("Py=%d Px=%d", MPFR_PREC(y), MPFR_PREC(x)) ); MPFR_TRACE ( MPFR_DUMP (x) ); n = (long) (mpfr_get_d1 (x) / LOG2); /* error bounds the cancelled bits in x - n*log(2) */ if (MPFR_UNLIKELY(n == 0)) error_r = 0; else count_leading_zeros (error_r, (mp_limb_t) (n < 0) ? -n : n); error_r = BITS_PER_MP_LIMB - error_r + 2; /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of n/K terms costs about n/(2K) multiplications when computed in fixed point */ K = (precy < SWITCH) ? __gmpfr_isqrt ((precy + 1) / 2) : __gmpfr_cuberoot (4*precy); l = (precy - 1) / K + 1; err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18); /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */ q = precy + err + K + 5; /*q = ( (q-1)/BITS_PER_MP_LIMB + 1) * BITS_PER_MP_LIMB; */ mpfr_init2 (r, q + error_r); mpfr_init2 (s, q + error_r); mpfr_init2 (t, q); /* the algorithm consists in computing an upper bound of exp(x) using a precision of q bits, and see if we can round to MPFR_PREC(y) taking into account the maximal error. Otherwise we increase q. */ for (;;) { MPFR_TRACE ( printf("n=%d K=%d l=%d q=%d\n",n,K,l,q) ); /* if n<0, we have to get an upper bound of log(2) in order to get an upper bound of r = x-n*log(2) */ mpfr_const_log2 (s, (n >= 0) ? GMP_RNDZ : GMP_RNDU); /* s is within 1 ulp of log(2) */ mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? GMP_RNDZ : GMP_RNDU); /* r is within 3 ulps of n*log(2) */ if (n < 0) mpfr_neg (r, r, GMP_RNDD); /* exact */ /* r = floor(n*log(2)), within 3 ulps */ MPFR_TRACE ( MPFR_DUMP (x) ); MPFR_TRACE ( MPFR_DUMP (r) ); mpfr_sub (r, x, r, GMP_RNDU); /* possible cancellation here: the error on r is at most 3*2^(EXP(old_r)-EXP(new_r)) */ while (MPFR_IS_NEG (r)) { /* initial approximation n was too large */ n--; mpfr_add (r, r, s, GMP_RNDU); } mpfr_prec_round (r, q, GMP_RNDU); MPFR_TRACE ( MPFR_DUMP (r) ); MPFR_ASSERTD (MPFR_IS_POS (r)); mpfr_div_2ui (r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K, exact */ TMP_MARK(marker); MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB)); exps = mpfr_get_z_exp (ss, s); /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */ l = (precy < SWITCH) ? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */ : mpfr_exp2_aux2 (ss, r, q, &exps); /* Brent/Kung method */ MPFR_TRACE(printf("l=%d q=%d (K+l)*q^2=%1.3e\n", l, q, (K+l)*(double)q*q)); for (k = 0; k < K; k++) { mpz_mul (ss, ss, ss); exps <<= 1; exps += mpz_normalize (ss, ss, q); } mpfr_set_z (s, ss, GMP_RNDN); MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps); TMP_FREE(marker); /* don't need ss anymore */ if (n>0) mpfr_mul_2ui(s, s, n, GMP_RNDU); else mpfr_div_2ui(s, s, -n, GMP_RNDU); /* error is at most 2^K*(3l*(l+1)) ulp for mpfr_exp2_aux */ l = (precy < SWITCH) ? 3*l*(l+1) : l*(l+4) ; k = MPFR_INT_CEIL_LOG2 (l); /* k = 0; while (l) { k++; l >>= 1; } */ /* now k = ceil(log(error in ulps)/log(2)) */ K += k; MPFR_TRACE ( printf("after mult. by 2^n:\n") ); MPFR_TRACE ( MPFR_DUMP (s) ); MPFR_TRACE ( printf("err=%d bits\n", K) ); if (mpfr_can_round (s, q - K, GMP_RNDN, GMP_RNDZ, precy + (rnd_mode == GMP_RNDN)) ) break; MPFR_TRACE (printf("prec++, use %d\n", q+BITS_PER_MP_LIMB) ); MPFR_TRACE (printf("q=%d q-K=%d precy=%d\n",q,q-K,precy) ); q += BITS_PER_MP_LIMB; mpfr_set_prec (r, q); mpfr_set_prec (s, q); mpfr_set_prec (t, q); } inexact = mpfr_set (y, s, rnd_mode); mpfr_clear (r); mpfr_clear (s); mpfr_clear (t); return inexact; }
/* Input: s - a floating-point number >= 1/2. rnd_mode - a rounding mode. Assumes s is neither NaN nor Infinite. Output: z - Zeta(s) rounded to the precision of z with direction rnd_mode */ static int mpfr_zeta_pos (mpfr_t z, mpfr_srcptr s, mp_rnd_t rnd_mode) { mpfr_t b, c, z_pre, f, s1; double beta, sd, dnep; mpfr_t *tc1; mp_prec_t precz, precs, d, dint; int p, n, l, add; int inex; MPFR_GROUP_DECL (group); MPFR_ZIV_DECL (loop); MPFR_ASSERTD (MPFR_IS_POS (s) && MPFR_GET_EXP (s) >= 0); precz = MPFR_PREC (z); precs = MPFR_PREC (s); /* Zeta(x) = 1+1/2^x+1/3^x+1/4^x+1/5^x+O(1/6^x) so with 2^(EXP(x)-1) <= x < 2^EXP(x) So for x > 2^3, k^x > k^8, so 2/k^x < 2/k^8 Zeta(x) = 1 + 1/2^x*(1+(2/3)^x+(2/4)^x+...) = 1 + 1/2^x*(1+sum((2/k)^x,k=3..infinity)) <= 1 + 1/2^x*(1+sum((2/k)^8,k=3..infinity)) And sum((2/k)^8,k=3..infinity) = -257+128*Pi^8/4725 ~= 0.0438035 So Zeta(x) <= 1 + 1/2^x*2 for x >= 8 The error is < 2^(-x+1) <= 2^(-2^(EXP(x)-1)+1) */ if (MPFR_GET_EXP (s) > 3) { mp_exp_t err; err = MPFR_GET_EXP (s) - 1; if (err > (mp_exp_t) (sizeof (mp_exp_t)*CHAR_BIT-2)) err = MPFR_EMAX_MAX; else err = ((mp_exp_t)1) << err; err = 1 - (-err+1); /* GET_EXP(one) - (-err+1) = err :) */ MPFR_FAST_COMPUTE_IF_SMALL_INPUT (z, __gmpfr_one, err, 0, 1, rnd_mode, {}); } d = precz + MPFR_INT_CEIL_LOG2(precz) + 10; /* we want that s1 = s-1 is exact, i.e. we should have PREC(s1) >= EXP(s) */ dint = (mpfr_uexp_t) MPFR_GET_EXP (s); mpfr_init2 (s1, MAX (precs, dint)); inex = mpfr_sub (s1, s, __gmpfr_one, GMP_RNDN); MPFR_ASSERTD (inex == 0); /* case s=1 */ if (MPFR_IS_ZERO (s1)) { MPFR_SET_INF (z); MPFR_SET_POS (z); MPFR_ASSERTD (inex == 0); goto clear_and_return; } MPFR_GROUP_INIT_4 (group, MPFR_PREC_MIN, b, c, z_pre, f); MPFR_ZIV_INIT (loop, d); for (;;) { /* Principal loop: we compute, in z_pre, an approximation of Zeta(s), that we send to can_round */ if (MPFR_GET_EXP (s1) <= -(mp_exp_t) ((mpfr_prec_t) (d-3)/2)) /* Branch 1: when s-1 is very small, one uses the approximation Zeta(s)=1/(s-1)+gamma, where gamma is Euler's constant */ { dint = MAX (d + 3, precs); MPFR_TRACE (printf ("branch 1\ninternal precision=%d\n", dint)); MPFR_GROUP_REPREC_4 (group, dint, b, c, z_pre, f); mpfr_div (z_pre, __gmpfr_one, s1, GMP_RNDN); mpfr_const_euler (f, GMP_RNDN); mpfr_add (z_pre, z_pre, f, GMP_RNDN); } else /* Branch 2 */ { size_t size; MPFR_TRACE (printf ("branch 2\n")); /* Computation of parameters n, p and working precision */ dnep = (double) d * LOG2; sd = mpfr_get_d (s, GMP_RNDN); /* beta = dnep + 0.61 + sd * log (6.2832 / sd); but a larger value is ok */ #define LOG6dot2832 1.83787940484160805532 beta = dnep + 0.61 + sd * (LOG6dot2832 - LOG2 * __gmpfr_floor_log2 (sd)); if (beta <= 0.0) { p = 0; /* n = 1 + (int) (exp ((dnep - LOG2) / sd)); */ n = 1 + (int) __gmpfr_ceil_exp2 ((d - 1.0) / sd); } else { p = 1 + (int) beta / 2; n = 1 + (int) ((sd + 2.0 * (double) p - 1.0) / 6.2832); } MPFR_TRACE (printf ("\nn=%d\np=%d\n",n,p)); /* add = 4 + floor(1.5 * log(d) / log (2)). We should have add >= 10, which is always fulfilled since d = precz + 11 >= 12, thus ceil(log2(d)) >= 4 */ add = 4 + (3 * MPFR_INT_CEIL_LOG2 (d)) / 2; MPFR_ASSERTD(add >= 10); dint = d + add; if (dint < precs) dint = precs; MPFR_TRACE (printf("internal precision=%d\n",dint)); size = (p + 1) * sizeof(mpfr_t); tc1 = (mpfr_t*) (*__gmp_allocate_func) (size); for (l=1; l<=p; l++) mpfr_init2 (tc1[l], dint); MPFR_GROUP_REPREC_4 (group, dint, b, c, z_pre, f); MPFR_TRACE (printf ("precision of z =%d\n", precz)); /* Computation of the coefficients c_k */ mpfr_zeta_c (p, tc1); /* Computation of the 3 parts of the fonction Zeta. */ mpfr_zeta_part_a (z_pre, s, n); mpfr_zeta_part_b (b, s, n, p, tc1); /* s1 = s-1 is already computed above */ mpfr_div (c, __gmpfr_one, s1, GMP_RNDN); mpfr_ui_pow (f, n, s1, GMP_RNDN); mpfr_div (c, c, f, GMP_RNDN); MPFR_TRACE (MPFR_DUMP (c)); mpfr_add (z_pre, z_pre, c, GMP_RNDN); mpfr_add (z_pre, z_pre, b, GMP_RNDN); for (l=1; l<=p; l++) mpfr_clear (tc1[l]); (*__gmp_free_func) (tc1, size); /* End branch 2 */ } MPFR_TRACE (MPFR_DUMP (z_pre)); if (MPFR_LIKELY (MPFR_CAN_ROUND (z_pre, d-3, precz, rnd_mode))) break; MPFR_ZIV_NEXT (loop, d); } MPFR_ZIV_FREE (loop); inex = mpfr_set (z, z_pre, rnd_mode); MPFR_GROUP_CLEAR (group); clear_and_return: mpfr_clear (s1); return inex; }