/*
 * Ruby binding around mpfr_get_z_2exp.
 *
 * Extracts the MPFR value held by self, stores its integer significand in a
 * freshly created integer object (set up by mpz_make_struct_init) and
 * returns the two-element Ruby array [significand, exponent].
 */
static VALUE r_mpfr_get_z_2exp(VALUE self)
{
  VALUE ptr_return;
  MPFR *ptr_self;
  MP_INT *ptr_mpz;
  long int exp;

  r_mpfr_get_struct(ptr_self, self);
  mpz_make_struct_init(ptr_return, ptr_mpz);
  exp = mpfr_get_z_2exp(ptr_mpz, ptr_self);

  /* The exponent is a long; LONG2NUM converts its full range, whereas
     INT2NUM takes an int and would truncate large exponents on platforms
     where long is wider than int. */
  return rb_ary_new3(2, ptr_return, LONG2NUM(exp));
}
/* Store in z the integer obtained by rounding f in direction rnd.
   The return value is the ternary value produced by mpfr_rint.
   When f is singular, z is set to 0 and 0 is returned; the erange flag is
   additionally raised unless f is a zero. */
int
mpfr_get_z (mpz_ptr z, mpfr_srcptr f, mpfr_rnd_t rnd)
{
  int ternary;
  mpfr_t rounded;
  mpfr_exp_t e;
  mpfr_prec_t prec;
  MPFR_SAVE_EXPO_DECL (expo);

  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (f)))
    {
      if (MPFR_UNLIKELY (MPFR_NOTZERO (f)))
        MPFR_SET_ERANGEFLAG ();
      mpz_set_ui (z, 0);
      /* A ternary value of 0 is returned even for an infinity: a rounding
         direction would be meaningless here and would not necessarily
         agree with rnd. */
      return 0;
    }

  MPFR_SAVE_EXPO_MARK (expo);

  /* When the exponent is <= 0 we have |f| < 1, hence |o(f)| <= 1. */
  e = MPFR_GET_EXP (f);
  MPFR_ASSERTN (e < 0 || e <= MPFR_PREC_MAX);

  /* Working precision: the exponent of f, but never below MPFR_PREC_MIN. */
  if (e < (mpfr_exp_t) MPFR_PREC_MIN)
    prec = MPFR_PREC_MIN;
  else
    prec = (mpfr_prec_t) e;
  mpfr_init2 (rounded, prec);

  ternary = mpfr_rint (rounded, f, rnd);
  /* The integral part of f must be representable in the temporary. */
  MPFR_ASSERTN (ternary != 1 && ternary != -1);
  MPFR_ASSERTN (MPFR_IS_FP (rounded));

  /* Keep the flags raised by mpfr_rint; in particular it sets the inexact
     flag when needed. */
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);

  /* rounded = z * 2^e: apply the power of two to obtain the integer. */
  e = mpfr_get_z_2exp (z, rounded);
  if (e < 0)
    mpz_fdiv_q_2exp (z, z, -e);
  else
    mpz_mul_2exp (z, z, e);

  mpfr_clear (rounded);
  MPFR_SAVE_EXPO_FREE (expo);

  return ternary;
}
static void special (void) { int inex; mpfr_t x; mpz_t z; int i; mpfr_exp_t e; mpfr_init2 (x, 2); mpz_init (z); for (i = -1; i <= 1; i++) { if (i != 0) mpfr_set_nan (x); else mpfr_set_inf (x, i); mpfr_clear_flags (); inex = mpfr_get_z (z, x, MPFR_RNDN); if (!mpfr_erangeflag_p () || inex != 0 || mpz_cmp_ui (z, 0) != 0) { printf ("special() failed on mpfr_get_z for i = %d\n", i); exit (1); } mpfr_clear_flags (); e = mpfr_get_z_2exp (z, x); if (!mpfr_erangeflag_p () || e != __gmpfr_emin || mpz_cmp_ui (z, 0) != 0) { printf ("special() failed on mpfr_get_z_2exp for i = %d\n", i); exit (1); } } mpfr_clear (x); mpz_clear (z); }
/* Set y to the cube root of x rounded in direction rnd_mode.
   NaN gives NaN; an infinity or a zero is copied with its sign.  Otherwise
   the root is computed on the integer significand with mpz_root and the
   result goes through mpfr_check_range, whose value is returned. */
int
mpfr_cbrt (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
{
  mpz_t mant;
  mpfr_exp_t ex, rem3, shift;
  mpfr_prec_t target_bits, mant_bits, root_bits;
  int inexact, negative;
  MPFR_SAVE_EXPO_DECL (expo);

  MPFR_LOG_FUNC (
    ("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
    ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec (y), mpfr_log_prec, y,
     inexact));

  /* Special inputs. */
  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
    {
      if (MPFR_IS_NAN (x))
        {
          MPFR_SET_NAN (y);
          MPFR_RET_NAN;
        }
      else
        {
          /* cbrt(+/-Inf) = +/-Inf and cbrt(+/-0) = +/-0. */
          if (MPFR_IS_INF (x))
            MPFR_SET_INF (y);
          else
            {
              /* the only singular value left is zero */
              MPFR_ASSERTD (MPFR_IS_ZERO (x));
              MPFR_SET_ZERO (y);
            }
          MPFR_SET_SAME_SIGN (y, x);
          MPFR_RET (0);
        }
    }

  /* Regular inputs. */
  MPFR_SAVE_EXPO_MARK (expo);

  mpz_init (mant);
  ex = mpfr_get_z_2exp (mant, x);       /* x = mant * 2^ex */

  /* Work on |x|; the sign is restored at the end. */
  negative = MPFR_IS_NEG (x);
  if (negative)
    mpz_neg (mant, mant);

  /* rem3 = ex mod 3, taken in [0, 2], so that
     x = (mant*2^rem3) * 2^(ex-rem3) with ex-rem3 a multiple of 3. */
  rem3 = ex % 3;
  if (rem3 < 0)
    rem3 += 3;

  MPFR_MPZ_SIZEINBASE2 (mant_bits, mant);
  /* one extra bit is kept for rounding to nearest */
  target_bits = MPFR_PREC (y) + (rnd_mode == MPFR_RNDN);

  /* Choose the shift so that
     3*target_bits-2 <= mant_bits + 3*sh + rem3 <= 3*target_bits,
     i.e. 3*sh + mant_bits + rem3 <= 3*target_bits. */
  shift = (3 * (mpfr_exp_t) target_bits - (mpfr_exp_t) mant_bits - rem3) / 3;
  shift = 3 * shift + rem3;
  if (shift >= 0)
    {
      mpz_mul_2exp (mant, mant, shift);
      ex = ex - shift;
    }
  else if (rem3 > 0)
    {
      mpz_mul_2exp (mant, mant, rem3);
      ex = ex - rem3;
    }

  /* Invariant from here on: x = mant * 2^ex with ex divisible by 3. */

  /* The root overwrites mant, which is no longer needed; only the
     exactness of the root matters (mpz_root returns zero when inexact). */
  inexact = mpz_root (mant, mant, 3) == 0;

  MPFR_MPZ_SIZEINBASE2 (root_bits, mant);
  shift = root_bits - target_bits;
  if (shift > 0)
    {
      /* Drop the low shift bits of the root; dropping a nonzero bit makes
         the result inexact. */
      if (!inexact && (mpfr_exp_t) mpz_scan1 (mant, 0) < shift)
        inexact = 1;
      mpz_fdiv_q_2exp (mant, mant, shift);
      ex += 3 * shift;
    }

  if (inexact)
    {
      /* The computation is on |x|, so flip directed roundings. */
      if (negative)
        rnd_mode = MPFR_INVERT_RND (rnd_mode);

      if (rnd_mode == MPFR_RNDU || rnd_mode == MPFR_RNDA
          || (rnd_mode == MPFR_RNDN && mpz_tstbit (mant, 0)))
        {
          inexact = 1;
          mpz_add_ui (mant, mant, 1);
        }
      else
        inexact = -1;
    }

  /* Either inexact is already nonzero and the conversion below is exact,
     leaving it unchanged; or inexact is 0 and it becomes nonzero only when
     rnd_mode is MPFR_RNDN and bit (n+1) of mant is 1. */
  inexact += mpfr_set_z (y, mant, MPFR_RNDN);
  MPFR_SET_EXP (y, MPFR_GET_EXP (y) + ex / 3);

  if (negative)
    {
      MPFR_CHANGE_SIGN (y);
      inexact = -inexact;
    }

  mpz_clear (mant);
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (y, inexact, rnd_mode);
}
/*
 * Write the d+1 integer coefficients coeffs[0..d] of the polynomial
 * associated with n.
 *
 * Three strategies are used:
 *   - n <= MAX_32BIT: the coefficients are copied from lookup_table[n - 1];
 *   - n prime: the first two coefficients are set from s = (n-1)/2 and
 *     s mod 4, and the others follow from a two-term recurrence;
 *   - otherwise: the values -cos(2*pi*i/n), for the first d indices i that
 *     are coprime to n, are computed with MPFR as fixed-point integers
 *     scaled by 2^prec, combined by balanced_product, and the result is
 *     scaled by a power of two and rounded to the nearest integer.
 *
 * NOTE(review): d is presumably the degree of the polynomial and is
 * expected to be consistent with n; confirm against the callers.
 */
void
_arith_cos_minpoly(fmpz * coeffs, slong d, ulong n)
{
    slong i, j;
    fmpz * alpha;
    fmpz_t half;
    mpfr_t t, u;
    mp_bitcnt_t prec;
    slong exp;
    slong r;

    if (n <= MAX_32BIT)
    {
        for (i = 0; i <= d; i++)
            fmpz_set_si(coeffs + i, lookup_table[n - 1][i]);
        return;
    }

    /* Direct formula for odd primes > 3 */
    if (n_is_prime(n))
    {
        slong s = (n - 1) / 2;

        switch (s % 4)
        {
            case 0:
                fmpz_set_si(coeffs, WORD(1));
                fmpz_set_si(coeffs + 1, -s);
                break;
            case 1:
                fmpz_set_si(coeffs, WORD(1));
                fmpz_set_si(coeffs + 1, s + 1);
                break;
            case 2:
                fmpz_set_si(coeffs, WORD(-1));
                fmpz_set_si(coeffs + 1, s);
                break;
            case 3:
                fmpz_set_si(coeffs, WORD(-1));
                fmpz_set_si(coeffs + 1, -s - 1);
                break;
        }

        /* coeffs[i] is derived from coeffs[i - 2]. */
        for (i = 2; i <= s; i++)
        {
            slong b = (s - i) % 2;
            fmpz_mul2_uiui(coeffs + i, coeffs + i - 2, s+i-b, s+2-b-i);
            fmpz_divexact2_uiui(coeffs + i, coeffs + i, i, i-1);
            fmpz_neg(coeffs + i, coeffs + i);
        }

        return;
    }

    prec = magnitude_bound(d) + 5 + FLINT_BIT_COUNT(d);

    alpha = _fmpz_vec_init(d);
    fmpz_init(half);
    mpfr_init2(t, prec);
    mpfr_init2(u, prec);

    /* half = 2^(prec-1), used below to round to nearest. */
    fmpz_one(half);
    fmpz_mul_2exp(half, half, prec - 1);

    /* t = pi / n.  The second argument of mpfr_const_pi is a rounding mode,
       not a precision (the precision is the one t was initialised with). */
    mpfr_const_pi(t, MPFR_RNDN);
    mpfr_div_ui(t, t, n, MPFR_RNDN);

    /* Collect the first d values -cos(2*pi*i/n) with gcd(n, i) == 1. */
    for (i = j = 0; j < d; i++)
    {
        if (n_gcd(n, i) == 1)
        {
            mpfr_mul_ui(u, t, 2 * i, MPFR_RNDN);
            mpfr_cos(u, u, MPFR_RNDN);
            mpfr_neg(u, u, MPFR_RNDN);
            /* u = mantissa * 2^exp; rescale the mantissa by 2^(exp + prec)
               so that alpha[j] holds u as a fixed-point integer. */
            exp = mpfr_get_z_2exp(_fmpz_promote(alpha + j), u);
            _fmpz_demote_val(alpha + j);
            fmpz_mul_or_div_2exp(alpha + j, alpha + j, exp + prec);
            j++;
        }
    }

    balanced_product(coeffs, alpha, d, prec);

    /* Scale and round: the scaling exponent is d, or d - 1 when n is a
       power of two.  It does not depend on the coefficient index. */
    r = d;
    if ((n & (n - 1)) == 0)
        r--;

    for (i = 0; i < d + 1; i++)
    {
        fmpz_mul_2exp(coeffs + i, coeffs + i, r);
        fmpz_add(coeffs + i, coeffs + i, half);
        fmpz_fdiv_q_2exp(coeffs + i, coeffs + i, prec);
    }

    fmpz_clear(half);
    mpfr_clear(t);
    mpfr_clear(u);
    _fmpz_vec_clear(alpha, d);
}
/* Decide whether x^y is exactly representable and, if so, compute it.
   Preconditions: x and y are ordinary numbers, y is not an integer, x is
   positive and is not a power of 2.
   Returns nonzero iff x^y is exact; in that case z receives the power
   rounded in direction rnd_mode and *inexact its ternary value. */
static int
mpfr_pow_is_exact (mpfr_ptr z, mpfr_srcptr x, mpfr_srcptr y,
                   mpfr_rnd_t rnd_mode, int *inexact)
{
  mpz_t base, odd;
  mpfr_exp_t yexp, bexp;
  unsigned long tz;
  int res;

  MPFR_ASSERTD (!MPFR_IS_SINGULAR (y));
  MPFR_ASSERTD (!MPFR_IS_SINGULAR (x));
  MPFR_ASSERTD (!mpfr_integer_p (y));
  MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_INT_SIGN (x),
                                  MPFR_GET_EXP (x) - 1) != 0);
  MPFR_ASSERTD (MPFR_IS_POS (x));

  /* x is not a power of two, so x^y with y < 0 cannot be exact. */
  if (MPFR_IS_NEG (y))
    return 0;

  /* Write y = odd * 2^yexp with odd an odd integer. */
  mpz_init (odd);
  yexp = mpfr_get_z_2exp (odd, y);
  tz = mpz_scan1 (odd, 0);
  mpz_fdiv_q_2exp (odd, odd, tz);
  yexp += tz;
  /* y is not an integer, hence yexp < 0. */
  MPFR_ASSERTD (yexp < 0);

  /* Write x = base * 2^bexp with base an odd integer. */
  mpz_init (base);
  bexp = mpfr_get_z_2exp (base, x);
  tz = mpz_scan1 (base, 0);
  mpz_fdiv_q_2exp (base, base, tz);
  bexp += tz;

  /* Take -yexp successive exact square roots of base * 2^bexp; give up as
     soon as one of them is not exact. */
  res = 1;
  while (yexp != 0)
    {
      /* base*2^bexp is a square iff base is a square (bexp even) or
         2*base is a square (bexp odd). */
      if (bexp % 2 != 0)
        {
          mpz_mul_2exp (base, base, 1);
          bexp --;
        }
      MPFR_ASSERTD ((bexp % 2) == 0);
      if (!mpz_perfect_square_p (base))
        {
          res = 0;
          break;
        }
      mpz_sqrt (base, base);
      bexp = bexp / 2;
      yexp++;
    }

  if (res != 0)
    {
      /* Now x = (base*2^bexp)^(2^k) where k is the number of square roots
         taken, so x^y = (base*2^bexp)^odd with odd an odd integer. */
      mpfr_t tmp;
      mpfr_prec_t p;

      MPFR_MPZ_SIZEINBASE2 (p, base);
      mpfr_init2 (tmp, p);      /* a precision of 1 should not occur */
      res = mpfr_set_z (tmp, base, MPFR_RNDN);
      MPFR_ASSERTD (res == 0);
      res = mpfr_mul_2si (tmp, tmp, bexp, MPFR_RNDN);
      MPFR_ASSERTD (res == 0);
      *inexact = mpfr_pow_z (z, tmp, odd, rnd_mode);
      mpfr_clear (tmp);
      res = 1;
    }

  mpz_clear (base);
  mpz_clear (odd);
  return res;
}
/* Evaluate the series
     f <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ...
   in fixed point on integers.
   Requires |r| < 1/2 and f, r of the same precision.
   Returns e such that the error on f is at most 2^e ulps. */
static int
mpfr_cos2_aux (mpfr_ptr f, mpfr_srcptr r)
{
  mpz_t mant, term, sum;
  mpfr_exp_t mant_exp, len, term_bits;
  mpfr_prec_t prec, guard;
  unsigned long i, maxi, imax;

  MPFR_ASSERTD(mpfr_get_exp (r) <= -1);

  /* maxi: smallest i for which i*(i+1) no longer fits in an unsigned long,
     assuming no padding bits.  With padding bits the product below wraps
     to 0 and maxi is halved until maxi*(maxi-1) is representable, which
     holds iff maxi * (maxi / 2) != 0. */
  maxi = 1UL << (CHAR_BIT * sizeof(unsigned long) / 2);
  while (maxi * (maxi / 2) == 0)
    maxi /= 2;

  mpz_init (mant);
  mpz_init (sum);
  mpz_init (term);

  /* r = mant * 2^mant_exp, with trailing zero bits of mant removed. */
  mant_exp = mpfr_get_z_2exp (mant, r);
  len = mpz_scan1 (mant, 0);
  mant_exp += len;
  mpz_fdiv_q_2exp (mant, mant, len);
  /* |r| < 1 and mant is an integer, so mant_exp < 0 here. */

  prec = mpfr_get_prec (f);     /* identical to the precision of r */

  /* Upper bound on the number of iterations, and the guard bits derived
     from it (bound for (3l)^2). */
  imax = prec / (-mpfr_get_exp (r));
  imax += (imax == 0);
  guard = 2 * MPFR_INT_CEIL_LOG2(imax) + 4;

  /* sum = term = 1, everything being scaled by 2^(prec+guard). */
  mpz_set_ui (sum, 1);
  mpz_mul_2exp (sum, sum, prec + guard);
  mpz_set (term, sum);          /* term always holds the previous term */

  i = 1;
  for (;;)
    {
      term_bits = mpz_sizeinbase (term, 2);
      if (term_bits < guard)
        break;

      /* Truncate mant to at most as many bits as term has. */
      len = mpz_sizeinbase (mant, 2);
      if (len > term_bits)
        {
          len -= term_bits;
          mpz_fdiv_q_2exp (mant, mant, len);
          mant_exp += len;
        }

      /* term <- term * r */
      mpz_mul (term, term, mant);
      mpz_fdiv_q_2exp (term, term, -mant_exp);

      /* term <- term / (i*(i+1)), in two steps if the product could
         overflow an unsigned long. */
      if (i >= maxi)
        {
          mpz_fdiv_q_ui (term, term, i);
          mpz_fdiv_q_ui (term, term, i + 1);
        }
      else
        mpz_fdiv_q_ui (term, term, i * (i + 1));

      /* With m the current bit size of term, every operation on term so
         far had precision >= m, so by induction the relative error on
         term has the form (1+u)^(3l)-1 with |u| <= 2^(-m), where
         l = (i+1)/2 is the number of loops.  Since
         |(1+x^2)^(1/x) - 1| <= 4x/3 for |x| <= 1/2, for |u| <= 1/(3l)^2
         the absolute error is at most 4/3*(3l)*2^(-m)*term <= 4*l because
         |term| < 2^m.  Hence the error on sum is at most 2*l*(l+1). */

      /* Alternating series: subtract when i = 1 mod 4, add otherwise. */
      if (i % 4 != 1)
        mpz_add (sum, sum, term);
      else
        mpz_sub (sum, sum, term);

      i += 2;
    }

  /* f = sum / 2^(prec+guard) */
  mpfr_set_z (f, sum, MPFR_RNDN);
  mpfr_div_2ui (f, f, prec + guard, MPFR_RNDN);

  mpz_clear (mant);
  mpz_clear (sum);
  mpz_clear (term);

  len = (i - 1) / 2;            /* number of iterations performed */
  return 2 * MPFR_INT_CEIL_LOG2 (len + 1) + 1;  /* error is <= 2l(l+1) */
}
static void special (void) { int inex; mpfr_t x; mpz_t z; int i, fi; int rnd; mpfr_exp_t e; mpfr_flags_t flags[3] = { 0, MPFR_FLAGS_ALL ^ MPFR_FLAGS_ERANGE, MPFR_FLAGS_ALL }, ex_flags, gt_flags; mpfr_init2 (x, 2); mpz_init (z); RND_LOOP (rnd) for (i = -1; i <= 1; i++) for (fi = 0; fi < numberof (flags); fi++) { ex_flags = flags[fi] | MPFR_FLAGS_ERANGE; if (i != 0) mpfr_set_nan (x); else mpfr_set_inf (x, i); __gmpfr_flags = flags[fi]; inex = mpfr_get_z (z, x, (mpfr_rnd_t) rnd); gt_flags = __gmpfr_flags; if (gt_flags != ex_flags || inex != 0 || mpz_cmp_ui (z, 0) != 0) { printf ("special() failed on mpfr_get_z" " for %s, i = %d, fi = %d\n", mpfr_print_rnd_mode ((mpfr_rnd_t) rnd), i, fi); printf ("Expected z = 0, inex = 0,"); flags_out (ex_flags); printf ("Got z = "); mpz_out_str (stdout, 10, z); printf (", inex = %d,", inex); flags_out (gt_flags); exit (1); } __gmpfr_flags = flags[fi]; e = mpfr_get_z_2exp (z, x); gt_flags = __gmpfr_flags; if (gt_flags != ex_flags || e != __gmpfr_emin || mpz_cmp_ui (z, 0) != 0) { printf ("special() failed on mpfr_get_z_2exp" " for %s, i = %d, fi = %d\n", mpfr_print_rnd_mode ((mpfr_rnd_t) rnd), i, fi); printf ("Expected z = 0, e = %" MPFR_EXP_FSPEC "d,", (mpfr_eexp_t) __gmpfr_emin); flags_out (ex_flags); printf ("Got z = "); mpz_out_str (stdout, 10, z); printf (", e = %" MPFR_EXP_FSPEC "d,", (mpfr_eexp_t) e); flags_out (gt_flags); exit (1); } } mpfr_clear (x); mpz_clear (z); }
/* Common worker for the remainder functions: store in rem the remainder of
   x by y, rounded in direction rnd, the quotient being rounded according
   to rnd_q (MPFR_RNDN or MPFR_RNDZ).  When quo is not NULL, the low bits
   of the quotient (WANTED_BITS of them, with the sign of x/y) are stored
   in *quo.  Returns the ternary value of the final assignment to rem. */
static int
mpfr_rem1 (mpfr_ptr rem, long *quo, mpfr_rnd_t rnd_q,
           mpfr_srcptr x, mpfr_srcptr y, mpfr_rnd_t rnd)
{
  mpfr_exp_t ex, ey, er;
  int cmp, inex, odd_q, qsign;
  int xsign = MPFR_SIGN (x);
  int q_is_zero = 0;
  unsigned long tz;
  mpz_t zx, zy, zr;

  MPFR_ASSERTD (rnd_q == MPFR_RNDN || rnd_q == MPFR_RNDZ);

  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x) || MPFR_IS_SINGULAR (y)))
    {
      if (MPFR_IS_NAN (x) || MPFR_IS_NAN (y) || MPFR_IS_INF (x)
          || MPFR_IS_ZERO (y))
        {
          /* for remquo, quo is left undefined */
          MPFR_SET_NAN (rem);
          MPFR_RET_NAN;
        }
      else
        {
          /* Either y is Inf and x is 0 or a regular number, or x is 0 and
             y is a regular number: the quotient is zero in both cases. */
          if (quo)
            *quo = 0;
          return mpfr_set (rem, x, rnd);
        }
    }

  /* From here on neither x nor y is NaN, Inf or zero. */

  mpz_init (zx);
  mpz_init (zy);
  mpz_init (zr);

  ex = mpfr_get_z_2exp (zx, x);         /* x = zx * 2^ex */
  ey = mpfr_get_z_2exp (zy, y);         /* y = zy * 2^ey */

  /* Signs are handled separately:
       quo(-x,-y) = quo(x,y),  rem(-x,-y) = -rem(x,y)
       quo(-x,y) = -quo(x,y),  rem(-x,y)  = -rem(x,y)
     so quo = sign(x/y)*quo(|x|,|y|) and rem = sign(x)*rem(|x|,|y|). */
  if (xsign == MPFR_SIGN (y))
    qsign = 1;
  else
    qsign = -1;
  mpz_abs (zx, zx);
  mpz_abs (zy, zy);
  odd_q = 0;

  /* Strip the trailing zero bits of zy to make reductions mod zy easier. */
  tz = mpz_scan1 (zy, 0);
  ey += tz;
  mpz_fdiv_q_2exp (zy, zy, tz);

  if (ex <= ey)
    {
      /* q = x/y = zx / (zy * 2^(ey-ex)).
         First catch q = 0 so that the huge number zy*2^(ey-ex) is never
         built: with sx and sy the bit sizes of zx and zy we have
         x < 2^(ex + sx) and y >= 2^(ey + sy - 1), so the quotient is 0 as
         soon as ex + sx <= ey + sy - 1. */
      if (ex + (mpfr_exp_t) mpz_sizeinbase (zx, 2) <
          ey + (mpfr_exp_t) mpz_sizeinbase (zy, 2))
        {
          q_is_zero = 1;
          mpz_set (zr, zx);
          mpz_set_ui (zx, 0);
        }
      else
        {
          /* Divide zx by zy*2^(ey-ex); both are positive, so truncated
             division is fine.  Afterwards 0 <= |zr| <= |zy| and zr has the
             sign of zx. */
          mpz_mul_2exp (zy, zy, ey - ex);
          mpz_tdiv_qr (zx, zr, zx, zy);
        }

      if (rnd_q == MPFR_RNDN)
        odd_q = mpz_tstbit (zx, 0);
      if (quo)
        {
          /* zx holds the quotient: keep its low WANTED_BITS bits. */
          mpz_tdiv_r_2exp (zx, zx, WANTED_BITS);
          *quo = mpz_get_si (zx);
        }
    }
  else
    {
      /* ex > ey.  Let X = zx*2^(ex-ey) and Y = zy, both integers. */
      if (quo)
        {
          /* remquo: to obtain the low WANTED_BITS bits of the quotient,
             first compute R = X mod Y*2^WANTED_BITS; those bits are then
             floor(R/Y). */
          mpz_mul_2exp (zy, zy, WANTED_BITS);   /* 2^WANTED_BITS*Y */
        }
      else if (rnd_q == MPFR_RNDN)
        {
          /* remainder: if X = R mod Y then x = X*2^ey = R*2^ey mod
             (Y*2^ey = y).  Rounding needs the least significant bit of the
             quotient, i.e. one more bit of remainder, obtained by reducing
             modulo 2Y. */
          mpz_mul_2exp (zy, zy, 1);             /* 2Y */
        }

      mpz_set_ui (zr, 2);
      mpz_powm_ui (zr, zr, ex - ey, zy);        /* 2^(ex-ey) mod zy */
      mpz_mul (zr, zr, zx);
      mpz_mod (zr, zr, zy);

      if (quo)
        {
          /* here 0 <= zr < 2^WANTED_BITS*Y */
          mpz_fdiv_q_2exp (zy, zy, WANTED_BITS);        /* back to Y */
          mpz_tdiv_qr (zx, zr, zr, zy);   /* old zr = zx*zy + new zr */
          *quo = mpz_get_si (zx);
          odd_q = *quo & 1;
        }
      else if (rnd_q == MPFR_RNDN)
        {
          /* here 0 <= zr < 2Y */
          mpz_fdiv_q_2exp (zy, zy, 1);                  /* back to Y */
          /* least significant bit of the quotient */
          odd_q = mpz_cmpabs (zr, zy) >= 0;
          if (odd_q)
            mpz_sub (zr, zr, zy);
        }
      /* Now 0 <= |zr| < |zy| and, where needed, odd_q is the least
         significant bit of the quotient. */
    }

  if (mpz_cmp_ui (zr, 0) == 0)
    {
      inex = mpfr_set_ui (rem, 0, MPFR_RNDN);
      /* the zero remainder takes the sign of x */
      if (xsign < 0)
        mpfr_neg (rem, rem, MPFR_RNDN);
    }
  else
    {
      if (rnd_q == MPFR_RNDN)
        {
          /* FIXME: the comparison 2*zr < zy could be done more efficiently
             at the mpn level */
          mpz_mul_2exp (zr, zr, 1);
          if (q_is_zero)
            {
              /* zy was never scaled: compare against zy*2^(ey-ex). */
              if (ex + (mpfr_exp_t) mpz_sizeinbase (zr, 2) <
                  ey + (mpfr_exp_t) mpz_sizeinbase (zy, 2))
                cmp = 0;        /* zr*2^ex < zy*2^ey */
              else
                {
                  mpz_mul_2exp (zy, zy, ey - ex);
                  cmp = mpz_cmpabs (zr, zy);
                }
            }
          else
            cmp = mpz_cmpabs (zr, zy);
          mpz_fdiv_q_2exp (zr, zr, 1);

          /* Round the quotient up when the remainder exceeds half of the
             divisor, or on a tie when the quotient is odd (rnd_q is known
             to be MPFR_RNDN in this branch). */
          cmp = (cmp > 0) || ((cmp == 0) && odd_q);
          if (cmp)
            {
              /* subtract zy from zr and add 1 to the quotient */
              mpz_sub (zr, zr, zy);
              if (quo)
                *quo += 1;
            }
        }

      /* the remainder takes the sign of x */
      if (xsign < 0)
        mpz_neg (zr, zr);

      /* the result is scaled by the smaller of the two exponents */
      if (ex > ey)
        er = ey;
      else
        er = ex;
      inex = mpfr_set_z_2exp (rem, zr, er, rnd);
    }

  if (quo)
    *quo *= qsign;

  mpz_clear (zx);
  mpz_clear (zy);
  mpz_clear (zr);

  return inex;
}
/* Store in y an approximation of sum(x^k/k/k!, k=1..infinity) and return e
   such that the absolute error is bounded by 2^e ulp(y). */
static mpfr_exp_t
mpfr_eint_aux (mpfr_t y, mpfr_srcptr x)
{
  mpfr_t t_err;                 /* running absolute error bound on term */
  mpfr_t u_err, s_err;          /* error bounds on addend and on sum */
  mpz_t mant, sum, term, addend;
  mpfr_exp_t ex, nbits;
  mpfr_prec_t wp = MPFR_PREC(y);
  unsigned long k;
  MPFR_GROUP_DECL (group);

  /* For |x| <= 1, S := sum(x^k/k/k!, k=1..infinity) = x + R(x) with
     |R(x)| <= (x/2)^2/(1-x/2) <= 2*(x/2)^2, hence |R(x)/x| <= |x|/2.
     So when |x| <= 2^(-PREC(y)) we get |S - o(x)| <= ulp(y). */
  if (MPFR_GET_EXP(x) <= - (mpfr_exp_t) wp)
    {
      mpfr_set (y, x, MPFR_RNDN);
      return 0;
    }

  mpz_init (sum);               /* starts at 0 */
  mpz_init (term);
  mpz_init (addend);
  mpz_init (mant);
  MPFR_GROUP_INIT_3 (group, 31, t_err, u_err, s_err);

  ex = mpfr_get_z_2exp (mant, x);       /* x = mant * 2^ex */
  MPFR_ASSERTD (mpz_sizeinbase (mant, 2) == MPFR_PREC (x));

  /* Keep at most wp bits of the significand. */
  if (MPFR_PREC (x) > wp)
    {
      mpfr_prec_t drop = MPFR_PREC (x) - wp;

      ex += drop;
      mpz_tdiv_q_2exp (mant, mant, drop);
    }

  /* Strip trailing zero bits: a big win when x is a small integer divided
     by a power of 2. */
  k = mpz_scan1 (mant, 0);
  mpz_tdiv_q_2exp (mant, mant, k);
  ex += k;

  /* term = 2^wp, both error accumulators start at 0 */
  mpz_set_ui (term, 1);
  mpz_mul_2exp (term, term, wp);
  mpfr_set_ui (t_err, 0, MPFR_RNDN);    /* eps[0] = 0 */
  mpfr_set_ui (s_err, 0, MPFR_RNDN);

  k = 0;
  do
    {
      k++;

      /* With eps[k] the absolute error on t[k], and since
         t[k] = trunc(t[k-1]*m*2^e/k):
         eps[k+1] <= 1 + eps[k-1]*m*2^e/k + t[k-1]*m*2^(1-w)*2^e/k
                  =  1 + (eps[k-1] + t[k-1]*2^(1-w))*m*2^e/k
                  =  1 + (eps[k-1]*2^(w-1) + t[k-1])*2^(1-w)*m*2^e/k */
      mpfr_mul_2ui (t_err, t_err, wp - 1, MPFR_RNDU);
      mpfr_add_z (t_err, t_err, term, MPFR_RNDU);
      MPFR_MPZ_SIZEINBASE2 (nbits, mant);
      mpfr_mul_2si (t_err, t_err, nbits - (wp - 1) + ex, MPFR_RNDU);
      mpfr_div_ui (t_err, t_err, k, MPFR_RNDU);
      mpfr_add_ui (t_err, t_err, 1, MPFR_RNDU);

      /* term <- trunc(term * mant * 2^ex / k) */
      mpz_mul (term, term, mant);
      if (ex >= 0)
        mpz_mul_2exp (term, term, ex);
      else
        mpz_tdiv_q_2exp (term, term, -ex);
      mpz_tdiv_q_ui (term, term, k);

      /* sum <- sum + trunc(term / k) */
      mpz_tdiv_q_ui (addend, term, k);
      mpz_add (sum, sum, addend);

      /* the absolute error on addend is <= 1 + eps[k]/k ... */
      mpfr_div_ui (u_err, t_err, k, MPFR_RNDU);
      mpfr_add_ui (u_err, u_err, 1, MPFR_RNDU);
      /* ... and the one on sum accumulates all of them */
      mpfr_add (s_err, s_err, u_err, MPFR_RNDU);

      /* stop once term has become smaller than the error on sum */
      if (mpz_sgn (term) == 0)
        nbits = 0;
      else
        {
          MPFR_MPZ_SIZEINBASE2 (nbits, term);
        }
    }
  while (nbits >= MPFR_GET_EXP (s_err));

  /* The truncation error is bounded by
     (|t|+eps)/k*(|x|/k + |x|^2/k^2 + ...) <= (|t|+eps)/k*|x|/(k-|x|). */
  mpz_abs (term, term);
  mpfr_add_z (t_err, t_err, term, MPFR_RNDU);
  mpfr_div_ui (t_err, t_err, k, MPFR_RNDU);
  mpfr_abs (u_err, x, MPFR_RNDU);               /* |x| */
  mpfr_mul (t_err, t_err, u_err, MPFR_RNDU);
  mpfr_ui_sub (u_err, k, u_err, MPFR_RNDD);     /* k - |x| */

  if (!MPFR_IS_NEG (u_err))
    {
      mpfr_div (t_err, t_err, u_err, MPFR_RNDU);
      mpfr_add (s_err, s_err, t_err, MPFR_RNDU);
      mpfr_set_z (y, sum, MPFR_RNDN);
      mpfr_div_2ui (y, y, wp, MPFR_RNDN);
      /* s_err bounds the absolute error on sum and must be expressed in
         ulp(y).  As ulp(y) = 2^(EXP(y)-PREC(y)) the error should be divided
         by 2^(EXP(y)-PREC(y)); but y itself was divided by
         2^wp = 2^PREC(y), so it suffices to divide by 2^EXP(y). */
      ex = MPFR_GET_EXP (s_err) - MPFR_GET_EXP (y);
    }
  else
    {
      /* the truncated series does not converge: report failure */
      ex = wp;
    }

  MPFR_GROUP_CLEAR (group);
  mpz_clear (sum);
  mpz_clear (term);
  mpz_clear (addend);
  mpz_clear (mant);
  return ex;
}