Exemplo n.º 1
0
Arquivo: pow.c Projeto: epowers/mpfr
/* The computation of z = pow(x,y) is done by
   z = exp(y * log(x)) = x^y
   For the special cases, see Section F.9.4.4 of the C standard:
     _ pow(±0, y) = ±inf for y an odd integer < 0.
     _ pow(±0, y) = +inf for y < 0 and not an odd integer.
     _ pow(±0, y) = ±0 for y an odd integer > 0.
     _ pow(±0, y) = +0 for y > 0 and not an odd integer.
     _ pow(-1, ±inf) = 1.
     _ pow(+1, y) = 1 for any y, even a NaN.
     _ pow(x, ±0) = 1 for any x, even a NaN.
     _ pow(x, y) = NaN for finite x < 0 and finite non-integer y.
     _ pow(x, -inf) = +inf for |x| < 1.
     _ pow(x, -inf) = +0 for |x| > 1.
     _ pow(x, +inf) = +0 for |x| < 1.
     _ pow(x, +inf) = +inf for |x| > 1.
     _ pow(-inf, y) = -0 for y an odd integer < 0.
     _ pow(-inf, y) = +0 for y < 0 and not an odd integer.
     _ pow(-inf, y) = -inf for y an odd integer > 0.
     _ pow(-inf, y) = +inf for y > 0 and not an odd integer.
     _ pow(+inf, y) = +0 for y < 0.
     _ pow(+inf, y) = +inf for y > 0. */
int
mpfr_pow (mpfr_ptr z, mpfr_srcptr x, mpfr_srcptr y, mpfr_rnd_t rnd_mode)
{
  int inexact;
  int cmp_x_1;
  int y_is_integer;
  MPFR_SAVE_EXPO_DECL (expo);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y, rnd_mode),
     ("z[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (z), mpfr_log_prec, z, inexact));

  if (MPFR_ARE_SINGULAR (x, y))
    {
      /* pow(x, 0) returns 1 for any x, even a NaN. */
      if (MPFR_UNLIKELY (MPFR_IS_ZERO (y)))
        return mpfr_set_ui (z, 1, rnd_mode);
      else if (MPFR_IS_NAN (x))
        {
          MPFR_SET_NAN (z);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_NAN (y))
        {
          /* pow(+1, NaN) returns 1. */
          if (mpfr_cmp_ui (x, 1) == 0)
            return mpfr_set_ui (z, 1, rnd_mode);
          MPFR_SET_NAN (z);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_INF (y))
        {
          if (MPFR_IS_INF (x))
            {
              if (MPFR_IS_POS (y))
                MPFR_SET_INF (z);
              else
                MPFR_SET_ZERO (z);
              MPFR_SET_POS (z);
              MPFR_RET (0);
            }
          else
            {
              int cmp;
              cmp = mpfr_cmpabs (x, __gmpfr_one) * MPFR_INT_SIGN (y);
              MPFR_SET_POS (z);
              if (cmp > 0)
                {
                  /* Return +inf. */
                  MPFR_SET_INF (z);
                  MPFR_RET (0);
                }
              else if (cmp < 0)
                {
                  /* Return +0. */
                  MPFR_SET_ZERO (z);
                  MPFR_RET (0);
                }
              else
                {
                  /* Return 1. */
                  return mpfr_set_ui (z, 1, rnd_mode);
                }
            }
        }
      else if (MPFR_IS_INF (x))
        {
          int negative;
          /* Determine the sign now, in case y and z are the same object */
          negative = MPFR_IS_NEG (x) && is_odd (y);
          if (MPFR_IS_POS (y))
            MPFR_SET_INF (z);
          else
            MPFR_SET_ZERO (z);
          if (negative)
            MPFR_SET_NEG (z);
          else
            MPFR_SET_POS (z);
          MPFR_RET (0);
        }
      else
        {
          int negative;
          MPFR_ASSERTD (MPFR_IS_ZERO (x));
          /* Determine the sign now, in case y and z are the same object */
          negative = MPFR_IS_NEG(x) && is_odd (y);
          if (MPFR_IS_NEG (y))
            {
              MPFR_ASSERTD (! MPFR_IS_INF (y));
              MPFR_SET_INF (z);
              mpfr_set_divby0 ();
            }
          else
            MPFR_SET_ZERO (z);
          if (negative)
            MPFR_SET_NEG (z);
          else
            MPFR_SET_POS (z);
          MPFR_RET (0);
        }
    }

  /* x^y for x < 0 and y not an integer is not defined */
  y_is_integer = mpfr_integer_p (y);
  if (MPFR_IS_NEG (x) && ! y_is_integer)
    {
      MPFR_SET_NAN (z);
      MPFR_RET_NAN;
    }

  /* now the result cannot be NaN:
     (1) either x > 0
     (2) or x < 0 and y is an integer */

  cmp_x_1 = mpfr_cmpabs (x, __gmpfr_one);
  if (cmp_x_1 == 0)
    return mpfr_set_si (z, MPFR_IS_NEG (x) && is_odd (y) ? -1 : 1, rnd_mode);

  /* now we have:
     (1) either x > 0
     (2) or x < 0 and y is an integer
     and in addition |x| <> 1.
  */

  /* detect overflow: an overflow is possible if
     (a) |x| > 1 and y > 0
     (b) |x| < 1 and y < 0.
     FIXME: this assumes 1 is always representable.

     FIXME2: maybe we can test overflow and underflow simultaneously.
     The idea is the following: first compute an approximation to
     y * log2|x|, using rounding to nearest. If |x| is not too near from 1,
     this approximation should be accurate enough, and in most cases enable
     one to prove that there is no underflow nor overflow.
     Otherwise, it should enable one to check only underflow or overflow,
     instead of both cases as in the present case.
  */
  if (cmp_x_1 * MPFR_SIGN (y) > 0)
    {
      mpfr_t t;
      int negative, overflow;

      MPFR_SAVE_EXPO_MARK (expo);
      mpfr_init2 (t, 53);
      /* we want a lower bound on y*log2|x|:
         (i) if x > 0, it suffices to round log2(x) toward zero, and
             to round y*o(log2(x)) toward zero too;
         (ii) if x < 0, we first compute t = o(-x), with rounding toward 1,
              and then follow as in case (1). */
      if (MPFR_SIGN (x) > 0)
        mpfr_log2 (t, x, MPFR_RNDZ);
      else
        {
          mpfr_neg (t, x, (cmp_x_1 > 0) ? MPFR_RNDZ : MPFR_RNDU);
          mpfr_log2 (t, t, MPFR_RNDZ);
        }
      mpfr_mul (t, t, y, MPFR_RNDZ);
      overflow = mpfr_cmp_si (t, __gmpfr_emax) > 0;
      mpfr_clear (t);
      MPFR_SAVE_EXPO_FREE (expo);
      if (overflow)
        {
          MPFR_LOG_MSG (("early overflow detection\n", 0));
          negative = MPFR_SIGN(x) < 0 && is_odd (y);
          return mpfr_overflow (z, rnd_mode, negative ? -1 : 1);
        }
    }

  /* Basic underflow checking. One has:
   *   - if y > 0, |x^y| < 2^(EXP(x) * y);
   *   - if y < 0, |x^y| <= 2^((EXP(x) - 1) * y);
   * so that one can compute a value ebound such that |x^y| < 2^ebound.
   * If we have ebound <= emin - 2 (emin - 1 in directed rounding modes),
   * then there is an underflow and we can decide the return value.
   */
  if (MPFR_IS_NEG (y) ? (MPFR_GET_EXP (x) > 1) : (MPFR_GET_EXP (x) < 0))
    {
      mpfr_t tmp;
      mpfr_eexp_t ebound;
      int inex2;

      /* We must restore the flags. */
      MPFR_SAVE_EXPO_MARK (expo);
      mpfr_init2 (tmp, sizeof (mpfr_exp_t) * CHAR_BIT);
      inex2 = mpfr_set_exp_t (tmp, MPFR_GET_EXP (x), MPFR_RNDN);
      MPFR_ASSERTN (inex2 == 0);
      if (MPFR_IS_NEG (y))
        {
          inex2 = mpfr_sub_ui (tmp, tmp, 1, MPFR_RNDN);
          MPFR_ASSERTN (inex2 == 0);
        }
      mpfr_mul (tmp, tmp, y, MPFR_RNDU);
      if (MPFR_IS_NEG (y))
        mpfr_nextabove (tmp);
      /* tmp doesn't necessarily fit in ebound, but that doesn't matter
         since we get the minimum value in such a case. */
      ebound = mpfr_get_exp_t (tmp, MPFR_RNDU);
      mpfr_clear (tmp);
      MPFR_SAVE_EXPO_FREE (expo);
      if (MPFR_UNLIKELY (ebound <=
                         __gmpfr_emin - (rnd_mode == MPFR_RNDN ? 2 : 1)))
        {
          /* warning: mpfr_underflow rounds away from 0 for MPFR_RNDN */
          MPFR_LOG_MSG (("early underflow detection\n", 0));
          return mpfr_underflow (z,
                                 rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
                                 MPFR_SIGN (x) < 0 && is_odd (y) ? -1 : 1);
        }
    }

  /* If y is an integer, we can use mpfr_pow_z (based on multiplications),
     but if y is very large (I'm not sure about the best threshold -- VL),
     we shouldn't use it, as it can be very slow and take a lot of memory
     (and even crash or make other programs crash, as several hundred of
     MBs may be necessary). Note that in such a case, either x = +/-2^b
     (this case is handled below) or x^y cannot be represented exactly in
     any precision supported by MPFR (the general case uses this property).
  */
  if (y_is_integer && (MPFR_GET_EXP (y) <= 256))
    {
      mpz_t zi;

      MPFR_LOG_MSG (("special code for y not too large integer\n", 0));
      mpz_init (zi);
      mpfr_get_z (zi, y, MPFR_RNDN);
      inexact = mpfr_pow_z (z, x, zi, rnd_mode);
      mpz_clear (zi);
      return inexact;
    }

  /* Special case (+/-2^b)^Y which could be exact. If x is negative, then
     necessarily y is a large integer. */
  {
    mpfr_exp_t b = MPFR_GET_EXP (x) - 1;

    MPFR_ASSERTN (b >= LONG_MIN && b <= LONG_MAX);  /* FIXME... */
    if (mpfr_cmp_si_2exp (x, MPFR_SIGN(x), b) == 0)
      {
        mpfr_t tmp;
        int sgnx = MPFR_SIGN (x);

        MPFR_LOG_MSG (("special case (+/-2^b)^Y\n", 0));
        /* now x = +/-2^b, so x^y = (+/-1)^y*2^(b*y) is exact whenever b*y is
           an integer */
        MPFR_SAVE_EXPO_MARK (expo);
        mpfr_init2 (tmp, MPFR_PREC (y) + sizeof (long) * CHAR_BIT);
        inexact = mpfr_mul_si (tmp, y, b, MPFR_RNDN); /* exact */
        MPFR_ASSERTN (inexact == 0);
        /* Note: as the exponent range has been extended, an overflow is not
           possible (due to basic overflow and underflow checking above, as
           the result is ~ 2^tmp), and an underflow is not possible either
           because b is an integer (thus either 0 or >= 1). */
        MPFR_CLEAR_FLAGS ();
        inexact = mpfr_exp2 (z, tmp, rnd_mode);
        mpfr_clear (tmp);
        if (sgnx < 0 && is_odd (y))
          {
            mpfr_neg (z, z, rnd_mode);
            inexact = -inexact;
          }
        /* Without the following, the overflows3 test in tpow.c fails. */
        MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
        MPFR_SAVE_EXPO_FREE (expo);
        return mpfr_check_range (z, inexact, rnd_mode);
      }
  }

  MPFR_SAVE_EXPO_MARK (expo);

  /* Case where |y * log(x)| is very small. Warning: x can be negative, in
     that case y is a large integer. */
  {
    mpfr_t t;
    mpfr_exp_t err;

    /* We need an upper bound on the exponent of y * log(x). */
    mpfr_init2 (t, 16);
    if (MPFR_IS_POS(x))
      mpfr_log (t, x, cmp_x_1 < 0 ? MPFR_RNDD : MPFR_RNDU); /* away from 0 */
    else
      {
        /* if x < -1, round to +Inf, else round to zero */
        mpfr_neg (t, x, (mpfr_cmp_si (x, -1) < 0) ? MPFR_RNDU : MPFR_RNDD);
        mpfr_log (t, t, (mpfr_cmp_ui (t, 1) < 0) ? MPFR_RNDD : MPFR_RNDU);
      }
    MPFR_ASSERTN (MPFR_IS_PURE_FP (t));
    err = MPFR_GET_EXP (y) + MPFR_GET_EXP (t);
    mpfr_clear (t);
    MPFR_CLEAR_FLAGS ();
    MPFR_SMALL_INPUT_AFTER_SAVE_EXPO (z, __gmpfr_one, - err, 0,
                                      (MPFR_SIGN (y) > 0) ^ (cmp_x_1 < 0),
                                      rnd_mode, expo, {});
  }

  /* General case */
  inexact = mpfr_pow_general (z, x, y, rnd_mode, y_is_integer, &expo);

  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (z, inexact, rnd_mode);
}
Exemplo n.º 2
0
Arquivo: pow.c Projeto: epowers/mpfr
/* Assumes that the exponent range has already been extended and if y is
   an integer, then the result is not exact in unbounded exponent range. */
int
mpfr_pow_general (mpfr_ptr z, mpfr_srcptr x, mpfr_srcptr y,
                  mpfr_rnd_t rnd_mode, int y_is_integer, mpfr_save_expo_t *expo)
{
  mpfr_t t, u, k, absx;
  int neg_result = 0;
  int k_non_zero = 0;
  int check_exact_case = 0;
  int inexact;
  /* Declaration of the size variable */
  mpfr_prec_t Nz = MPFR_PREC(z);               /* target precision */
  mpfr_prec_t Nt;                              /* working precision */
  mpfr_exp_t err;                              /* error */
  MPFR_ZIV_DECL (ziv_loop);


  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y, rnd_mode),
     ("z[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (z), mpfr_log_prec, z, inexact));

  /* We put the absolute value of x in absx, pointing to the significand
     of x to avoid allocating memory for the significand of absx. */
  MPFR_ALIAS(absx, x, /*sign=*/ 1, /*EXP=*/ MPFR_EXP(x));

  /* We will compute the absolute value of the result. So, let's
     invert the rounding mode if the result is negative. */
  if (MPFR_IS_NEG (x) && is_odd (y))
    {
      neg_result = 1;
      rnd_mode = MPFR_INVERT_RND (rnd_mode);
    }

  /* compute the precision of intermediary variable */
  /* the optimal number of bits : see algorithms.tex */
  Nt = Nz + 5 + MPFR_INT_CEIL_LOG2 (Nz);

  /* initialise of intermediary variable */
  mpfr_init2 (t, Nt);

  MPFR_ZIV_INIT (ziv_loop, Nt);
  for (;;)
    {
      MPFR_BLOCK_DECL (flags1);

      /* compute exp(y*ln|x|), using MPFR_RNDU to get an upper bound, so
         that we can detect underflows. */
      mpfr_log (t, absx, MPFR_IS_NEG (y) ? MPFR_RNDD : MPFR_RNDU); /* ln|x| */
      mpfr_mul (t, y, t, MPFR_RNDU);                              /* y*ln|x| */
      if (k_non_zero)
        {
          MPFR_LOG_MSG (("subtract k * ln(2)\n", 0));
          mpfr_const_log2 (u, MPFR_RNDD);
          mpfr_mul (u, u, k, MPFR_RNDD);
          /* Error on u = k * log(2): < k * 2^(-Nt) < 1. */
          mpfr_sub (t, t, u, MPFR_RNDU);
          MPFR_LOG_MSG (("t = y * ln|x| - k * ln(2)\n", 0));
          MPFR_LOG_VAR (t);
        }
      /* estimate of the error -- see pow function in algorithms.tex.
         The error on t is at most 1/2 + 3*2^(EXP(t)+1) ulps, which is
         <= 2^(EXP(t)+3) for EXP(t) >= -1, and <= 2 ulps for EXP(t) <= -2.
         Additional error if k_no_zero: treal = t * errk, with
         1 - |k| * 2^(-Nt) <= exp(-|k| * 2^(-Nt)) <= errk <= 1,
         i.e., additional absolute error <= 2^(EXP(k)+EXP(t)-Nt).
         Total error <= 2^err1 + 2^err2 <= 2^(max(err1,err2)+1). */
      err = MPFR_NOTZERO (t) && MPFR_GET_EXP (t) >= -1 ?
        MPFR_GET_EXP (t) + 3 : 1;
      if (k_non_zero)
        {
          if (MPFR_GET_EXP (k) > err)
            err = MPFR_GET_EXP (k);
          err++;
        }
      MPFR_BLOCK (flags1, mpfr_exp (t, t, MPFR_RNDN));  /* exp(y*ln|x|)*/
      /* We need to test */
      if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (t) || MPFR_UNDERFLOW (flags1)))
        {
          mpfr_prec_t Ntmin;
          MPFR_BLOCK_DECL (flags2);

          MPFR_ASSERTN (!k_non_zero);
          MPFR_ASSERTN (!MPFR_IS_NAN (t));

          /* Real underflow? */
          if (MPFR_IS_ZERO (t))
            {
              /* Underflow. We computed rndn(exp(t)), where t >= y*ln|x|.
                 Therefore rndn(|x|^y) = 0, and we have a real underflow on
                 |x|^y. */
              inexact = mpfr_underflow (z, rnd_mode == MPFR_RNDN ? MPFR_RNDZ
                                        : rnd_mode, MPFR_SIGN_POS);
              if (expo != NULL)
                MPFR_SAVE_EXPO_UPDATE_FLAGS (*expo, MPFR_FLAGS_INEXACT
                                             | MPFR_FLAGS_UNDERFLOW);
              break;
            }

          /* Real overflow? */
          if (MPFR_IS_INF (t))
            {
              /* Note: we can probably use a low precision for this test. */
              mpfr_log (t, absx, MPFR_IS_NEG (y) ? MPFR_RNDU : MPFR_RNDD);
              mpfr_mul (t, y, t, MPFR_RNDD);            /* y * ln|x| */
              MPFR_BLOCK (flags2, mpfr_exp (t, t, MPFR_RNDD));
              /* t = lower bound on exp(y * ln|x|) */
              if (MPFR_OVERFLOW (flags2))
                {
                  /* We have computed a lower bound on |x|^y, and it
                     overflowed. Therefore we have a real overflow
                     on |x|^y. */
                  inexact = mpfr_overflow (z, rnd_mode, MPFR_SIGN_POS);
                  if (expo != NULL)
                    MPFR_SAVE_EXPO_UPDATE_FLAGS (*expo, MPFR_FLAGS_INEXACT
                                                 | MPFR_FLAGS_OVERFLOW);
                  break;
                }
            }

          k_non_zero = 1;
          Ntmin = sizeof(mpfr_exp_t) * CHAR_BIT;
          if (Ntmin > Nt)
            {
              Nt = Ntmin;
              mpfr_set_prec (t, Nt);
            }
          mpfr_init2 (u, Nt);
          mpfr_init2 (k, Ntmin);
          mpfr_log2 (k, absx, MPFR_RNDN);
          mpfr_mul (k, y, k, MPFR_RNDN);
          mpfr_round (k, k);
          MPFR_LOG_VAR (k);
          /* |y| < 2^Ntmin, therefore |k| < 2^Nt. */
          continue;
        }
      if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - err, Nz, rnd_mode)))
        {
          inexact = mpfr_set (z, t, rnd_mode);
          break;
        }

      /* check exact power, except when y is an integer (since the
         exact cases for y integer have already been filtered out) */
      if (check_exact_case == 0 && ! y_is_integer)
        {
          if (mpfr_pow_is_exact (z, absx, y, rnd_mode, &inexact))
            break;
          check_exact_case = 1;
        }

      /* reactualisation of the precision */
      MPFR_ZIV_NEXT (ziv_loop, Nt);
      mpfr_set_prec (t, Nt);
      if (k_non_zero)
        mpfr_set_prec (u, Nt);
    }
  MPFR_ZIV_FREE (ziv_loop);

  if (k_non_zero)
    {
      int inex2;
      long lk;

      /* The rounded result in an unbounded exponent range is z * 2^k. As
       * MPFR chooses underflow after rounding, the mpfr_mul_2si below will
       * correctly detect underflows and overflows. However, in rounding to
       * nearest, if z * 2^k = 2^(emin - 2), then the double rounding may
       * affect the result. We need to cope with that before overwriting z.
       * This can occur only if k < 0 (this test is necessary to avoid a
       * potential integer overflow).
       * If inexact >= 0, then the real result is <= 2^(emin - 2), so that
       * o(2^(emin - 2)) = +0 is correct. If inexact < 0, then the real
       * result is > 2^(emin - 2) and we need to round to 2^(emin - 1).
       */
      MPFR_ASSERTN (MPFR_EXP_MAX <= LONG_MAX);
      lk = mpfr_get_si (k, MPFR_RNDN);
      /* Due to early overflow detection, |k| should not be much larger than
       * MPFR_EMAX_MAX, and as MPFR_EMAX_MAX <= MPFR_EXP_MAX/2 <= LONG_MAX/2,
       * an overflow should not be possible in mpfr_get_si (and lk is exact).
       * And one even has the following assertion. TODO: complete proof.
       */
      MPFR_ASSERTD (lk > LONG_MIN && lk < LONG_MAX);
      /* Note: even in case of overflow (lk inexact), the code is correct.
       * Indeed, for the 3 occurrences of lk:
       *   - The test lk < 0 is correct as sign(lk) = sign(k).
       *   - In the test MPFR_GET_EXP (z) == __gmpfr_emin - 1 - lk,
       *     if lk is inexact, then lk = LONG_MIN <= MPFR_EXP_MIN
       *     (the minimum value of the mpfr_exp_t type), and
       *     __gmpfr_emin - 1 - lk >= MPFR_EMIN_MIN - 1 - 2 * MPFR_EMIN_MIN
       *     >= - MPFR_EMIN_MIN - 1 = MPFR_EMAX_MAX - 1. However, from the
       *     choice of k, z has been chosen to be around 1, so that the
       *     result of the test is false, as if lk were exact.
       *   - In the mpfr_mul_2si (z, z, lk, rnd_mode), if lk is inexact,
       *     then |lk| >= LONG_MAX >= MPFR_EXP_MAX, and as z is around 1,
       *     mpfr_mul_2si underflows or overflows in the same way as if
       *     lk were exact.
       * TODO: give a bound on |t|, then on |EXP(z)|.
       */
      if (rnd_mode == MPFR_RNDN && inexact < 0 && lk < 0 &&
          MPFR_GET_EXP (z) == __gmpfr_emin - 1 - lk && mpfr_powerof2_raw (z))
        {
          /* Rounding to nearest, real result > z * 2^k = 2^(emin - 2),
           * underflow case: as the minimum precision is > 1, we will
           * obtain the correct result and exceptions by replacing z by
           * nextabove(z).
           */
          MPFR_ASSERTN (MPFR_PREC_MIN > 1);
          mpfr_nextabove (z);
        }
      MPFR_CLEAR_FLAGS ();
      inex2 = mpfr_mul_2si (z, z, lk, rnd_mode);
      if (inex2)  /* underflow or overflow */
        {
          inexact = inex2;
          if (expo != NULL)
            MPFR_SAVE_EXPO_UPDATE_FLAGS (*expo, __gmpfr_flags);
        }
      mpfr_clears (u, k, (mpfr_ptr) 0);
    }
  mpfr_clear (t);

  /* update the sign of the result if x was negative */
  if (neg_result)
    {
      MPFR_SET_NEG(z);
      inexact = -inexact;
    }

  return inexact;
}
Exemplo n.º 3
0
int fmpq_poly_oz_sqrt_approx_babylonian(fmpq_poly_t f_sqrt, const fmpq_poly_t f, const long n, const mpfr_prec_t prec, const mpfr_prec_t bound, oz_flag_t flags, const fmpq_poly_t init) {
  fmpq_poly_t y;      fmpq_poly_init(y);
  fmpq_poly_t y_next; fmpq_poly_init(y_next);

  mpfr_t norm;      mpfr_init2(norm, prec);
  mpfr_t prev_norm; mpfr_init2(prev_norm, prec);

  if (init) {
    fmpq_poly_set(y, init);
  } else {
    fmpq_poly_set(y, f);
  }

  mpfr_t log_f;
  mpfr_init2(log_f, prec);

  uint64_t t = oz_walltime(0);
  int r = 0;

  for(long k=0; ; k++) {
    _fmpq_poly_oz_invert_approx(y_next, y, n, prec);
    fmpq_poly_oz_mul(y_next, f, y_next, n);
    fmpq_poly_add(y_next, y_next, y);
    fmpq_poly_scalar_div_si(y_next, y_next, 2);
    fmpq_poly_set(y, y_next);

    r = _fmpq_poly_oz_sqrt_approx_break(norm, y, f, n, bound, prec);

    if(flags & OZ_VERBOSE) {
      mpfr_log2(log_f, norm, MPFR_RNDN);
      mpfr_fprintf(stderr, "Computing sqrt(Σ)::  k: %4d,  Δ=|sqrt(Σ)^2-Σ|: %7.2Rf", k, log_f);
      fprintf(stderr, " <? %4ld, ", -bound);
      fprintf(stderr, "t: %8.2fs\n", oz_seconds(oz_walltime(t)));
      fflush(0);
    }

    if(r) {
      r = 0;
      break;
    }

    if (k>0 && mpfr_cmp_ui_2exp(norm, 1, bound) >= 0) {
      /* something went really wrong */
      r = -1;
      break;
    }

    mpfr_div_ui(prev_norm, prev_norm, 2, MPFR_RNDN);
    if (k>0 && mpfr_cmp(norm, prev_norm) >= 0) {
      /*  we don't converge any more */
      r = 1;
      break;
    }
    mpfr_set(prev_norm, norm, MPFR_RNDN);
  }
  mpfr_clear(log_f);
  fmpq_poly_set(f_sqrt, y);
  mpfr_clear(norm);
  mpfr_clear(prev_norm);
  fmpq_poly_clear(y_next);
  fmpq_poly_clear(y);
  return r;
}
Exemplo n.º 4
0
int fmpq_poly_oz_sqrt_approx_pade(fmpq_poly_t f_sqrt, const fmpq_poly_t f, const long n, const int p, const mpfr_prec_t prec, const mpfr_prec_t bound, oz_flag_t flags, const fmpq_poly_t init) {
  fmpq_poly_t y;       fmpq_poly_init(y);
  fmpq_poly_t y_next;  fmpq_poly_init(y_next);
  fmpq_poly_t z;       fmpq_poly_init(z);
  fmpq_poly_t z_next;  fmpq_poly_init(z_next);

  mpfr_t norm;      mpfr_init2(norm, prec);
  mpfr_t prev_norm; mpfr_init2(prev_norm, prec);
  mpfr_t log_f;     mpfr_init2(log_f, prec);

  if (init) {
    // z = y/x
    fmpq_poly_set(y, init);
    _fmpq_poly_oz_invert_approx(z, f, n, prec);
    fmpq_poly_oz_mul(z, z, y, n);
  } else {
    fmpq_poly_set(y, f);
    fmpq_poly_set_coeff_si(z, 0, 1);
  }

  fmpq_t *xi = (fmpq_t*)calloc(p, sizeof(fmpq_t));
  fmpq_t *a2 = (fmpq_t*)calloc(p, sizeof(fmpq_t));
  fmpq_t *c  = (fmpq_t*)calloc(p, sizeof(fmpq_t));
  fmpq_poly_t *t_ = (fmpq_poly_t*)calloc(p, sizeof(fmpq_poly_t));
  fmpq_poly_t *s_ = (fmpq_poly_t*)calloc(p, sizeof(fmpq_poly_t));

  mpfr_t pi;  mpfr_init2(pi, 4*prec);
  mpfr_const_pi(pi, MPFR_RNDN);

#pragma omp parallel for
  for(int i=0; i<p; i++) {
    mpfr_t xi_r; mpfr_init2(xi_r, 4*prec);
    mpfr_t a2_r; mpfr_init2(a2_r, 4*prec);

    /*  ζ_i = 1/2 * (1 + cos( (2·i -1)·π/(2·p) )) */
    mpfr_set_si(xi_r, 2*i+1, MPFR_RNDN);
    mpfr_mul(xi_r, xi_r, pi, MPFR_RNDN);
    mpfr_div_si(xi_r, xi_r, 2*p, MPFR_RNDN);
    mpfr_cos(xi_r, xi_r, MPFR_RNDN);
    mpfr_add_si(xi_r, xi_r, 1, MPFR_RNDN);
    mpfr_div_si(xi_r, xi_r, 2, MPFR_RNDN);

    /* α_i^2 = 1/ζ_i -1 */
    mpfr_set_si(a2_r, 1, MPFR_RNDN);
    mpfr_div(a2_r, a2_r, xi_r, MPFR_RNDN);
    mpfr_sub_si(a2_r, a2_r, 1, MPFR_RNDN);

    fmpq_init(xi[i]);
    fmpq_init(a2[i]);
    fmpq_set_mpfr(xi[i], xi_r, MPFR_RNDN);
    fmpq_set_mpfr(a2[i], a2_r, MPFR_RNDN);

    fmpq_init(c[i]);
    fmpq_poly_init(t_[i]);
    fmpq_poly_init(s_[i]);

    mpfr_clear(xi_r);
    mpfr_clear(a2_r);
  }

  mpfr_clear(pi);

  uint64_t t = oz_walltime(0);

  int r = 0;
  int cont = 1;
  for(long  k=0; cont; k++) {
    if (k == 0 || mpfr_cmp_ui(prev_norm, 1) > 0)
      _fmpq_poly_oz_sqrt_approx_scale(y, z, n, prec);

    /*   T = sum([1/xi[i] * ~(Z*Y + a2[i]) for i in range(p)]) */
#pragma omp parallel for
  for(int i=0; i<p; i++) {
    fmpq_poly_oz_mul(t_[i], z, y, n);
    fmpq_poly_get_coeff_fmpq(c[i], t_[i], 0);
    fmpq_add(c[i], c[i], a2[i]);
    fmpq_poly_set_coeff_fmpq(t_[i], 0, c[i]);
    fmpq_poly_scalar_mul_fmpq(t_[i], t_[i], xi[i]);
    _fmpq_poly_oz_invert_approx(s_[i], t_[i], n, prec);
  }

  for(int i=1; i<p; i++)
    fmpq_poly_add(s_[0],   s_[0], s_[i]);

#pragma omp parallel sections
    {
#pragma omp section
      {
        fmpq_poly_oz_mul(y_next, y, s_[0], n);
        fmpq_poly_scalar_div_si(y_next, y_next, p);
        fmpq_poly_set(y, y_next);
      }
#pragma omp section
      {
        fmpq_poly_oz_mul(z_next, z, s_[0], n);
        fmpq_poly_scalar_div_si(z_next, z_next, p);
        fmpq_poly_set(z, z_next);
      }
    }
    cont = !_fmpq_poly_oz_sqrt_approx_break(norm, y, f, n, bound, prec);

    if(flags & OZ_VERBOSE) {
      mpfr_log2(log_f, norm, MPFR_RNDN);
      mpfr_fprintf(stderr, "Computing sqrt(Σ)::  k: %4d,  Δ=|sqrt(Σ)^2-Σ|: %7.2Rf", k, log_f);
      fprintf(stderr, " <? %4ld, ", -bound);
      fprintf(stderr, "t: %8.2fs\n", oz_seconds(oz_walltime(t)));
      fflush(0);
    }

    if (cont) {
      if (k>0 && mpfr_cmp_ui_2exp(norm, 1, bound) >= 0) {
        /* something went really wrong */
        r = -1;
        break;
      }
      if (k>0 && mpfr_cmp(norm, prev_norm) >= 0) {
        /*  we don't converge any more */
        r = 1;
        break;
      }
      mpfr_set(prev_norm, norm, MPFR_RNDN);
    }
  }

  for(int i=0; i<p; i++) {
    fmpq_clear(xi[i]);
    fmpq_clear(a2[i]);
    fmpq_clear(c[i]);
    fmpq_poly_clear(t_[i]);
    fmpq_poly_clear(s_[i]);
  }
  free(xi);
  free(a2);
  free(c);
  free(t_);
  free(s_);

  mpfr_clear(log_f);
  fmpq_poly_set(f_sqrt, y);
  mpfr_clear(norm);
  mpfr_clear(prev_norm);
  fmpq_poly_clear(y_next);
  fmpq_poly_clear(y);
  fmpq_poly_clear(z_next);
  fmpq_poly_clear(z);
  return r;
}
Exemplo n.º 5
0
void generate_2D_sample (FILE *output, struct speed_params2D param)
{
  mpfr_t temp;
  double incr_prec;
  mpfr_t incr_x;
  mpfr_t x, x2;
  double prec;
  struct speed_params s;
  int i;
  int test;
  int nb_functions;
  double *t; /* store the timing of each implementation */

  /* We first determine how many implementations we have */
  nb_functions = 0;
  while (param.speed_funcs[nb_functions] != NULL)
    nb_functions++;

  t = malloc (nb_functions * sizeof (double));
  if (t == NULL)
    {
      fprintf (stderr, "Can't allocate memory.\n");
      abort ();
    }


  mpfr_init2 (temp, MPFR_SMALL_PRECISION);

  /* The precision is sampled from min_prec to max_prec with        */
  /* approximately nb_points_prec points. If logarithmic_scale_prec */
  /* is true, the precision is multiplied by incr_prec at each      */
  /* step. Otherwise, incr_prec is added at each step.              */
  if (param.logarithmic_scale_prec)
    {
      mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
      mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
      mpfr_root (temp, temp,
                 (unsigned long int)param.nb_points_prec, MPFR_RNDU);
      incr_prec = mpfr_get_d (temp, MPFR_RNDU);
    }
  else
    {
      incr_prec = (double)param.max_prec - (double)param.min_prec;
      incr_prec = incr_prec/((double)param.nb_points_prec);
    }

  /* The points x are sampled according to the following rule:             */
  /* If logarithmic_scale_x = 0:                                           */
  /*    nb_points_x points are equally distributed between min_x and max_x */
  /* If logarithmic_scale_x = 1:                                           */
  /*    nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At     */
  /*    each step, the current point is multiplied by incr_x.              */
  /* If logarithmic_scale_x = -1:                                          */
  /*    nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x)     */
  /*    (at each step, the current point is divided by incr_x);  and       */
  /*    nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x)       */
  /*    (at each step, the current point is multiplied by incr_x).         */
  mpfr_init2 (incr_x, param.max_prec);
  if (param.logarithmic_scale_x == 0)
    {
      mpfr_set_d (incr_x,
                  (param.max_x - param.min_x)/(double)param.nb_points_x,
                  MPFR_RNDU);
    }
  else if (param.logarithmic_scale_x == -1)
    {
      mpfr_set_d (incr_x,
                  2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
                  MPFR_RNDU);
      mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
    }
  else
    { /* other values of param.logarithmic_scale_x are considered as 1 */
      mpfr_set_d (incr_x,
                  (param.max_x - param.min_x)/(double)param.nb_points_x,
                  MPFR_RNDU);
      mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
    }

  /* Main loop */
  mpfr_init2 (x, param.max_prec);
  mpfr_init2 (x2, param.max_prec);
  prec = (double)param.min_prec;
  while (prec <= param.max_prec)
    {
      printf ("prec = %d\n", (int)prec);
      if (param.logarithmic_scale_x == 0)
        mpfr_set_d (temp, param.min_x, MPFR_RNDU);
      else if (param.logarithmic_scale_x == -1)
        {
          mpfr_set_d (temp, param.max_x, MPFR_RNDD);
          mpfr_exp2 (temp, temp, MPFR_RNDD);
          mpfr_neg (temp, temp, MPFR_RNDU);
        }
      else
        {
          mpfr_set_d (temp, param.min_x, MPFR_RNDD);
          mpfr_exp2 (temp, temp, MPFR_RNDD);
        }

      /* We perturb x a little bit, in order to avoid trailing zeros that */
      /* might change the behavior of algorithms.                         */
      mpfr_const_pi (x, MPFR_RNDN);
      mpfr_div_2ui (x, x, 7, MPFR_RNDN);
      mpfr_add_ui (x, x, 1, MPFR_RNDN);
      mpfr_mul (x, x, temp, MPFR_RNDN);

      test = 1;
      while (test)
        {
          mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
          mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);

          s.r = (mp_limb_t)mpfr_get_exp (x);
          s.size = (mpfr_prec_t)prec;
          s.align_xp = (mpfr_sgn (x) > 0)?1:2;
          mpfr_set_prec (x2, (mpfr_prec_t)prec);
          mpfr_set (x2, x, MPFR_RNDU);
          s.xp = x2->_mpfr_d;

          for (i=0; i<nb_functions; i++)
            {
              t[i] = speed_measure (param.speed_funcs[i], &s);
              mpfr_fprintf (output, "%e\t", t[i]);
            }
          fprintf (output, "%d\n", 1 + find_best (t, nb_functions));

          if (param.logarithmic_scale_x == 0)
            {
              mpfr_add (x, x, incr_x, MPFR_RNDU);
              if (mpfr_cmp_d (x, param.max_x) > 0)
                test=0;
            }
          else
            {
              if (mpfr_sgn (x) < 0 )
                { /* if x<0, it means that logarithmic_scale_x=-1 */
                  mpfr_div (x, x, incr_x, MPFR_RNDU);
                  mpfr_abs (temp, x, MPFR_RNDD);
                  mpfr_log2 (temp, temp, MPFR_RNDD);
                  if (mpfr_cmp_d (temp, param.min_x) < 0)
                    mpfr_neg (x, x, MPFR_RNDN);
                }
              else
                {
                  mpfr_mul (x, x, incr_x, MPFR_RNDU);
                  mpfr_set (temp, x, MPFR_RNDD);
                  mpfr_log2 (temp, temp, MPFR_RNDD);
                  if (mpfr_cmp_d (temp, param.max_x) > 0)
                    test=0;
                }
            }
        }

      prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
               : (prec + incr_prec) );
      fprintf (output, "\n");
    }

  free (t);
  mpfr_clear (incr_x);
  mpfr_clear (x);
  mpfr_clear (x2);
  mpfr_clear (temp);

  return;
}
void test_all() {
  long long int 
    failures_libm=0,
#ifdef HAVE_MATHLIB_H
    failures_libultim=0,
#endif
#ifdef HAVE_LIBMCR_H
    failures_libmcr=0,
#endif
    failures_crlibm=0;
  long long int i;
  double worst_err, global_worst_err=-200;
  db_number global_worst_inpt, global_worst_inpt2;

  i=0; 
  while(1+1==2)
  {
    input.d = randfun();
    input2.d = randfun();
    if (input.d>input2.d)
    {
      double temp=input.d;
      input.d=input2.d;
      input2.d=temp;
    }
/*    db_number ia,ib;
    ia.i[HI]=0x31100afb;
    ia.i[LO]=0x198a95fe;
    ib.i[HI]=0x3d42897b;
    ib.i[LO]=0x84591a4e;
    input.d=ia.d; input2.d=ib.d;*/
    ASSIGN_LOW(input_i,input.d);
    ASSIGN_UP(input_i,input2.d);
   

    res_crlibm = testfun_crlibm_interval(input_i);
    res_crlibm_low.d=LOW(res_crlibm);
    res_crlibm_up.d=UP(res_crlibm);
    mpfr_set_d(mp_inpt, input.d, GMP_RNDN);
    testfun_mpfr(mp_res, mp_inpt, GMP_RNDD);
    res_mpfr_low.d = mpfr_get_d(mp_res, GMP_RNDD);
    mpfr_set_d(mp_inpt, input2.d, GMP_RNDN);
    testfun_mpfr(mp_res, mp_inpt, GMP_RNDU);
    res_mpfr_up.d = mpfr_get_d(mp_res, GMP_RNDU);
/*    printHexa("resul crlibm low:",res_crlibm_low.d);
    printHexa("resul crlibm up:",res_crlibm_up.d);
    printHexa("resul mpfr low:",res_mpfr_low.d);
    printHexa("resul mpfr up:",res_mpfr_up.d);*/
    

#if PRINT_NAN
    if(1){
#else
      if( ((res_mpfr_low.i[HI] & 0x7ff00000) != 0x7ff00000)
       && ((res_mpfr_up.i[HI] & 0x7ff00000) != 0x7ff00000) ) {
#endif
	if( (res_crlibm_low.i[LO] != res_mpfr_low.i[LO]) 
	    || (res_crlibm_low.i[HI] != res_mpfr_low.i[HI])
	    || (res_crlibm_up.i[LO] != res_mpfr_up.i[LO])
	    || (res_crlibm_up.i[HI] != res_mpfr_up.i[HI]) )
	    {
#if DETAILED_REPORT
	  printf("*** CRLIBM ERROR ***\n");
	  PRINT_INPUT_ERROR;
	  printf("crlibm gives    [%.50e,%.50e] \n         [(%08x %08x),(%08x %08x)] \n", 
		 res_crlibm_low.d, res_crlibm_up.d,
		 res_crlibm_low.i[HI], res_crlibm_low.i[LO],
		 res_crlibm_up.i[HI], res_crlibm_up.i[LO]);
	  printf("MPFR gives      [%.50e,%.50e] \n         [(%08x %08x),(%08x %08x)] \n\n", 
		 res_mpfr_low.d, 
		 res_mpfr_up.d,
		 res_mpfr_low.i[HI], 
		 res_mpfr_low.i[LO],
		 res_mpfr_up.i[HI],
		 res_mpfr_up.i[LO]
		 );
#endif
#if WORST_ERROR_REPORT
	  mpfr_set_d(mp_inpt, res_crlibm_low.d, GMP_RNDN);  
	  mpfr_sub(mp_inpt, mp_inpt, mp_res, GMP_RNDN);  
	  mpfr_div(mp_inpt, mp_inpt, mp_res, GMP_RNDN);
	  mpfr_abs(mp_inpt, mp_inpt, GMP_RNDN);
	  mpfr_log2(mp_inpt, mp_inpt, GMP_RNDN);
	  worst_err=mpfr_get_d(mp_inpt, GMP_RNDN);

	  if (worst_err>global_worst_err){
	    global_worst_err=worst_err;
	    global_worst_inpt.d  = input.d;
	    global_worst_inpt2.d = input2.d;
	  }
	  mpfr_set_d(mp_inpt, res_crlibm_up.d, GMP_RNDN);
	  mpfr_sub(mp_inpt, mp_inpt, mp_res, GMP_RNDN);
	  mpfr_div(mp_inpt, mp_inpt, mp_res, GMP_RNDN);
	  mpfr_abs(mp_inpt, mp_inpt, GMP_RNDN);
	  mpfr_log2(mp_inpt, mp_inpt, GMP_RNDN);
	  worst_err=mpfr_get_d(mp_inpt, GMP_RNDN);
	  
	  if (worst_err>global_worst_err){
	    global_worst_err=worst_err;
	    global_worst_inpt.d  = input.d;
            global_worst_inpt2.d = input2.d;
          }
	  printf("Worst crlibm relative error so far : 2^(%f)\n",global_worst_err);
	  printf(" for x =%.50e     (%08x %08x) \n", 
		 global_worst_inpt.d, 
		 global_worst_inpt.i[HI], 
		 global_worst_inpt.i[LO]);
#endif

	  failures_crlibm++;
	}
	
      }
      i++;
      if((i % 10000)==0) 
      {
	printf(" CRLIBM       : %lld failures out of %lld (ratio %e) \n",failures_crlibm, i,
	       ((double)failures_crlibm)/(double)i);
	printf("\n");
      }
  }
}




void usage(char *fct_name){
  /* fprintf (stderr, "\n%s: Soak-test for crlibm and other mathematical libraries \n", fct_name); */
  fprintf (stderr, "\nUsage: %s function seed \n", fct_name);
  fprintf (stderr, " function      : name of function to test \n");
  fprintf (stderr, " seed          : integer seed for the random number generator \n");
  exit (1);
}