Beispiel #1
// Runs a digit-peeling loop 1,000,000 times on the constant 1120390317: each
// inner step divides the value by 10 in place with mpn_divrem_1 and folds the
// value that call returns (the remainder, per the GMP manual) into a running
// sum and product.
// NOTE(review): this excerpt stops inside the inner loop; the closing braces
// and anything that follows the loops are not visible here.
void UsingModulo() {
    // NOTE(review): rem is declared but never used in the visible lines.
    mpz_class rem;
    mpz_class num = 1120390317;
    unsigned int sum = 0;
    unsigned int pro = 1;
    for (int i = 0; i < 1000000; i++) {
        // Reset the operand and both accumulators for every pass.
        num = 1120390317;
        sum = 0;
        pro = 1;
        // Work directly on the limb array behind the mpz_class object.
        __mpz_struct *q = num.get_mpz_t();
        // Only the least-significant limb is tested for zero.
        // NOTE(review): q->_mp_size is not updated by the visible code, so this
        // presumably relies on the value fitting in one limb -- confirm.
        while(*(q->_mp_d) != 0) {
            // Source and destination are the same limb array (in-place
            // quotient); zero extra fraction limbs are requested.
            auto digit = mpn_divrem_1 (q->_mp_d, (mp_size_t) 0, 
                    q->_mp_d, q->_mp_size, (mp_limb_t) 10);
            sum += digit;
            pro *= digit;
Beispiel #2
/*
 * Divides NUM by the signed single-word divisor DEN.  The quotient limbs are
 * written to QUOT (which may share its limb array with NUM) and QUOT->size is
 * negated when the signs of NUM->size and DEN differ.  The value returned is
 * the remainder produced by mpn_divrem_1, negated when NUM->size is negative;
 * 0 is returned immediately when NUM->size is 0.
 *
 * NOTE(review): the line holding the return type is not visible in this
 * excerpt, so the declarator is left exactly as found -- restore the type
 * from the upstream file.
 * NOTE(review): DEN == 0 and DEN == INTPTR_MIN are not handled here
 * (division by zero / overflow inside ABS) -- confirm callers exclude them.
 */
_gst_mpz_tdiv_qr_si (gst_mpz *quot, const gst_mpz *num, intptr_t den)
{
    mp_ptr np;
    mp_ptr qp;
    mp_size_t nsize = num->size;
    mp_size_t sign_remainder = nsize;
    /* Only the sign bit of this XOR is consulted below.  */
    mp_size_t sign_quotient = nsize ^ den;
    mp_limb_t rem;

    nsize = ABS (nsize);

    /* Zero numerator: empty quotient, zero remainder.  The braces matter:
       without them the return would be unconditional.  */
    if (nsize == 0)
      {
        quot->size = 0;
        return 0;
      }

    if (quot->alloc < nsize)
        gst_mpz_realloc (quot, nsize);

    /* Fetch the limb pointers only after the possible reallocation.  */
    qp = quot->d;
    np = num->d;

    /* Copy numerator to temporary space if it overlaps with the quotient.  */
    if (np == qp)
      {
        mp_ptr tp;
        tp = (mp_ptr) alloca (nsize * SIZEOF_MP_LIMB_T);
        MPN_COPY (tp, np, nsize);
        np = tp;
      }

    rem = mpn_divrem_1 (qp, 0L, np, nsize, ABS(den));

    /* Drop the most significant limb when the division left it zero.  */
    nsize -=  qp[nsize - 1] == 0;
    quot->size = sign_quotient >= 0 ? nsize : -nsize;
    /* NOTE(review): presumably lets an emulated alloca reclaim storage --
       confirm it is still needed.  */
    alloca (0);
    return sign_remainder >= 0 ? rem : -rem;
}
Beispiel #3
/* Divides {np, nn} by {dp, dn}, choosing among several GMP division routines
   according to the operand sizes.  tp is used as the quotient area; in the
   last visible path the remainder is computed into a temporary and copied
   back to the low dn limbs of np.
   NOTE(review): this excerpt appears to have lost its brace lines and at
   least one `else if` / `else` line (two division calls follow the dn == 2
   test back to back, and the mpn_mu_div_qr path has no visible guard), so the
   branch layout must be checked against the upstream file.  */
static void
mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
  mp_ptr qp;

  /* qp aliases tp and is passed as the first argument of every call below.  */
  qp = tp;

  if (dn == 1)
    /* Single-limb divisor: the value returned by mpn_divrem_1 is stored in
       np[0].  */
    np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
  else if (dn == 2)
    mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
    /* NOTE(review): the condition selecting this call is not visible.  */
    mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
  else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
	   BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
	   (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
	   + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
      mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
      /* We need to allocate separate remainder area, since mpn_mu_div_qr does
	 not handle overlap between the numerator and remainder areas.
	 FIXME: Make it handle such overlap.  */
      mp_ptr rp = TMP_ALLOC_LIMBS (dn);
      /* Scratch size is whatever mpn_mu_div_qr_itch reports for these sizes.  */
      mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
      mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
      /* Move the remainder back into the low dn limbs of np.  */
      MPN_COPY (np, rp, dn);

Beispiel #4
/*
 * Divides NUM by the unsigned single-limb divisor DEN and stores the quotient
 * in QUOT.  QUOT->size takes its sign from NUM->size.  QUOT may share its
 * limb array with NUM; in that case the numerator is copied to temporary
 * space first.
 *
 * NOTE(review): DEN == 0 is not checked here -- confirm callers exclude it.
 */
static void
_gst_mpz_tdiv_q_ui (gst_mpz *quot, const gst_mpz *num, mp_limb_t den)
{
    mp_ptr np;
    mp_ptr qp;
    mp_size_t nsize = num->size;
    mp_size_t sign_quotient = nsize;

    nsize = ABS (nsize);

    /* Zero numerator: the quotient is zero and there is nothing to divide.
       Returning here keeps the code below from reading qp[-1].  */
    if (nsize == 0)
      {
        quot->size = 0;
        return;
      }

    if (quot->alloc < nsize)
        gst_mpz_realloc (quot, nsize);

    /* Fetch the limb pointers only after the possible reallocation.  */
    qp = quot->d;
    np = num->d;

    /* Copy numerator to temporary space if it overlaps with the quotient.  */
    if (np == qp)
      {
        mp_ptr tp;
        tp = (mp_ptr) alloca (nsize * SIZEOF_MP_LIMB_T);
        MPN_COPY (tp, np, nsize);
        np = tp;
      }

    /* The remainder returned by mpn_divrem_1 is deliberately discarded.  */
    mpn_divrem_1 (qp, 0L, np, nsize, den);
    /* Drop the most significant limb when the division left it zero.  */
    nsize -=  qp[nsize - 1] == 0;
    quot->size = sign_quotient >= 0 ? nsize : -nsize;
    /* NOTE(review): presumably lets an emulated alloca reclaim storage --
       confirm it is still needed.  */
    alloca (0);
}
Beispiel #5
/**
 * Divides the size-digit vector a by the single digit b using mpn_divrem_1.
 *
 * @param[out] c    - receives the quotient digits.
 * @param[out] d    - receives the value returned by mpn_divrem_1
 *                    (the remainder, per the GMP manual).
 * @param[in] a     - the dividend digits.
 * @param[in] size  - the number of digits in a.
 * @param[in] b     - the single-digit divisor.
 */
void bn_div1_low(dig_t *c, dig_t *d, const dig_t *a, int size, dig_t b) {
	/* Second argument 0: no extra fraction digits are requested. */
	*d = mpn_divrem_1(c, 0, a, size, b);
}
Beispiel #6
mpn_tdiv_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn)
  mp_ptr new_dp, new_np, tp, rp, scratch;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  mp_limb_t dinv;
  int cnt;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));

  if (dn == 1)
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);

  scratch = TMP_ALLOC_LIMBS(nn + 1);
  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  if (dn == 2)
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
          invert_1(dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sb_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
          invert_1(dinv, new_dp[dn - 1], new_dp[dn - 2]);
          qh = mpn_dc_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
           mp_ptr inv = TMP_ALLOC_LIMBS(dn);
           mpn_invert(inv, new_dp, dn);
           qh = mpn_inv_div_q (qp, new_np, new_nn, new_dp, dn, inv);
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
      else  /* divisor is already normalised */
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
           invert_1(dinv, dh, dp[dn - 2]);
           qh = mpn_sb_div_q (qp, new_np, nn, dp, dn, dinv);
           invert_1(dinv, dh, dp[dn - 2]);
           qh = mpn_dc_div_q (qp, new_np, nn, dp, dn, dinv);
           mp_ptr inv = TMP_ALLOC_LIMBS(dn);
           mpn_invert(inv, dp, dn);
           qh = mpn_inv_div_q (qp, new_np, nn, dp, dn, inv);
	  qp[nn - dn] = qh;
      /* |________________________|
                |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
          invert_1(dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
          invert_1(dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
           mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1);
           mpn_invert(inv, new_dp, qn + 1);
           qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv); 
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
      else  /* divisor is already normalised */
/* Evaluates N-term Taylor sums for sine and, unless sinonly is nonzero,
   cosine of the xn-limb argument x.  Results are written to ysin / ycos and
   an error bound in ulps to error[0].  N == 0 and N == 1 are handled
   directly; otherwise a table of m powers is built (m even, m*m >= N) and
   the sum is evaluated from the highest term down, m terms per outer
   multiplication.  When `alternating` is nonzero, odd-k terms are subtracted
   instead of added.
   NOTE(review): brace lines and bare `else` lines appear to be missing from
   this excerpt, and no TMP_INIT / TMP_START / TMP_END is visible around the
   TMP_ALLOC_LIMBS calls; the indentation is the only guide to the intended
   nesting.  */
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
                            mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N,
                            int sinonly, int alternating)
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;
    int cosorsin;


    /* The coefficient tables are indexed up to 2 * k + cosorsin + 2 below.
       NOTE(review): only a message is printed in the visible code; nothing
       visibly stops execution afterwards -- confirm against upstream.  */
    if (2 * N >= FACTORIAL_TAB_SIZE - 1)
        flint_printf("_arb_sin_cos_taylor_rs: N too large!\n");

    if (N <= 1)
        if (N == 0)
            flint_mpn_zero(ysin, xn);
            if (!sinonly) flint_mpn_zero(ycos, xn);
            error[0] = 0;
        else if (N == 1)
            flint_mpn_copyi(ysin, x, xn);
            if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES);
            error[0] = 1;
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2);     /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- | */
        /* xpow = |  <temp>  | x^m | x^(m-1) | ... | x^2 | x | */

        /* A product of two xn-limb values is 2*xn limbs long.  XPOW_WRITE(k)
           starts one slot below XPOW_READ(k), so only the upper xn limbs of
           each product land in the slot that is read back later.  */
#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        /* Table entry 1 is the square of the input x.  */
        mpn_sqr(XPOW_WRITE(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);

        /* Fill entries k-1 and k from entries k/2 and k/2 - 1.  */
        for (k = 4; k <= m; k += 2)
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);

        /* cosorsin == 0 is the pass that writes ycos, cosorsin == 1 the pass
           that writes ysin; starting at sinonly skips the first pass.  */
        for (cosorsin = sinonly; cosorsin < 2; cosorsin++)
            flint_mpn_zero(s, xn + 1);

            /* todo: skip one nonscalar multiplication (use x^m)
               when starting on x^0 */
            power = (N - 1) % m;

            for (k = N - 1; k >= 0; k--)
                c = factorial_tab_numer[2 * k + cosorsin];
                new_denom = factorial_tab_denom[2 * k + cosorsin];
                old_denom = factorial_tab_denom[2 * k + cosorsin + 2];

                /* change denominators */
                if (new_denom != old_denom && k < N - 1)
                    if (alternating && (k % 2 == 0))
                        s[xn] += old_denom;

                    mpn_divrem_1(s, 0, s, xn + 1, old_denom);

                    if (alternating && (k % 2 == 0))
                        s[xn] -= 1;

                if (power == 0)
                    /* add c * x^0 -- only top limb is affected */
                    /* NOTE(review): the `else` between the next two
                       statements is not visible in this excerpt.  */
                    if (alternating & k)
                        s[xn] -= c;
                        s[xn] += c;

                    /* Outer polynomial evaluation: multiply by x^m */
                    if (k != 0)
                        mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                        flint_mpn_copyi(s, t + xn, xn + 1);

                    power = m - 1;
                    /* NOTE(review): likewise, the `else` separating the
                       subtract and add forms is not visible here.  */
                    if (alternating & k)
                        s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c);
                        s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);


            /* finally divide by denominator */
            if (cosorsin == 0)
                mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]);

                /* perturb down to a number < 1 if necessary. note that this
                   does not invalidate the error bound: 1 - ulp is either
                   1 ulp too small or must be closer to the exact value */
                if (t[xn] == 0)
                    flint_mpn_copyi(ycos, t, xn);
                    flint_mpn_store(ycos, xn, LIMB_ONES);
                /* Sine pass: divide by the denominator, then multiply the sum
                   by x and keep the upper xn limbs of the product.  */
                mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]);
                mpn_mul(t, s, xn + 1, x, xn);
                flint_mpn_copyi(ysin, t + xn, xn);

        /* error bound (ulp) */
        error[0] = 2;

Beispiel #8
/* Evaluates an N-term Taylor sum of the exponential function at the xn-limb
   argument x, writing the result to y (xn + 1 limbs are written when N <= 3:
   y[xn] holds the top limb) and an error bound in ulps to error[0].
   N <= 3 is handled directly; otherwise a table of the powers x .. x^m is
   built (m even, m*m >= N) and the sum is evaluated from the highest term
   down, m terms per outer multiplication.
   NOTE(review): brace lines and bare `else` lines appear to be missing from
   this excerpt, and no TMP_INIT / TMP_START / TMP_END is visible around the
   TMP_ALLOC_LIMBS calls; the indentation is the only guide to the intended
   nesting.  */
void _arb_exp_taylor_rs(mp_ptr y, mp_limb_t * error,
    mp_srcptr x, mp_size_t xn, ulong N)
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;


    /* The coefficient tables are indexed up to k + 1 below.
       NOTE(review): only a message is printed in the visible code; nothing
       visibly stops execution afterwards -- confirm against upstream.  */
    if (N >= FACTORIAL_TAB_SIZE - 1)
        flint_printf("_arb_exp_taylor_rs: N too large!\n");

    if (N <= 3)
        if (N <= 1)
            /* Result is the integer N (0 or 1) with a zero fraction.  */
            flint_mpn_zero(y, xn);
            y[xn] = N;
            error[0] = 0;
        else if (N == 2)
            /* 1 + x */
            flint_mpn_copyi(y, x, xn);
            y[xn] = 1;
            error[0] = 0;
            /* 1 + x + x^2 / 2 */
            t = TMP_ALLOC_LIMBS(2 * xn);

            /* Square x, halve the upper xn limbs, then add x; the carry out
               of the addition plus one becomes the top limb.  */
            mpn_sqr(t, x, xn);
            mpn_rshift(t + xn, t + xn, xn, 1);
            y[xn] = mpn_add_n(y, x, t + xn, xn) + 1;

            error[0] = 2;
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        /* TODO: drop evenness assumption since we don't have sign issues here? */
        /* TODO: then just need to fix power construction below... */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2);     /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- | */
        /* xpow = |  <temp>  | x^m | x^(m-1) | ... | x^2 | x | */

        /* A product of two xn-limb values is 2*xn limbs long.  XPOW_WRITE(k)
           starts one slot below XPOW_READ(k), so only the upper xn limbs of
           each product land in the slot that is read back later.  */
#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        /* Table entry 1 is x itself.  */
        flint_mpn_copyi(XPOW_READ(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);

        /* Fill entries k-1 and k from entries k/2 and k/2 - 1.  */
        for (k = 4; k <= m; k += 2)
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);

        flint_mpn_zero(s, xn + 1);

        /* todo: skip one nonscalar multiplication (use x^m)
           when starting on x^0 */
        power = (N - 1) % m;

        for (k = N - 1; k >= 0; k--)
            c = factorial_tab_numer[k];
            new_denom = factorial_tab_denom[k];
            old_denom = factorial_tab_denom[k+1];

            /* change denominators */
            if (new_denom != old_denom && k < N - 1)
                mpn_divrem_1(s, 0, s, xn + 1, old_denom);

            if (power == 0)
                /* add c * x^0 -- only top limb is affected */
                s[xn] += c;

                /* Outer polynomial evaluation: multiply by x^m */
                if (k != 0)
                    mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                    flint_mpn_copyi(s, t + xn, xn + 1);

                power = m - 1;
                /* NOTE(review): the `else` (and any power decrement) that
                   would separate this from the power == 0 case is not
                   visible in this excerpt.  */
                s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);


        /* finally divide by denominator */
        mpn_divrem_1(y, 0, s, xn + 1, factorial_tab_denom[0]);

        /* error bound (ulp) */
        error[0] = 2;

Beispiel #9
/* returns 0 if result exact, non-zero otherwise */
mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mpfr_rnd_t rnd_mode)
  long i;
  int sh;
  mp_size_t xn, yn, dif;
  mp_limb_t *xp, *yp, *tmp, c, d;
  mpfr_exp_t exp;
  int inexact, middle = 1, nexttoinf;

    (("x[%Pu]=%.*Rg u=%lu rnd=%d",
      mpfr_get_prec(x), mpfr_log_prec, x, u, rnd_mode),
     ("y[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec(y), mpfr_log_prec, y, inexact));

      if (MPFR_IS_NAN (x))
          MPFR_SET_NAN (y);
      else if (MPFR_IS_INF (x))
          MPFR_SET_INF (y);
          MPFR_SET_SAME_SIGN (y, x);
          MPFR_RET (0);
          if (u == 0) /* 0/0 is NaN */
              MPFR_SET_SAME_SIGN (y, x);
  else if (MPFR_UNLIKELY (u <= 1))
      if (u < 1)
          /* x/0 is Inf since x != 0*/
          MPFR_SET_INF (y);
          MPFR_SET_SAME_SIGN (y, x);
          mpfr_set_divby0 ();
          MPFR_RET (0);
      else /* y = x/1 = x */
        return mpfr_set (y, x, rnd_mode);
  else if (MPFR_UNLIKELY (IS_POW2 (u)))
    return mpfr_div_2si (y, x, MPFR_INT_CEIL_LOG2 (u), rnd_mode);


  MPFR_TMP_MARK (marker);
  xn = MPFR_LIMB_SIZE (x);
  yn = MPFR_LIMB_SIZE (y);

  xp = MPFR_MANT (x);
  yp = MPFR_MANT (y);
  exp = MPFR_GET_EXP (x);

  dif = yn + 1 - xn;

  /* we need to store yn+1 = xn + dif limbs of the quotient */
  /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */
  tmp = MPFR_TMP_LIMBS_ALLOC (yn + 1);

  c = (mp_limb_t) u;
  MPFR_ASSERTN (u == c);
  if (dif >= 0)
    c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */
  else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */
    c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c);

  inexact = (c != 0);

  /* First pass in estimating next bit of the quotient, in case of RNDN    *
   * In case we just have the right number of bits (postpone this ?),      *
   * we need to check whether the remainder is more or less than half      *
   * the divisor. The test must be performed with a subtraction, so as     *
   * to prevent carries.                                                   */

  if (MPFR_LIKELY (rnd_mode == MPFR_RNDN))
      if (c < (mp_limb_t) u - c) /* We have u > c */
        middle = -1;
      else if (c > (mp_limb_t) u - c)
        middle = 1;
        middle = 0; /* exactly in the middle */

  /* If we believe that we are right in the middle or exact, we should check
     that we did not neglect any word of x (division large / 1 -> small). */

  for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++)
    if (xp[i])
      inexact = middle = 1; /* larger than middle */

     If the high limb of the result is 0 (xp[xn-1] < u), remove it.
     Otherwise, compute the left shift to be performed to normalize.
     In the latter case, we discard some low bits computed. They
     contain information useful for the rounding, hence the updating
     of middle and inexact.

  if (tmp[yn] == 0)
      MPN_COPY(yp, tmp, yn);
      exp -= GMP_NUMB_BITS;
      int shlz;

      count_leading_zeros (shlz, tmp[yn]);

      /* shift left to normalize */
      if (MPFR_LIKELY (shlz != 0))
          mp_limb_t w = tmp[0] << shlz;

          mpn_lshift (yp, tmp + 1, yn, shlz);
          yp[0] += tmp[0] >> (GMP_NUMB_BITS - shlz);

          if (w > (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)))
            { middle = 1; }
          else if (w < (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)))
            { middle = -1; }
            { middle = (c != 0); }

          inexact = inexact || (w != 0);
          exp -= shlz;
        { /* this happens only if u == 1 and xp[xn-1] >=
Beispiel #10
/* Divides {np, nn} by {dp, dn}, producing qxn additional low quotient limbs.
   The low nn - dn + qxn quotient limbs are copied to qp, the remainder
   overwrites the low dn limbs of np, and the most significant quotient limb
   is the return value.  dn == 1 and dn == 2 are forwarded to mpn_divrem_1 and
   mpn_divrem_2; larger divisors go through mpn_tdiv_qr using temporary
   quotient and remainder areas.
   NOTE(review): the return-type line, the brace lines and the bare `else`
   lines are not visible in this excerpt, nor is any TMP_DECL / TMP_MARK /
   TMP_FREE around the TMP_ALLOC calls.  */
mpn_divrem (mp_ptr qp, mp_size_t qxn,
	    mp_ptr np, mp_size_t nn,
	    mp_srcptr dp, mp_size_t dn)
  ASSERT (qxn >= 0);
  ASSERT (nn >= dn);
  ASSERT (dn >= 1);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  if (dn == 1)
      mp_limb_t ret;
      mp_ptr q2p;
      mp_size_t qn;

      /* mpn_divrem_1 writes nn + qxn quotient limbs, one more than qp is
	 given, so the quotient is produced in a temporary first.  */
      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);

      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
      qn = nn + qxn - 1;
      MPN_COPY (qp, q2p, qn);
      ret = q2p[qn];

      return ret;
  else if (dn == 2)
      return mpn_divrem_2 (qp, qxn, np, nn, dp);
      /* General case (dn > 2).  NOTE(review): the `else` introducing this
	 part is not visible in this excerpt.  */
      mp_ptr rp, q2p;
      mp_limb_t qhl;
      mp_size_t qn;

      if (UNLIKELY (qxn != 0))
	  /* Build a copy of the numerator with qxn zero limbs below it.  */
	  mp_ptr n2p;
	  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
	  MPN_ZERO (n2p, qxn);
	  MPN_COPY (n2p + qxn, np, nn);
	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
	  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
	  MPN_COPY (np, rp, dn);
	  qn = nn - dn + qxn;
	  MPN_COPY (qp, q2p, qn);
	  qhl = q2p[qn];
	  /* qxn == 0 path.  NOTE(review): the `else` separating it from the
	     statements above is not visible in this excerpt.  */
	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
	  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
	  MPN_COPY (np, rp, dn);	/* overwrite np area with remainder */
	  qn = nn - dn;
	  MPN_COPY (qp, q2p, qn);
	  qhl = q2p[qn];
      return qhl;
Beispiel #11
check (void)
  mp_limb_t  wp[100], xp[100], yp[100];
  mp_size_t  size = 100;

  refmpn_zero (xp, size);
  refmpn_zero (yp, size);
  refmpn_zero (wp, size);

  pre ("mpn_add_n");
  mpn_add_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_add_nc
  pre ("mpn_add_nc");
  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));
  post ();

#if HAVE_NATIVE_mpn_addlsh1_n
  pre ("mpn_addlsh1_n");
  mpn_addlsh1_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_and_n
  pre ("mpn_and_n");
  mpn_and_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_andn_n
  pre ("mpn_andn_n");
  mpn_andn_n (wp, xp, yp, size);
  post ();

  pre ("mpn_addmul_1");
  mpn_addmul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_addmul_1c
  pre ("mpn_addmul_1c");
  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();

#if HAVE_NATIVE_mpn_com_n
  pre ("mpn_com_n");
  mpn_com_n (wp, xp, size);
  post ();

#if HAVE_NATIVE_mpn_copyd
  pre ("mpn_copyd");
  mpn_copyd (wp, xp, size);
  post ();

#if HAVE_NATIVE_mpn_copyi
  pre ("mpn_copyi");
  mpn_copyi (wp, xp, size);
  post ();

  pre ("mpn_divexact_1");
  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));
  post ();

  pre ("mpn_divexact_by3c");
  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));
  post ();

  pre ("mpn_divrem_1");
  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_divrem_1c
  pre ("mpn_divrem_1c");
  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122));
  post ();

  pre ("mpn_gcd_1");
  xp[0] |= 1;
  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_gcd_finda
  pre ("mpn_gcd_finda");
  xp[0] |= 1;
  xp[1] |= 1;
  notdead += mpn_gcd_finda (xp);
  post ();

  pre ("mpn_hamdist");
  notdead += mpn_hamdist (xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_ior_n
  pre ("mpn_ior_n");
  mpn_ior_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_iorn_n
  pre ("mpn_iorn_n");
  mpn_iorn_n (wp, xp, yp, size);
  post ();

  pre ("mpn_lshift");
  mpn_lshift (wp, xp, size, 1);
  post ();

  pre ("mpn_mod_1");
  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_mod_1c
  pre ("mpn_mod_1c");
  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));
  post ();

#if GMP_NUMB_BITS % 4 == 0
  pre ("mpn_mod_34lsub1");
  notdead += mpn_mod_34lsub1 (xp, size);
  post ();

  pre ("mpn_modexact_1_odd");
  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));
  post ();

  pre ("mpn_modexact_1c_odd");
  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));
  post ();

  pre ("mpn_mul_1");
  mpn_mul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_mul_1c
  pre ("mpn_mul_1c");
  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();

#if HAVE_NATIVE_mpn_mul_2
  pre ("mpn_mul_2");
  mpn_mul_2 (wp, xp, size-1, yp);
  post ();

  pre ("mpn_mul_basecase");
  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);
  post ();

#if HAVE_NATIVE_mpn_nand_n
  pre ("mpn_nand_n");
  mpn_nand_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_nior_n
  pre ("mpn_nior_n");
  mpn_nior_n (wp, xp, yp, size);
  post ();

  pre ("mpn_popcount");
  notdead += mpn_popcount (xp, size);
  post ();

  pre ("mpn_preinv_mod_1");
  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,
                               refmpn_invert_limb (GMP_NUMB_MAX));
  post ();

#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
  pre ("mpn_preinv_divrem_1");
  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,
                       refmpn_invert_limb (GMP_NUMB_MAX), 0);
  post ();

#if HAVE_NATIVE_mpn_rsh1add_n
  pre ("mpn_rsh1add_n");
  mpn_rsh1add_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_rsh1sub_n
  pre ("mpn_rsh1sub_n");
  mpn_rsh1sub_n (wp, xp, yp, size);
  post ();

  pre ("mpn_rshift");
  mpn_rshift (wp, xp, size, 1);
  post ();

  pre ("mpn_sqr_basecase");
  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);
  post ();

  pre ("mpn_submul_1");
  mpn_submul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_submul_1c
  pre ("mpn_submul_1c");
  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();

  pre ("mpn_sub_n");
  mpn_sub_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_sub_nc
  pre ("mpn_sub_nc");
  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));
  post ();

#if HAVE_NATIVE_mpn_sublsh1_n
  pre ("mpn_sublsh1_n");
  mpn_sublsh1_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_udiv_qrnnd
  pre ("mpn_udiv_qrnnd");
  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  pre ("mpn_udiv_qrnnd_r");
  mpn_udiv_qrnnd (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);
  post ();

#if HAVE_NATIVE_mpn_umul_ppmm
  pre ("mpn_umul_ppmm");
  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);
  post ();

#if HAVE_NATIVE_mpn_umul_ppmm_r
  pre ("mpn_umul_ppmm_r");
  mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]);
  post ();

#if HAVE_NATIVE_mpn_xor_n
  pre ("mpn_xor_n");
  mpn_xor_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_xnor_n
  pre ("mpn_xnor_n");
  mpn_xnor_n (wp, xp, yp, size);
  post ();
Beispiel #12
/* Computes an integer nth root of {up, un}.  The root limbs are copied to
   rootp, the difference between {up, un} and the nth power of the root is
   written to remp (a temporary is used when remp is NULL), and the normalised
   limb count of that difference is returned.  The method is: start from an
   all-ones overestimate, refine a few leading bits one at a time, run a
   Newton iteration, then correct a possible overshoot by one.
   NOTE(review): the return-type line, the brace lines, the allocation of pp
   and qp and the `done:` label targeted by the goto below are not visible in
   this excerpt.  */
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
	     mp_srcptr up, mp_size_t un, mp_limb_t nth)
  mp_ptr pp, qp, xp;
  mp_size_t pn, xn, qn;
  unsigned long int unb, xnb, bit;
  unsigned int cnt;
  mp_size_t i;
  unsigned long int n_valid_bits, adj;


  /* The extra factor 1.585 = log(3)/log(2) here is for the worst case
     overestimate of the root, i.e., when the code rounds a root that is
     2+epsilon to 3, and then powers this to a potentially huge power.  We
     could generalize the code for detecting root=1 a few lines below to deal
     with xnb <= k, for some small k.  For example, when xnb <= 2, meaning
     the root should be 1, 2, or 3, we could replace this factor by the much
     smaller log(5)/log(4).  */

#define PP_ALLOC (2 + (mp_size_t) (un*1.585))

  /* unb = number of significant bits in U.  */
  count_leading_zeros (cnt, up[un - 1]);
  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;

  /* xnb = ceil (unb / nth), the bit size used for the root estimate.  */
  xnb = (unb - 1) / nth + 1;
  /* A one-bit root means the root is 1 and the remainder is U - 1.  */
  if (xnb == 1)
      if (remp == NULL)
	remp = pp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
      MPN_NORMALIZE (remp, un);
      rootp[0] = 1;
      return un;

  xn = (xnb + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;

  xp = TMP_ALLOC_LIMBS (xn + 1);

  /* Set initial root to only ones.  This is an overestimate of the actual root
     by less than a factor of 2.  */
  for (i = 0; i < xn; i++)
    xp[i] = GMP_NUMB_MAX;
  xp[xnb / GMP_NUMB_BITS] = ((mp_limb_t) 1 << (xnb % GMP_NUMB_BITS)) - 1;

  /* Improve the initial approximation, one bit at a time.  Keep the
     approximations >= root(U,nth).  */
  bit = xnb - 2;
  n_valid_bits = 0;
  /* One pass per significant bit of nth.
     NOTE(review): no update of `bit` is visible inside this loop -- confirm
     against upstream.  */
  for (i = 0; (nth >> i) != 0; i++)
      mp_limb_t xl = xp[bit / GMP_NUMB_BITS];
      /* Flip the candidate bit and test the nth power against U.  */
      xp[bit / GMP_NUMB_BITS] = xl ^ (mp_limb_t) 1 << bit % GMP_NUMB_BITS;
      pn = mpn_pow_1 (pp, xp, xn, nth, qp);
      /* If the new root approximation is too small, restore old value.  */
      if (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)))
	xp[bit / GMP_NUMB_BITS] = xl;		/* restore old value */
      n_valid_bits += 1;
      if (bit == 0)
	goto done;

  adj = n_valid_bits - 1;

  /* Newton loop.  Converges downwards towards root(U,nth).  Currently we use
     full precision from iteration 1.  Clearly, we should use just n_valid_bits
     of precision in each step, and thus save most of the computations.  */
  while (n_valid_bits <= xnb)
      mp_limb_t cy;

      /* Per iteration: q = U / x^(nth-1) + (nth-1) * x, then x = q / nth.  */
      pn = mpn_pow_1 (pp, xp, xn, nth - 1, qp);
      qp[xn - 1] = 0;		/* pad quotient to make it always xn limbs */
      mpn_tdiv_qr (qp, pp, (mp_size_t) 0, up, un, pp, pn); /* junk remainder */
      cy = mpn_addmul_1 (qp, xp, xn, nth - 1);
      if (un - pn == xn)
	  cy += qp[xn];
	  /* Clamp the sum so that dividing by nth still fits in xn limbs.  */
	  if (cy == nth)
	      for (i = xn - 1; i >= 0; i--)
		qp[i] = GMP_NUMB_MAX;
	      cy = nth - 1;

      qp[xn] = cy;
      qn = xn + (cy != 0);

      mpn_divrem_1 (xp, (mp_size_t) 0, qp, qn, nth);
      n_valid_bits = n_valid_bits * 2 - adj;

  /* The computed result might be one unit too large.  Adjust as necessary.  */
  pn = mpn_pow_1 (pp, xp, xn, nth, qp);
  if (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0))
      mpn_decr_u (xp, 1);
      pn = mpn_pow_1 (pp, xp, xn, nth, qp);

      ASSERT_ALWAYS (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)));

  /* Remainder = U - root^nth; a caller that passes NULL gets it discarded
     into pp.  */
  if (remp == NULL)
    remp = pp;
  mpn_sub (remp, up, un, pp, pn);
  MPN_NORMALIZE (remp, un);
  MPN_COPY (rootp, xp, xn);
  return un;
check_data (void)
  static const struct {
    mp_limb_t  n[1];
    mp_size_t  nsize;
    mp_limb_t  d;
    mp_size_t  qxn;
    mp_limb_t  want_q[5];
    mp_limb_t  want_r;
  } data[] = {
    { { 0 }, 1, 1, 0,
      { 0 }, 0},

    { { 5 }, 1, 2, 0,
      { 2 }, 1},

#if GMP_NUMB_BITS == 32
    { { 0x3C }, 1, 0xF2, 1,
      { 0x3F789854, 0 }, 0x98 },

#if GMP_NUMB_BITS == 64
    { { 0x3C }, 1, 0xF2, 1,
      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, 0x0E },

    /* This case exposed some wrong code generated by SGI cc on mips64 irix
       6.5 with -n32 -O2, in the fractional loop for normalized divisor
       using udiv_qrnnd_preinv.  A test "x>al" in one of the sub_ddmmss
       expansions came out wrong, leading to an incorrect quotient.  */
    { { CNST_LIMB(0x3C00000000000000) }, 1, CNST_LIMB(0xF200000000000000), 1,
      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, CNST_LIMB(0x0E00000000000000) },

  mp_limb_t  dinv, got_r, got_q[numberof(data[0].want_q)];
  mp_size_t  qsize;
  int        i, shift;

  for (i = 0; i < numberof (data); i++)
      qsize = data[i].nsize + data[i].qxn;
      ASSERT_ALWAYS (qsize <= numberof (got_q));

      got_r = mpn_divrem_1 (got_q, data[i].qxn, data[i].n, data[i].nsize,
      if (got_r != data[i].want_r
          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)
          printf        ("mpn_divrem_1 wrong at data[%d]\n", i);
          mpn_trace     ("  n", data[i].n, data[i].nsize);
          printf        ("  nsize=%ld\n", (long) data[i].nsize);
          mp_limb_trace ("  d", data[i].d);
          printf        ("  qxn=%ld\n", (long) data[i].qxn);
          mpn_trace     ("  want q", data[i].want_q, qsize);
          mpn_trace     ("  got  q", got_q, qsize);
          mp_limb_trace ("  want r", data[i].want_r);
          mp_limb_trace ("  got  r", got_r);
          abort ();

      /* test if available */
#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
      shift = refmpn_count_leading_zeros (data[i].d);
      dinv = refmpn_invert_limb (data[i].d << shift);
      got_r = mpn_preinv_divrem_1 (got_q, data[i].qxn,
                                   data[i].n, data[i].nsize,
                                   data[i].d, dinv, shift);
      if (got_r != data[i].want_r
          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)
          printf        ("mpn_preinv divrem_1 wrong at data[%d]\n", i);
          printf        ("  shift=%d\n", shift);
          mp_limb_trace ("  dinv", dinv);
          goto bad;
Beispiel #14
check_functions (void)
  mp_limb_t  wp[2], wp2[2], xp[2], yp[2], r;
  int  i;

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 123;
      yp[0] = 456;
      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 579);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 702);
      ASSERT_ALWAYS (r == 0);

#if HAVE_NATIVE_mpn_copyd
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyd (xp+1, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[1] == 123);

#if HAVE_NATIVE_mpn_copyi
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyi (xp, xp+1, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[0] == 456);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 1605;
      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
      ASSERT_ALWAYS (wp[0] == 321);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 1296;
      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
      ASSERT_ALWAYS (wp[0] == 432);
      ASSERT_ALWAYS (r == 0);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 578;
      r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17),CNST_LIMB(-1)/CNST_LIMB(17));
      ASSERT_ALWAYS (wp[0] == 34);
      ASSERT_ALWAYS (r == 0);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 287;
      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[1] == 41);
      ASSERT_ALWAYS (wp[0] == 0);
      ASSERT_ALWAYS (r == 0);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 290;
      r = mpn_divrem_euclidean_qr_1 (wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[0] == 41);
      ASSERT_ALWAYS (r == 3);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 12;
      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
      ASSERT_ALWAYS (r == 3);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 0x1001;
      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x2002);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 14;
      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (r == 2);

#if (GMP_NUMB_BITS % 4) == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      int  bits = (GMP_NUMB_BITS / 4) * 3;
      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;
      mp_limb_t  want = GMP_NUMB_MAX % mod;
      xp[0] = GMP_NUMB_MAX;
      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
      ASSERT_ALWAYS (r % mod == want);

  //   DECL_modexact_1c_odd ((*modexact_1c_odd)); 

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 14;
      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (wp[0] == 56);
      ASSERT_ALWAYS (r == 0);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 5;
      yp[0] = 7;
      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 35);
      ASSERT_ALWAYS (wp[1] == 0);

#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 0x101;
      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
      ASSERT_ALWAYS (wp[0] == 0x202);
      ASSERT_ALWAYS (wp[1] == 0);
      ASSERT_ALWAYS (r == 0);

#if GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = GMP_LIMB_HIGHBIT+123;
      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,
                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));
      ASSERT_ALWAYS (r == 123);

 memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
   for (i = 0; i < 2; i++)
        xp[0] = 5;
        mpn_redc_1 (wp, yp, xp, (mp_size_t) 1,r);
        ASSERT_ALWAYS (wp[0] == 78);

 memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
   for (i = 0; i < 2; i++)
        mpn_sumdiff_n (wp, wp2,xp, yp,1);
        ASSERT_ALWAYS (wp[0] == 8);
        ASSERT_ALWAYS (wp2[0] == 2);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 0x8008;
      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x4004);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 5;
      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 25);
      ASSERT_ALWAYS (wp[1] == 0);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 999;
      yp[0] = 666;
      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 333);

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 210);
      ASSERT_ALWAYS (r == 0);
Beispiel #15
/*
 * Straightforward term-by-term evaluation of the truncated series
 *
 *     y = sum_{k=0}^{N-1} (+/-)^k  x^(2k+1) / (2k+1)
 *
 * in fixed-point limb arithmetic.
 *
 *   y           output, xn limbs
 *   error       error[0] receives the reported error bound: 0 for the exact
 *               cases N == 0 and N == 1, 2 otherwise (units are not visible
 *               in this function -- presumably ulps of the xn-limb result;
 *               confirm against the caller)
 *   x           input, xn limbs, treated as a fraction: every product below
 *               keeps only the high half of the double-length result
 *   N           number of series terms to add
 *   alternating when its low bit is set, terms with odd k are subtracted
 *               (the atan series); otherwise every term is added (atanh)
 *
 * Internally one extra low-order guard limb is carried (nn = xn + 1); it is
 * dropped again when the sum is copied to y.
 *
 * All scratch buffers are released before returning.
 */
void
_arb_atan_taylor_naive(mp_ptr y, mp_limb_t * error,
    mp_srcptr x, mp_size_t xn, ulong N, int alternating)
{
    ulong k;
    mp_ptr s, t, x1, x2, u;
    mp_size_t nn = xn + 1;

    /* Empty sum: the result is exactly zero. */
    if (N == 0)
    {
        flint_mpn_zero(y, xn);
        error[0] = 0;
        return;
    }

    /* A single term is x itself, so nothing is rounded.  Returning here
       keeps the exact error bound instead of letting the general path
       overwrite it; the value written to y is the same either way. */
    if (N == 1)
    {
        flint_mpn_copyi(y, x, xn);
        error[0] = 0;
        return;
    }

    s = flint_malloc(sizeof(mp_limb_t) * nn);
    t = flint_malloc(sizeof(mp_limb_t) * nn);
    u = flint_malloc(sizeof(mp_limb_t) * 2 * nn);
    x1 = flint_malloc(sizeof(mp_limb_t) * nn);
    x2 = flint_malloc(sizeof(mp_limb_t) * nn);

    flint_mpn_zero(s, nn);
    flint_mpn_zero(t, nn);
    flint_mpn_zero(u, 2 * nn);
    flint_mpn_zero(x1, nn);
    flint_mpn_zero(x2, nn);

    /* x1 = x, shifted up by one limb so that x1[0] is a zero guard limb */
    flint_mpn_copyi(x1 + 1, x, xn);

    /* x2 = x * x (high nn limbs of the 2*nn-limb product) */
    mpn_mul_n(u, x1, x1, nn);
    flint_mpn_copyi(x2, u + nn, nn);

    /* s = t = x: running sum and current odd power of x */
    flint_mpn_copyi(s, x1, nn);
    flint_mpn_copyi(t, x1, nn);

    for (k = 1; k < N; k++)
    {
        /* t = t * x2, i.e. x^(2k+1) */
        mpn_mul_n(u, t, x2, nn);
        flint_mpn_copyi(t, u + nn, nn);

        /* u = t / (2k+1); the remainder is discarded */
        mpn_divrem_1(u, 0, t, nn, 2 * k + 1);

        /* odd-k terms are negative in the alternating series */
        if (alternating & k)
            mpn_sub_n(s, s, u, nn);
        else
            mpn_add_n(s, s, u, nn);
    }

    /* drop the guard limb */
    flint_mpn_copyi(y, s + 1, xn);
    error[0] = 2;

    flint_free(s);
    flint_free(t);
    flint_free(u);
    flint_free(x1);
    flint_free(x2);
}

Beispiel #16
/*
 * mpfr_div_ui: divide x by the unsigned integer u and store the quotient
 * in y.  rnd_mode is consulted below for the round-to-nearest case.
 *
 * returns 0 if result exact, non-zero otherwise
 *
 * NOTE(review): this listing appears to have lost lines (the return type,
 * braces, several else/return statements and the end of the function).
 * The added comments describe only what is visible here; confirm the
 * control flow against the upstream MPFR source before relying on it.
 */
mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mp_rnd_t rnd_mode)
  long int xn, yn, dif, sh, i;
  mp_limb_t *xp, *yp, *tmp, c, d;
  mp_exp_t exp;
  int inexact, middle = 1;

  /* Special operands; the bodies of these branches are not present in this
     listing. */
      if (MPFR_IS_NAN(x))
      else if (MPFR_IS_INF(x))
	  if (u == 0)/* 0/0 is NaN */

  if (MPFR_UNLIKELY(u == 0))
      /* x/0 is Inf */
      MPFR_SET_SAME_SIGN(y, x);



  /* Limb counts and mantissa pointers of the operand and the result. */
  xn = MPFR_LIMB_SIZE(x);
  yn = MPFR_LIMB_SIZE(y);

  xp = MPFR_MANT(x);
  yp = MPFR_MANT(y);
  exp = MPFR_GET_EXP (x);

  /* dif >= 0: x is short, so dif extra low quotient limbs are requested;
     dif < 0: x is long, so its -dif low limbs are left out of the division */
  dif = yn + 1 - xn;

  /* we need to store yn+1 = xn + dif limbs of the quotient */
  /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */
  tmp = (mp_limb_t*) TMP_ALLOC((yn + 1) * BYTES_PER_MP_LIMB);

  /* c first holds the divisor as a limb; the assertion checks that u
     survived the conversion unchanged */
  c = (mp_limb_t) u;
  MPFR_ASSERTN(u == c);
  /* from here on c holds the remainder returned by mpn_divrem_1 */
  if (dif >= 0)
    c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */
  else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */
    c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c);

  inexact = (c != 0);

  /* First pass in estimating next bit of the quotient, in case of RNDN    *
   * In case we just have the right number of bits (postpone this ?),      *
   * we need to check whether the remainder is more or less than half      *
   * the divisor. The test must be performed with a subtraction, so as     *
   * to prevent carries.                                                   */

  /* middle: -1 if remainder < u/2, 1 if remainder > u/2, 0 if equal */
  if (rnd_mode == GMP_RNDN)
      if (c < (mp_limb_t) u - c) /* We have u > c */
	middle = -1;
      else if (c > (mp_limb_t) u - c)
	middle = 1;
	middle = 0; /* exactly in the middle */

  /* If we believe that we are right in the middle or exact, we should check
     that we did not neglect any word of x (division large / 1 -> small). */

  for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++)
    if (xp[i])
      inexact = middle = 1; /* larger than middle */

     If the high limb of the result is 0 (xp[xn-1] < u), remove it.
     Otherwise, compute the left shift to be performed to normalize.
     In the latter case, we discard some low bits computed. They
     contain information useful for the rounding, hence the updating
     of middle and inexact.

  if (tmp[yn] == 0)
      MPN_COPY(yp, tmp, yn);
      exp -= BITS_PER_MP_LIMB;
      sh = 0;
      count_leading_zeros (sh, tmp[yn]);

      /* shift left to normalize */
      if (sh)
          mp_limb_t w = tmp[0] << sh;

          mpn_lshift (yp, tmp + 1, yn, sh);
          yp[0] += tmp[0] >> (BITS_PER_MP_LIMB - sh);

          if (w > (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1)))
            { middle = 1; }
          else if (w < (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1)))
            { middle = -1; }
            { middle = (c != 0); }

          inexact = inexact || (w != 0);
          exp -= sh;
        { /* this happens only if u == 1 and xp[xn-1] >=
             1<<(BITS_PER_MP_LIMB-1). It might be better to handle the
             u == 1 case seperately ?

          MPN_COPY (yp, tmp + 1, yn);
Beispiel #17
Datei: log.c Projekt: isuruf/arb
/*
 * arb_log_arf: set the ball z to the logarithm of the floating-point
 * number x at precision prec.
 *
 * Paths visible below, in order:
 *   - special values and inputs with the sign bit set (bodies not shown);
 *   - exact powers of two: (exponent - 1) * log(2) via arb_const_log2;
 *   - exponents stored as big integers: delegated to arb_log_arf_huge;
 *   - x very close to 1: one or two terms of log(1+t) with an explicit
 *     radius;
 *   - working precision above ARB_LOG_TAB2_PREC: arf_log_via_mpfr;
 *   - otherwise: two table-based argument reductions, a Taylor series
 *     (_arb_atan_taylor_rs), table lookups and a final exp * log(2) term,
 *     with the rounding error accumulated in `error`.
 *
 * NOTE(review): this listing appears to have lost lines (return type,
 * braces, else/return statements, init/clear calls and the temporary
 * allocation bookkeeping).  The added comments describe only what is
 * visible; confirm the control flow against the upstream Arb source.
 */
arb_log_arf(arb_t z, const arf_t x, slong prec)
    /* Dispatch on the kind of input; several branch bodies are missing
       from this listing. */
    if (arf_is_special(x))
        if (arf_is_pos_inf(x))
    else if (ARF_SGNBIT(x))
    else if (ARF_IS_POW2(x))
        if (fmpz_is_one(ARF_EXPREF(x)))
            fmpz_t exp;
            _fmpz_add_fast(exp, ARF_EXPREF(x), -1);
            arb_const_log2(z, prec + 2);
            arb_mul_fmpz(z, z, exp, prec);
    else if (COEFF_IS_MPZ(*ARF_EXPREF(x)))
        arb_log_arf_huge(z, x, prec);
        slong exp, wp, wn, N, r, closeness_to_one;
        mp_srcptr xp;
        mp_size_t xn, tn;
        mp_ptr tmp, w, t, u;
        mp_limb_t p1, q1bits, p2, q2bits, error, error2, cy;
        int negative, inexact, used_taylor_series;

        exp = ARF_EXP(x);
        negative = 0;

        ARF_GET_MPN_READONLY(xp, xn, x);

        /* compute a c >= 0 such that |x-1| <= 2^(-c) if c > 0 */
        closeness_to_one = 0;

        /* exp == 0: counts the leading one bits of the mantissa, continuing
           into lower limbs while they are all ones
           (presumably x is just below 1 here -- confirm arf convention) */
        if (exp == 0)
            slong i;

            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(~xp[xn - 1]);

            if (closeness_to_one == FLINT_BITS)
                for (i = xn - 2; i > 0 && xp[i] == LIMB_ONES; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(~xp[i]));
        /* exp == 1: counts the leading zero bits below the top mantissa bit
           (presumably x is just above 1 here -- confirm arf convention) */
        else if (exp == 1)
            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(xp[xn - 1] & (~LIMB_TOP));

            if (closeness_to_one == FLINT_BITS)
                slong i;

                for (i = xn - 2; xp[i] == 0; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(xp[i]));


        /* if |t-1| <= 0.5               */
        /* |log(1+t) - t| <= t^2         */
        /* |log(1+t) - (t-t^2/2)| <= t^3 */
        /* one-term approximation: midpoint x - 1, radius 2^(-2c) */
        if (closeness_to_one > prec + 1)
            inexact = arf_sub_ui(arb_midref(z), x, 1, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -2 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
        /* two-term approximation: midpoint t - t^2/2, radius 2^(-3c) */
        else if (2 * closeness_to_one > prec + 1)
            arf_t t, u;
            arf_sub_ui(t, x, 1, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul(u, t, t, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul_2exp_si(u, u, -1);
            inexact = arf_sub(arb_midref(z), t, u, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -3 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);

        /* Absolute working precision (NOT rounded to a limb multiple) */
        wp = prec + closeness_to_one + 5;

        /* Too high precision to use table */
        if (wp > ARB_LOG_TAB2_PREC)
            arf_log_via_mpfr(arb_midref(z), x, prec, ARB_RND);
            arf_mag_set_ulp(arb_radref(z), arb_midref(z), prec);

        /* Working precision in limbs */
        wn = (wp + FLINT_BITS - 1) / FLINT_BITS;


        /* one scratch allocation, carved into w, t and u below */
        tmp = TMP_ALLOC_LIMBS(4 * wn + 3);
        w = tmp;        /* requires wn+1 limbs */
        t = w + wn + 1; /* requires wn+1 limbs */
        u = t + wn + 1; /* requires 2wn+1 limbs */

        /* read x-1 */
        /* error = 0 when all xn limbs of x fit into w, error = 1 when the
           low limbs of x are dropped */
        if (xn <= wn)
            flint_mpn_zero(w, wn - xn);
            mpn_lshift(w + wn - xn, xp, xn, 1);
            error = 0;
            mpn_lshift(w, xp + xn - wn, wn, 1);
            error = 1;

        /* First table-based argument reduction */
        if (wp <= ARB_LOG_TAB1_PREC)
            q1bits = ARB_LOG_TAB11_BITS;
            q1bits = ARB_LOG_TAB21_BITS;

        /* p1 = top q1bits bits of w */
        p1 = w[wn-1] >> (FLINT_BITS - q1bits);

        /* Special case: covers logarithms of small integers */
        if (xn == 1 && (w[wn-1] == (p1 << (FLINT_BITS - q1bits))))
            p2 = 0;
            flint_mpn_zero(t, wn);
            used_taylor_series = 0;
            N = r = 0; /* silence compiler warning */
            /* log(1+w) = log(1+p/q) + log(1 + (qw-p)/(p+q)) */
            w[wn] = mpn_mul_1(w, w, wn, UWORD(1) << q1bits) - p1;
            mpn_divrem_1(w, 0, w, wn + 1, p1 + (UWORD(1) << q1bits));
            error += 1;

            /* Second table-based argument reduction (fused with log->atanh
               conversion) */
            if (wp <= ARB_LOG_TAB1_PREC)
                q2bits = ARB_LOG_TAB11_BITS + ARB_LOG_TAB12_BITS;
                q2bits = ARB_LOG_TAB21_BITS + ARB_LOG_TAB22_BITS;

            p2 = w[wn-1] >> (FLINT_BITS - q2bits);

            u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits);
            flint_mpn_zero(u, wn);
            flint_mpn_copyi(t, u + wn, wn + 1);
            t[wn] += p2 + (UWORD(1) << (q2bits + 1));
            u[2 * wn] -= p2;
            mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1);

            /* propagated error from 1 ulp error: 2 atanh'(1/3) = 2.25 */
            error += 3;

            /* |w| <= 2^-r */
            r = _arb_mpn_leading_zeros(w, wn);

            /* N >= (wp-r)/(2r) */
            N = (wp - r + (2*r-1)) / (2*r);
            N = FLINT_MAX(N, 0);

            /* Evaluate Taylor series */
            _arb_atan_taylor_rs(t, &error2, w, wn, N, 0);
            /* Multiply by 2 */
            mpn_lshift(t, t, wn, 1);
            /* Taylor series evaluation error (multiply by 2) */
            error += error2 * 2;

            used_taylor_series = 1;

        /* Size of output number */
        tn = wn;

        /* First table lookup */
        /* adds the top tn limbs of the table entry selected by p1 */
        if (p1 != 0)
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab11[p1] + ARB_LOG_TAB1_LIMBS - tn, tn);
                mpn_add_n(t, t, arb_log_tab21[p1] + ARB_LOG_TAB2_LIMBS - tn, tn);

        /* Second table lookup */
        if (p2 != 0)
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab12[p2] + ARB_LOG_TAB1_LIMBS - tn, tn);
                mpn_add_n(t, t, arb_log_tab22[p2] + ARB_LOG_TAB2_LIMBS - tn, tn);

        /* add exp * log(2) */

        /* positive exponent: accumulate directly; a carry-out grows t by
           one limb */
        if (exp > 0)
            cy = mpn_addmul_1(t, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, exp);
            t[tn] = cy;
            tn += (cy != 0);
            error += exp;
        /* negative exponent: form |exp| * log(2) in u and subtract the
           smaller from the larger, recording the sign in `negative` */
        else if (exp < 0)
            t[tn] = 0;
            u[tn] = mpn_mul_1(u, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, -exp);

            if (mpn_cmp(t, u, tn + 1) >= 0)
                mpn_sub_n(t, t, u, tn + 1);
                mpn_sub_n(t, u, t, tn + 1);
                negative = 1;

            error += (-exp);

            tn += (t[tn] != 0);

        /* The accumulated arithmetic error */
        mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS);

        /* Truncation error from the Taylor series */
        if (used_taylor_series)
            mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1) + 1);

        /* Set the midpoint */
        inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec);
        if (inexact)
            arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
