Example #1
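/* Generate a uniformly distributed random float in [0, 1) with the precision
   of rop, consuming exactly PREC(rop) bits from rstate.  Returns nonzero only
   when the resulting exponent does not fit in the current exponent range, in
   which case rop is set to NaN. */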
int
mpfr_urandomb (mpfr_ptr rop, gmp_randstate_t rstate)
{
    mpfr_limb_ptr rp;
    mpfr_prec_t nbits;
    mp_size_t nlimbs;
    mp_size_t k; /* number of high zero limbs */
    mpfr_exp_t exp;
    int cnt;

    rp = MPFR_MANT (rop);
    nbits = MPFR_PREC (rop);
    nlimbs = MPFR_LIMB_SIZE (rop);
    MPFR_SET_POS (rop);
    cnt = nlimbs * GMP_NUMB_BITS - nbits;

    /* Uniform non-normalized significand */
    /* generate exactly nbits so that the random generator stays in the same
       state, independent of the machine word size GMP_NUMB_BITS */
    mpfr_rand_raw (rp, rstate, nbits);
    if (MPFR_LIKELY (cnt != 0)) /* this will put the low bits to zero */
        mpn_lshift (rp, rp, nlimbs, cnt);

    /* Count the null significant limbs and remaining limbs */
    exp = 0;
    k = 0;
    while (nlimbs != 0 && rp[nlimbs - 1] == 0)
    {
        k ++;
        nlimbs --;
        exp -= GMP_NUMB_BITS;
    }

    if (MPFR_LIKELY (nlimbs != 0)) /* otherwise value is zero */
    {
        count_leading_zeros (cnt, rp[nlimbs - 1]);
        /* Normalization */
        if (mpfr_set_exp (rop, exp - cnt))
        {
            /* If the exponent is not in the current exponent range, we
               choose to return a NaN as this is probably a user error.
               Indeed this can happen only if the exponent range has been
               reduced to a very small interval and/or the precision is
               huge (very unlikely). */
            MPFR_SET_NAN (rop);
            __gmpfr_flags |= MPFR_FLAGS_NAN; /* Can't use MPFR_RET_NAN */
            return 1;
        }
        if (cnt != 0)
            mpn_lshift (rp + k, rp, nlimbs, cnt);
        if (k != 0)
            MPN_ZERO (rp, k);
    }
    else
        MPFR_SET_ZERO (rop);

    return 0;
}
Example #2
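/* Compute the Lucas numbers L[n] (into ln) and L[n-1] (into lnsub1) from the
   Fibonacci pair F[n], F[n-1], using L[n] = F[n] + 2F[n-1] and
   L[n-1] = 2F[n] - F[n-1]. */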
void
mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
{
  mp_ptr     lp, l1p, f1p;
  mp_size_t  size;
  mp_limb_t  c;
  TMP_DECL;

  ASSERT (ln != lnsub1);

  /* handle small n quickly, and hide the special case for L[-1]=-1 */
  if (n <= FIB_TABLE_LUCNUM_LIMIT)
    {
      mp_limb_t  f  = FIB_TABLE (n);
      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);

      /* L[n] = F[n] + 2F[n-1] */
      PTR(ln)[0] = f + 2*f1;
      SIZ(ln) = 1;

      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);
      SIZ(lnsub1) = (n == 0 ? -1 : 1);

      return;
    }

  TMP_MARK;
  size = MPN_FIB2_SIZE (n);
  f1p = TMP_ALLOC_LIMBS (size);

  MPZ_REALLOC (ln,     size+1);
  MPZ_REALLOC (lnsub1, size+1);
  lp  = PTR(ln);
  l1p = PTR(lnsub1);

  size = mpn_fib2_ui (l1p, f1p, n);

  /* L[n] = F[n] + 2F[n-1] */
#if HAVE_NATIVE_mpn_addlsh1_n
  c = mpn_addlsh1_n (lp, l1p, f1p, size);
#else
  c = mpn_lshift (lp, f1p, size, 1);
  c += mpn_add_n (lp, lp, l1p, size);
#endif
  lp[size] = c;
  SIZ(ln) = size + (c != 0);

  /* L[n-1] = 2F[n] - F[n-1] */
  c = mpn_lshift (l1p, l1p, size, 1);
  c -= mpn_sub_n (l1p, l1p, f1p, size);
  ASSERT ((mp_limb_signed_t) c >= 0);
  l1p[size] = c;
  SIZ(lnsub1) = size + (c != 0);

  TMP_FREE;
}
Example #3
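// Set c = 2*a in the prime field F_p (i.e. double a modulo p); a flag of 0
// marks the zero element, so the doubling is skipped when a is zero.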
static void fp_double(element_ptr c, element_ptr a) {
	eptr ad = (eptr)a->data, cd = (eptr)c->data;
	if (!ad->flag) {
		cd->flag = 0;
	}
	else {
		fptr p = (fptr)c->field->data;
		const size_t t = p->limbs;
		if (mpn_lshift(cd->d, ad->d, t, 1)) {
			cd->flag = 2;
			// Again, assumes result is not zero.
			mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
		}
		else {
			int i = mpn_cmp(cd->d, p->primelimbs, t);
			if (!i) {
				cd->flag = 0;
			}
			else {
				cd->flag = 2;
				if (i > 0) {
					mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
				}
			}
		}
	}
}
Example #4
/*
 * Set f to z, choosing the smallest precision for f
 * so that z = f*(2^BPML)*zs*2^(RetVal)
 */
static int
set_z (mpfr_ptr f, mpz_srcptr z, mp_size_t *zs)
{
  mp_limb_t *p;
  mp_size_t s;
  int c;
  mpfr_prec_t pf;

  MPFR_ASSERTD (mpz_sgn (z) != 0);

  /* Skip the useless low zero limbs */
  for (p = PTR (z), s = *zs = ABS (SIZ (z)) ; *p == 0; p++, s--)
    MPFR_ASSERTD (s >= 0);

  /* Get working precision */
  count_leading_zeros (c, p[s-1]);
  pf = s * GMP_NUMB_BITS - c;
  if (pf < MPFR_PREC_MIN)
    pf = MPFR_PREC_MIN;
  mpfr_init2 (f, pf);

  /* Copy Mantissa */
  if (MPFR_LIKELY (c))
    mpn_lshift (MPFR_MANT (f), p, s, c);
  else
    MPN_COPY (MPFR_MANT (f), p, s);

  MPFR_SET_SIGN (f, mpz_sgn (z));
  MPFR_SET_EXP (f, 0);

  return -c;
}
Example #5
/* Put in  rp[n..2n-1] an approximation of the n high limbs
   of {np, n}^2. The error is less than n ulps of rp[n]. */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t k;

  MPFR_STAT_STATIC_ASSERT (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
  k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
    : (n+4)/2; /* ensures that k >= (n+3)/2 */
  MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  if (k < 0)
    /* we can't use mpn_sqr_basecase here, since it requires
       n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
       is not exported by GMP */
    mpn_sqr_n (rp, np, n);
  else if (k == 0)
    mpfr_mulhigh_n_basecase (rp, np, np, n);
  else
    {
      mp_size_t l = n - k;
      mp_limb_t cy;

      mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
      mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
      /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
      cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
      cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
      mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
    }
}
Example #6
/* truncates, returns inexact */
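/* Write to y the integer part of the value whose mantissa limbs are {x, xn}
   and whose binary exponent is exp, i.e. {x, xn} * 2^(exp - xn*FLINT_BITS),
   truncating toward zero; the return value is nonzero when the result may be
   inexact. */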
int
_arf_get_integer_mpn(mp_ptr y, mp_srcptr x, mp_size_t xn, slong exp)
{
    slong bot_exp = exp - xn * FLINT_BITS;

    if (bot_exp >= 0)
    {
        mp_size_t bot_limbs;
        mp_bitcnt_t bot_bits;

        bot_limbs = bot_exp / FLINT_BITS;
        bot_bits = bot_exp % FLINT_BITS;

        flint_mpn_zero(y, bot_limbs);

        if (bot_bits == 0)
            flint_mpn_copyi(y + bot_limbs, x, xn);
        else
            y[bot_limbs + xn] = mpn_lshift(y + bot_limbs, x, xn, bot_bits);

        /* exact */
        return 0;
    }
    else if (exp <= 0)
    {
        /* inexact */
        return 1;
    }
    else
    {
        mp_size_t top_limbs;
        mp_bitcnt_t top_bits;
        mp_limb_t cy;

        top_limbs = exp / FLINT_BITS;
        top_bits = exp % FLINT_BITS;

        if (top_bits == 0)
        {
            flint_mpn_copyi(y, x + xn - top_limbs, top_limbs);
            /* inexact */
            return 1;
        }
        else
        {
            /* can be inexact */
            cy = mpn_rshift(y, x + xn - top_limbs - 1,
                            top_limbs + 1, FLINT_BITS - top_bits);

            return (cy != 0) || (top_limbs + 1 != xn);
        }
    }
}
Example #7
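/* Shift the signed mpn {xp, xn} left by bits (0 < bits < GMP_NUMB_BITS),
   writing the result to {rp, *rn}; the sign is carried in the size, which
   grows by one limb when a nonzero limb is shifted out at the top. */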
void tc4_lshift(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn, mp_size_t bits)
{
   if (xn == 0) *rn = 0;
   else
   {
      mp_size_t xu = ABS(xn);
      mp_limb_t msl = mpn_lshift(rp, xp, xu, bits);
      if (msl)
      {
         rp[xu] = msl;
         *rn = (xn >= 0 ? xn + 1 : xn - 1);
      }
      else
         *rn = xn;
   }
}
Example #8
File: wrappers.c Project: rcook/ghc
/* Perform left-shift operation on MPN
 *
 * pre-conditions:
 *  - 0 < count
 *  - rn = sn + ceil(count / GMP_NUMB_BITS)
 *  - sn > 0
 *
 * return value: most-significant limb stored in {rp,rn} result
 */
mp_limb_t
integer_gmp_mpn_lshift (mp_limb_t rp[], const mp_limb_t sp[],
                        const mp_size_t sn, const mp_bitcnt_t count)
{
    const mp_size_t    limb_shift = count / GMP_NUMB_BITS;
    const unsigned int bit_shift  = count % GMP_NUMB_BITS;
    const mp_size_t    rn0        = sn + limb_shift;

    memset(rp, 0, limb_shift*sizeof(mp_limb_t));
    if (bit_shift) {
        const mp_limb_t msl = mpn_lshift(&rp[limb_shift], sp, sn, bit_shift);
        rp[rn0] = msl;
        return msl;
    } else {
        memcpy(&rp[limb_shift], sp, sn*sizeof(mp_limb_t));
        return rp[rn0-1];
    }
}
Example #9
File: mul.c Project: gnooth/xcl
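/* Multiply b by c and round the product to the precision of a according to
   rnd_mode; returns the usual MPFR ternary value (0 iff the result is exact).
   Kept here as the old implementation of mpfr_mul. */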
static int
mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
    /* Old implementation */
    int sign_product, cc, inexact;
    mpfr_exp_t ax;
    mp_limb_t *tmp;
    mp_limb_t b1;
    mpfr_prec_t bq, cq;
    mp_size_t bn, cn, tn, k;
    MPFR_TMP_DECL(marker);

    /* deal with special cases */
    if (MPFR_ARE_SINGULAR(b,c))
    {
        if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
        {
            MPFR_SET_NAN(a);
            MPFR_RET_NAN;
        }
        sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
        if (MPFR_IS_INF(b))
        {
            if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
            {
                MPFR_SET_SIGN(a,sign_product);
                MPFR_SET_INF(a);
                MPFR_RET(0); /* exact */
            }
            else
            {
                MPFR_SET_NAN(a);
                MPFR_RET_NAN;
            }
        }
        else if (MPFR_IS_INF(c))
        {
            if (MPFR_NOTZERO(b))
            {
                MPFR_SET_SIGN(a, sign_product);
                MPFR_SET_INF(a);
                MPFR_RET(0); /* exact */
            }
            else
            {
                MPFR_SET_NAN(a);
                MPFR_RET_NAN;
            }
        }
        else
        {
            MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
            MPFR_SET_SIGN(a, sign_product);
            MPFR_SET_ZERO(a);
            MPFR_RET(0); /* 0 * 0 is exact */
        }
    }
    sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );

    ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);

    bq = MPFR_PREC(b);
    cq = MPFR_PREC(c);

    MPFR_ASSERTD(bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */

    bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */
    cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */
    k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
    tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
    /* <= k, thus no int overflow */
    MPFR_ASSERTD(tn <= k);

    /* Check for no size_t overflow*/
    MPFR_ASSERTD((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
    MPFR_TMP_MARK(marker);
    tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) k * BYTES_PER_MP_LIMB);

    /* multiply the two mantissas in temporarily allocated space */
    b1 = (MPFR_LIKELY(bn >= cn)) ?
         mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
         : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);

    /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
       with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
    b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */

    /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
       then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
       and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
    tmp += k - tn;
    if (MPFR_UNLIKELY(b1 == 0))
        mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
                         MPFR_IS_NEG_SIGN(sign_product),
                         MPFR_PREC (a), rnd_mode, &inexact);

    /* cc = 1 ==> result is a power of two */
    if (MPFR_UNLIKELY(cc))
        MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

    MPFR_TMP_FREE(marker);

    {
        mpfr_exp_t ax2 = ax + (mpfr_exp_t) (b1 - 1 + cc);
        if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
            return mpfr_overflow (a, rnd_mode, sign_product);
        if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
        {
            /* In the rounding to the nearest mode, if the exponent of the exact
               result (i.e. before rounding, i.e. without taking cc into account)
               is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
               both arguments are powers of 2), then round to zero. */
            if (rnd_mode == MPFR_RNDN &&
                    (ax + (mpfr_exp_t) b1 < __gmpfr_emin ||
                     (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
                rnd_mode = MPFR_RNDZ;
            return mpfr_underflow (a, rnd_mode, sign_product);
        }
        MPFR_SET_EXP (a, ax2);
        MPFR_SET_SIGN(a, sign_product);
    }
    MPFR_RET (inexact);
}
Example #10
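/* Interpolation step shared by the Toom variants that evaluate at 7 points
   (toom44, toom53, toom62): recover the polynomial coefficients from the
   pointwise products W0..W6 held in rp, w1, w3, w4, w5 and recompose the
   final product into rp. */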
void
mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
			   mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
			   mp_size_t w6n, mp_ptr tp)
{
  mp_size_t m;
  mp_limb_t cy;

  m = 2*n + 1;
#define w0 rp
#define w2 (rp + 2*n)
#define w6 (rp + 6*n)

  ASSERT (w6n > 0);
  ASSERT (w6n <= 2*n);

  /* Using formulas similar to Marco Bodrato's

     W5 = W5 + W4
     W1 =(W4 - W1)/2
     W4 = W4 - W0
     W4 =(W4 - W1)/4 - W6*16
     W3 =(W2 - W3)/2
     W2 = W2 - W3

     W5 = W5 - W2*65      May be negative.
     W2 = W2 - W6 - W0
     W5 =(W5 + W2*45)/2   Now >= 0 again.
     W4 =(W4 - W2)/3
     W2 = W2 - W4

     W1 = W5 - W1         May be negative.
     W5 =(W5 - W3*8)/9
     W3 = W3 - W5
     W1 =(W1/15 + W5)/2   Now >= 0 again.
     W5 = W5 - W1

     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
	   W4 = f(2), W5 = f(1/2), W6 = f(oo),

     Note that most intermediate results are positive; the ones that
     may be negative are represented in two's complement. We must
     never shift right a value that may be negative, since that would
     invalidate the sign bit. On the other hand, divexact by odd
     numbers works fine with two's complement.
  */

  mpn_add_n (w5, w5, w4, m);
  if (flags & toom7_w1_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w1, w1, w4, m);
#else
      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w1, w4, w1, m);
#else
      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  mpn_sub (w4, w4, m, w0, 2*n);
  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));
  mpn_rshift (w4, w4, m, 2); /* w4>=0 */

  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
  mpn_sub (w4, w4, m, tp, w6n+1);

  if (flags & toom7_w3_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w3, w3, w2, m);
#else
      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w3, w2, w3, m);
#else
      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }

  mpn_sub_n (w2, w2, w3, m);

  mpn_submul_1 (w5, w2, m, 65);
  mpn_sub (w2, w2, m, w6, w6n);
  mpn_sub (w2, w2, m, w0, 2*n);

  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));
  mpn_rshift (w5, w5, m, 1);
  mpn_sub_n (w4, w4, w2, m);

  mpn_divexact_by3 (w4, w4, m);
  mpn_sub_n (w2, w2, w4, m);

  mpn_sub_n (w1, w5, w1, m);
  mpn_lshift (tp, w3, m, 3);
  mpn_sub_n (w5, w5, tp, m);
  mpn_divexact_by9 (w5, w5, m);
  mpn_sub_n (w3, w3, w5, m);

  mpn_divexact_by15 (w1, w1, m);
  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
  mpn_sub_n (w5, w5, w1, m);

  /* These bounds are valid for the 4x4 polynomial product of toom44,
   * and they are conservative for toom53 and toom62. */
  ASSERT (w1[2*n] < 2);
  ASSERT (w2[2*n] < 3);
  ASSERT (w3[2*n] < 4);
  ASSERT (w4[2*n] < 3);
  ASSERT (w5[2*n] < 2);

  /* Addition chain. Note carries and the 2n'th limbs that need to be
   * added in.
   *
   * Special care is needed for w2[2n] and the corresponding carry,
   * since the "simple" way of adding it all together would overwrite
   * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
   * the high half of w3 and the low half of w4.
   *
   *         7    6    5    4    3    2    1    0
   *    |    |    |    |    |    |    |    |    |
   *                  ||w3 (2n+1)|
   *             ||w4 (2n+1)|
   *        ||w5 (2n+1)|        ||w1 (2n+1)|
   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
   *  -----------------------------------------------
   *  r |    |    |    |    |    |    |    |    |
   *        c7   c6   c5   c4   c3                 Carries to propagate
   */

  cy = mpn_add_n (rp + n, rp + n, w1, m);
  MPN_INCR_U (w2 + n + 1, n , cy);
  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
  if (w6n > n + 1)
    ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
  else
    {
      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
#if WANT_ASSERT
      {
	mp_size_t i;
	for (i = w6n; i <= n; i++)
	  ASSERT (w5[n + i] == 0);
      }
#endif
    }
}
Example #11
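/* Set g to gcd(|u|, |v|): dispose of the zero and single-limb cases, strip
   each operand's low zero bits, run mpn_gcd on the odd parts, then shift the
   common power of two back into the result. */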
void
_gst_mpz_gcd (gst_mpz *g, const gst_mpz *u, const gst_mpz *v)
{
    int g_zero_bits, u_zero_bits, v_zero_bits;
    mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
    mp_ptr tp;
    mp_ptr up = u->d;
    mp_size_t usize = ABS (u->size);
    mp_ptr vp = v->d;
    mp_size_t vsize = ABS (v->size);
    mp_size_t gsize;

    /* GCD(0, V) == GCD (U, 1) == V.  */
    if (usize == 0 || (vsize == 1 && vp[0] == 1))
    {
        gst_mpz_copy_abs (g, v);
        return;
    }

    /* GCD(U, 0) == GCD (1, V) == U.  */
    if (vsize == 0 || (usize == 1 && up[0] == 1))
    {
        gst_mpz_copy_abs (g, u);
        return;
    }

    if (usize == 1)
    {
        gst_mpz_realloc (g, 1);
        g->size = 1;
        g->d[0] = mpn_gcd_1 (vp, vsize, up[0]);
        return;
    }

    if (vsize == 1)
    {
        gst_mpz_realloc (g, 1);
        g->size = 1;
        g->d[0] = mpn_gcd_1 (up, usize, vp[0]);
        return;
    }

    /*  Eliminate low zero bits from U and V and move to temporary storage.  */
    u_zero_bits = mpn_scan1 (up, 0);
    u_zero_limbs = u_zero_bits / BITS_PER_MP_LIMB;
    u_zero_bits &= BITS_PER_MP_LIMB - 1;
    up += u_zero_limbs;
    usize -= u_zero_limbs;

    /* Operands could be destroyed for big-endian case, but let's be tidy.  */
    tp = up;
    up = (mp_ptr) alloca (usize * SIZEOF_MP_LIMB_T);
    if (u_zero_bits != 0)
    {
        mpn_rshift (up, tp, usize, u_zero_bits);
        usize -= up[usize - 1] == 0;
    }
    else
        MPN_COPY (up, tp, usize);

    v_zero_bits = mpn_scan1 (vp, 0);
    v_zero_limbs = v_zero_bits / BITS_PER_MP_LIMB;
    v_zero_bits &= BITS_PER_MP_LIMB - 1;
    vp += v_zero_limbs;
    vsize -= v_zero_limbs;

    /* Operands could be destroyed for big-endian case, but let's be tidy.  */
    tp = vp;
    vp = (mp_ptr) alloca (vsize * SIZEOF_MP_LIMB_T);
    if (v_zero_bits != 0)
    {
        mpn_rshift (vp, tp, vsize, v_zero_bits);
        vsize -= vp[vsize - 1] == 0;
    }
    else
        MPN_COPY (vp, tp, vsize);

    if (u_zero_limbs > v_zero_limbs)
    {
        g_zero_limbs = v_zero_limbs;
        g_zero_bits = v_zero_bits;
    }
    else if (u_zero_limbs < v_zero_limbs)
    {
        g_zero_limbs = u_zero_limbs;
        g_zero_bits = u_zero_bits;
    }
    else  /*  Equal.  */
    {
        g_zero_limbs = u_zero_limbs;
        g_zero_bits = MIN (u_zero_bits, v_zero_bits);
    }

    /*  Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */
    vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
            ? mpn_gcd (vp, vp, vsize, up, usize)
            : mpn_gcd (vp, up, usize, vp, vsize);

    /*  Here G <-- V << (g_zero_limbs*BITS_PER_MP_LIMB + g_zero_bits).  */
    gsize = vsize + g_zero_limbs;
    if (g_zero_bits != 0)
    {
        mp_limb_t cy_limb;
        gsize += (vp[vsize - 1] >> (BITS_PER_MP_LIMB - g_zero_bits)) != 0;
        if (g->alloc < gsize)
            gst_mpz_realloc (g, gsize);
        MPN_ZERO (g->d, g_zero_limbs);

        tp = g->d + g_zero_limbs;
        cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
        if (cy_limb != 0)
            tp[vsize] = cy_limb;
    }
Example #12
File: mul.c Project: gnooth/xcl
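/* Set a to b*c rounded according to rnd_mode; returns the ternary (inexact)
   value.  Operands of one or two limbs are handled directly with umul_ppmm;
   larger ones go through mpn multiplication. */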
int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
    int sign, inexact;
    mpfr_exp_t ax, ax2;
    mp_limb_t *tmp;
    mp_limb_t b1;
    mpfr_prec_t bq, cq;
    mp_size_t bn, cn, tn, k;
    MPFR_TMP_DECL (marker);

    MPFR_LOG_FUNC (("b[%#R]=%R c[%#R]=%R rnd=%d", b, b, c, c, rnd_mode),
                   ("a[%#R]=%R inexact=%d", a, a, inexact));

    /* deal with special cases */
    if (MPFR_ARE_SINGULAR (b, c))
    {
        if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
        {
            MPFR_SET_NAN (a);
            MPFR_RET_NAN;
        }
        sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
        if (MPFR_IS_INF (b))
        {
            if (!MPFR_IS_ZERO (c))
            {
                MPFR_SET_SIGN (a, sign);
                MPFR_SET_INF (a);
                MPFR_RET (0);
            }
            else
            {
                MPFR_SET_NAN (a);
                MPFR_RET_NAN;
            }
        }
        else if (MPFR_IS_INF (c))
        {
            if (!MPFR_IS_ZERO (b))
            {
                MPFR_SET_SIGN (a, sign);
                MPFR_SET_INF (a);
                MPFR_RET(0);
            }
            else
            {
                MPFR_SET_NAN (a);
                MPFR_RET_NAN;
            }
        }
        else
        {
            MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
            MPFR_SET_SIGN (a, sign);
            MPFR_SET_ZERO (a);
            MPFR_RET (0);
        }
    }
    sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));

    ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
    /* Note: the exponent of the exact result will be e = bx + cx + ec with
       ec in {-1,0,1} and the following assumes that e is representable. */

    /* FIXME: Useful since we do an exponent check after ?
     * It is useful iff the precision is big, there is an overflow
     * and we are doing further mults...*/
#ifdef HUGE
    if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
        return mpfr_overflow (a, rnd_mode, sign);
    if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
        return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
                               sign);
#endif

    bq = MPFR_PREC (b);
    cq = MPFR_PREC (c);

    MPFR_ASSERTD (bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */

    bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */
    cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */
    k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
    tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
    MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */

    /* Check for no size_t overflow*/
    MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
    MPFR_TMP_MARK (marker);
    tmp = (mp_limb_t *) MPFR_TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB);

    /* multiply the two mantissas in temporarily allocated space */
    if (MPFR_UNLIKELY (bn < cn))
    {
        mpfr_srcptr z = b;
        mp_size_t zn  = bn;
        b = c;
        bn = cn;
        c = z;
        cn = zn;
    }
    MPFR_ASSERTD (bn >= cn);
    if (MPFR_LIKELY (bn <= 2))
    {
        if (bn == 1)
        {
            /* 1 limb * 1 limb */
            umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
            b1 = tmp[1];
        }
        else if (MPFR_UNLIKELY (cn == 1))
        {
            /* 2 limbs * 1 limb */
            mp_limb_t t;
            umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
            umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
            b1 = tmp[2];
        }
        else
        {
            /* 2 limbs * 2 limbs */
            mp_limb_t t1, t2, t3;
            /* First 2 limbs * 1 limb */
            umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
            umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
            /* Second, the other 2 limbs * 1 limb product */
            umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
            umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
            add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
            /* Sum those two partial products */
            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
            tmp[3] += (tmp[2] < t1);
            b1 = tmp[3];
        }
        b1 >>= (GMP_NUMB_BITS - 1);
        tmp += k - tn;
        if (MPFR_UNLIKELY (b1 == 0))
            mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    }
    else
        /* Mulders' mulhigh. Disable if squaring, since it is not tuned for
           such a case */
        if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c))
Example #13
File: powm_ui.c Project: HRF92/mpir
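/* Set r to b^el mod m.  The modulus is normalized (shifted left) for the
   division routines, the base is reduced mod m, and the exponent bits are
   scanned from the most significant end (left-to-right binary powering). */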
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
{
  mp_ptr xp, tp, qp, mp, bp;
  mp_size_t xn, tn, mn, bn;
  int m_zero_cnt;
  int c;
  mp_limb_t e;
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ(m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  if (el == 0)
    {
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
	 depending on whether MOD equals 1.  */
      SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
      PTR(r)[0] = 1;
      return;
    }

  TMP_MARK;

  /* Normalize m (i.e. make its most significant bit set) as required by
     division functions below.  */
  count_leading_zeros (m_zero_cnt, mp[mn - 1]);
  m_zero_cnt -= GMP_NAIL_BITS;
  if (m_zero_cnt != 0)
    {
      mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
      mpn_lshift (new_mp, mp, mn, m_zero_cnt);
      mp = new_mp;
    }

  bn = ABSIZ(b);
  bp = PTR(b);
  if (bn > mn)
    {
      /* Reduce possibly huge base.  Use a function call to reduce, since we
	 don't want the quotient allocation to live until function return.  */
      mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
      reduce (new_bp, bp, bn, mp, mn);
      bp = new_bp;
      bn = mn;
      /* Canonicalize the base, since we are potentially going to multiply with
	 it quite a few times.  */
      MPN_NORMALIZE (bp, bn);
    }

  if (bn == 0)
    {
      SIZ(r) = 0;
      TMP_FREE;
      return;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
  xp = TMP_ALLOC_LIMBS (mn);

  qp = TMP_ALLOC_LIMBS (mn + 1);

  MPN_COPY (xp, bp, bn);
  xn = bn;

  e = el;
  count_leading_zeros (c, e);
  e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
  c = BITS_PER_MP_LIMB - 1 - c;

  /* Main loop. */

  /* If m is already normalized (high bit of high limb set), and b is the
     same size but a bigger value, and e==1, then no modular reductions
     are done and we can end up with a result out of range at the
     end. */
  if (c == 0)
    {
      if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
        mpn_sub_n (xp, xp, mp, mn);
      goto finishup;
    }

  while (c != 0)
    {
      mpn_sqr (tp, xp, xn);
      tn = 2 * xn; tn -= tp[tn - 1] == 0;
      if (tn < mn)
	{
	  MPN_COPY (xp, tp, tn);
	  xn = tn;
	}
      else
	{
	  mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
	  xn = mn;
	}

      if ((mp_limb_signed_t) e < 0)
	{
	  mpn_mul (tp, xp, xn, bp, bn);
	  tn = xn + bn; tn -= tp[tn - 1] == 0;
	  if (tn < mn)
	    {
	      MPN_COPY (xp, tp, tn);
	      xn = tn;
	    }
	  else
	    {
	      mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
	      xn = mn;
	    }
	}
      e <<= 1;
      c--;
    }

 finishup:
  /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing
     it with the original MOD.  */
  if (m_zero_cnt != 0)
    {
      mp_limb_t cy;
      cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
      tp[xn] = cy; xn += cy != 0;

      if (xn < mn)
	{
	  MPN_COPY (xp, tp, xn);
	}
      else
	{
	  mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
	  xn = mn;
	}
      mpn_rshift (xp, xp, xn, m_zero_cnt);
    }
  MPN_NORMALIZE (xp, xn);

  if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
    {
      mp = PTR(m);			/* want original, unnormalized m */
      mpn_sub (xp, mp, mn, xp, xn);
      xn = mn;
      MPN_NORMALIZE (xp, xn);
    }
  MPZ_REALLOC (r, xn);
  SIZ (r) = xn;
  MPN_COPY (PTR(r), xp, xn);

  TMP_FREE;
}
Example #14
// Cast num/den to an int, rounding towards nearest.  All inputs are destroyed.  Take a sqrt if desired.
// The values array must consist of r numerators followed by one denominator.
void snap_divs(RawArray<Quantized> result, RawArray<mp_limb_t,2> values, const bool take_sqrt) {
  assert(result.size()+1==values.m);

  // For division, we seek x s.t.
  //   x-1/2 <= num/den <= x+1/2
  //   2x-1 <= 2num/den <= 2x+1
  //   2x-1 <= floor(2num/den) <= 2x+1
  //   2x <= 1+floor(2num/den) <= 2x+2
  //   x <= (1+floor(2num/den))//2 <= x+1
  //   x = (1+floor(2num/den))//2

  // In the sqrt case, we seek a nonnegative integer x s.t.
  //   x-1/2 <= sqrt(num/den) < x+1/2
  //   2x-1 <= sqrt(4num/den) < 2x+1
  // Now the leftmost and rightmost expressions are integral, so we can take floors to get
  //   2x-1 <= floor(sqrt(4num/den)) < 2x+1
  // Since sqrt is monotonic and maps integers to integers, floor(sqrt(floor(x))) = floor(sqrt(x)), so
  //   2x-1 <= floor(sqrt(floor(4num/den))) < 2x+1
  //   2x <= 1+floor(sqrt(floor(4num/den))) < 2x+2
  //   x <= (1+floor(sqrt(floor(4num/den))))//2 < x+1
  //   x = (1+floor(sqrt(floor(4num/den))))//2

  // Thus, both cases look like
  //   x = (1+f(2**k*num/den))//2
  // where k = 1 or 2 and f is some truncating integer op (division or division+sqrt).

  // Adjust denominator to be positive
  const auto raw_den = values[result.size()];
  const bool den_negative = mp_limb_signed_t(raw_den.back())<0;
  if (den_negative)
    mpn_neg(raw_den.data(),raw_den.data(),raw_den.size());
  const auto den = trim(raw_den);
  assert(den.size()); // Zero should be prevented by the caller

  // Prepare for divisions
  const auto q = GEODE_RAW_ALLOCA(values.n-den.size()+1,mp_limb_t),
             r = GEODE_RAW_ALLOCA(den.size(),mp_limb_t);

  // Compute each component of the result
  for (int i=0;i<result.size();i++) {
    // Adjust numerator to be positive
    const auto num = values[i];
    const bool num_negative = mp_limb_signed_t(num.back())<0;
    if (take_sqrt && num_negative!=den_negative && !num.contains_only(0))
      throw RuntimeError("perturbed_ratio: negative value in square root");
    if (num_negative)
      mpn_neg(num.data(),num.data(),num.size());

    // Add enough bits to allow round-to-nearest computation after performing truncating operations
    mpn_lshift(num.data(),num.data(),num.size(),take_sqrt?2:1);
    // Perform division
    mpn_tdiv_qr(q.data(),r.data(),0,num.data(),num.size(),den.data(),den.size());
    const auto trim_q = trim(q);
    if (!trim_q.size()) {
      result[i] = 0;
      continue;
    }
    // Take sqrt if desired, reusing the num buffer
    const auto s = take_sqrt ? sqrt_helper(num,trim_q) : trim_q;

    // Verify that result lies in [-exact::bound,exact::bound];
    const int ratio = sizeof(ExactInt)/sizeof(mp_limb_t);
    static_assert(ratio<=2,"");
    if (s.size() > ratio)
      goto overflow;
    const auto nn = ratio==2 && s.size()==2 ? s[0]|ExactInt(s[1])<<8*sizeof(mp_limb_t) : s[0],
               n = (1+nn)/2;
    if (uint64_t(n) > uint64_t(exact::bound))
      goto overflow;

    // Done!
    result[i] = (num_negative==den_negative?1:-1)*Quantized(n);
  }

  return;
  overflow:
  throw OverflowError("perturbed_ratio: overflow in l'Hopital expansion");
}
Example #15
File: div_ui.c Project: Kirija/XPIR
/* returns 0 if result exact, non-zero otherwise */
int
mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mpfr_rnd_t rnd_mode)
{
  long i;
  int sh;
  mp_size_t xn, yn, dif;
  mp_limb_t *xp, *yp, *tmp, c, d;
  mpfr_exp_t exp;
  int inexact, middle = 1, nexttoinf;
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg u=%lu rnd=%d",
      mpfr_get_prec(x), mpfr_log_prec, x, u, rnd_mode),
     ("y[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec(y), mpfr_log_prec, y, inexact));

  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
    {
      if (MPFR_IS_NAN (x))
        {
          MPFR_SET_NAN (y);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_INF (x))
        {
          MPFR_SET_INF (y);
          MPFR_SET_SAME_SIGN (y, x);
          MPFR_RET (0);
        }
      else
        {
          MPFR_ASSERTD (MPFR_IS_ZERO(x));
          if (u == 0) /* 0/0 is NaN */
            {
              MPFR_SET_NAN(y);
              MPFR_RET_NAN;
            }
          else
            {
              MPFR_SET_ZERO(y);
              MPFR_SET_SAME_SIGN (y, x);
              MPFR_RET(0);
            }
        }
    }
  else if (MPFR_UNLIKELY (u <= 1))
    {
      if (u < 1)
        {
          /* x/0 is Inf since x != 0*/
          MPFR_SET_INF (y);
          MPFR_SET_SAME_SIGN (y, x);
          mpfr_set_divby0 ();
          MPFR_RET (0);
        }
      else /* y = x/1 = x */
        return mpfr_set (y, x, rnd_mode);
    }
  else if (MPFR_UNLIKELY (IS_POW2 (u)))
    return mpfr_div_2si (y, x, MPFR_INT_CEIL_LOG2 (u), rnd_mode);

  MPFR_SET_SAME_SIGN (y, x);

  MPFR_TMP_MARK (marker);
  xn = MPFR_LIMB_SIZE (x);
  yn = MPFR_LIMB_SIZE (y);

  xp = MPFR_MANT (x);
  yp = MPFR_MANT (y);
  exp = MPFR_GET_EXP (x);

  dif = yn + 1 - xn;

  /* we need to store yn+1 = xn + dif limbs of the quotient */
  /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */
  tmp = MPFR_TMP_LIMBS_ALLOC (yn + 1);

  c = (mp_limb_t) u;
  MPFR_ASSERTN (u == c);
  if (dif >= 0)
    c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */
  else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */
    c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c);

  inexact = (c != 0);

  /* First pass in estimating next bit of the quotient, in case of RNDN    *
   * In case we just have the right number of bits (postpone this ?),      *
   * we need to check whether the remainder is more or less than half      *
   * the divisor. The test must be performed with a subtraction, so as     *
   * to prevent carries.                                                   */

  if (MPFR_LIKELY (rnd_mode == MPFR_RNDN))
    {
      if (c < (mp_limb_t) u - c) /* We have u > c */
        middle = -1;
      else if (c > (mp_limb_t) u - c)
        middle = 1;
      else
        middle = 0; /* exactly in the middle */
    }

  /* If we believe that we are right in the middle or exact, we should check
     that we did not neglect any word of x (division large / 1 -> small). */

  for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++)
    if (xp[i])
      inexact = middle = 1; /* larger than middle */

  /*
     If the high limb of the result is 0 (xp[xn-1] < u), remove it.
     Otherwise, compute the left shift to be performed to normalize.
     In the latter case, we discard some low bits computed. They
     contain information useful for the rounding, hence the updating
     of middle and inexact.
  */

  if (tmp[yn] == 0)
    {
      MPN_COPY(yp, tmp, yn);
      exp -= GMP_NUMB_BITS;
    }
  else
    {
      int shlz;

      count_leading_zeros (shlz, tmp[yn]);

      /* shift left to normalize */
      if (MPFR_LIKELY (shlz != 0))
        {
          mp_limb_t w = tmp[0] << shlz;

          mpn_lshift (yp, tmp + 1, yn, shlz);
          yp[0] += tmp[0] >> (GMP_NUMB_BITS - shlz);

          if (w > (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)))
            { middle = 1; }
          else if (w < (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)))
            { middle = -1; }
          else
            { middle = (c != 0); }

          inexact = inexact || (w != 0);
          exp -= shlz;
        }
      else
        { /* this happens only if u == 1 and xp[xn-1] >=
Example #16
/*
   ret + (xp, n) = (yp, n)*(zp, n) mod (2^b + 1)
   needs (tp, 2n) temp space, everything reduced mod 2^b
   inputs, outputs are fully reduced

   N.B.: 2n is not the same as 2b rounded up to the nearest limb!
*/
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
                            mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;

  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b;

  TMP_DECL;

  ASSERT(b > 0);
  ASSERT(n > 0);
  ASSERT_MPN(yp, n);
  ASSERT_MPN(zp, n);
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT(!MPN_OVERLAP_P (xp, n, yp, n));
  ASSERT(!MPN_OVERLAP_P (xp, n, zp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n))
  {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;

      TMP_MARK;

      tx = TMP_BALLOC_LIMBS(3*n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;

      MPN_COPY(ty, yp, n);
      MPN_COPY(tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      while ((((mp_limb_t)1)<<depth) < b) depth++;
   
      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
      depth1 = depth/2 - off;
   
      w1 = b/(((mp_limb_t)1)<<(2*depth1));

      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);

      MPN_COPY(xp, tx, n);
      TMP_FREE;

	   return tx[n];
  }
#endif

  if (yp == zp)
     mpn_sqr(tp, yp, n);
  else
     mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      c = mpn_sub_n (xp, tp, tp + n, n);

      return mpn_add_1 (xp, xp, n, c);
    }

  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;

  return c;
}
Example #17
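/* Set g to gcd(|u|, |v|), following the same plan as the function above:
   handle zero and single-limb operands, factor out the common power of two,
   call mpn_gcd on the odd parts, then shift the result back. */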
void
mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
{
  mpir_ui g_zero_bits, u_zero_bits, v_zero_bits;
  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
  mp_ptr tp;
  mp_ptr up;
  mp_size_t usize;
  mp_ptr vp;
  mp_size_t vsize;
  mp_size_t gsize;
  TMP_DECL;

  up = PTR(u);
  usize = ABSIZ (u);
  vp = PTR(v);
  vsize = ABSIZ (v);
  /* GCD(0, V) == V.  */
  if (usize == 0)
    {
      SIZ (g) = vsize;
      if (g == v)
	return;
      MPZ_REALLOC (g, vsize);
      MPN_COPY (PTR (g), vp, vsize);
      return;
    }

  /* GCD(U, 0) == U.  */
  if (vsize == 0)
    {
      SIZ (g) = usize;
      if (g == u)
	return;
      MPZ_REALLOC (g, usize);
      MPN_COPY (PTR (g), up, usize);
      return;
    }

  if (usize == 1)
    {
      SIZ (g) = 1;
      PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]);
      return;
    }

  if (vsize == 1)
    {
      SIZ(g) = 1;
      PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]);
      return;
    }

  TMP_MARK;

  /*  Eliminate low zero bits from U and V and move to temporary storage.  */
  while (*up == 0)
    up++;
  u_zero_limbs = up - PTR(u);
  usize -= u_zero_limbs;
  count_trailing_zeros (u_zero_bits, *up);
  tp = up;
  up = TMP_ALLOC_LIMBS (usize);
  if (u_zero_bits != 0)
    {
      mpn_rshift (up, tp, usize, u_zero_bits);
      usize -= up[usize - 1] == 0;
    }
  else
    MPN_COPY (up, tp, usize);

  while (*vp == 0)
    vp++;
  v_zero_limbs = vp - PTR (v);
  vsize -= v_zero_limbs;
  count_trailing_zeros (v_zero_bits, *vp);
  tp = vp;
  vp = TMP_ALLOC_LIMBS (vsize);
  if (v_zero_bits != 0)
    {
      mpn_rshift (vp, tp, vsize, v_zero_bits);
      vsize -= vp[vsize - 1] == 0;
    }
  else
    MPN_COPY (vp, tp, vsize);

  if (u_zero_limbs > v_zero_limbs)
    {
      g_zero_limbs = v_zero_limbs;
      g_zero_bits = v_zero_bits;
    }
  else if (u_zero_limbs < v_zero_limbs)
    {
      g_zero_limbs = u_zero_limbs;
      g_zero_bits = u_zero_bits;
    }
  else  /*  Equal.  */
    {
      g_zero_limbs = u_zero_limbs;
      g_zero_bits = MIN (u_zero_bits, v_zero_bits);
    }

  /*  Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */
  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
    ? mpn_gcd (vp, vp, vsize, up, usize)
    : mpn_gcd (vp, up, usize, vp, vsize);

  /*  Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits).  */
  gsize = vsize + g_zero_limbs;
  if (g_zero_bits != 0)
    {
      mp_limb_t cy_limb;
      gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;
      MPZ_REALLOC (g, gsize);
      MPN_ZERO (PTR (g), g_zero_limbs);

      tp = PTR(g) + g_zero_limbs;
      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
      if (cy_limb != 0)
	tp[vsize] = cy_limb;
    }
Example #18
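/* Toom-(5,3) multiplication: {ap, an} is split into 5 pieces of n limbs and
   {bp, bn} into 3, and the product is assembled into {pp, an + bn} after
   evaluation, pointwise multiplication and interpolation. */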
void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  int vm1_neg, vmh_neg;
  mp_limb_t cy;
  mp_ptr gp, hp;
  mp_ptr as1, asm1, as2, ash, asmh;
  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
  enum toom4_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  as1  = TMP_SALLOC_LIMBS (n + 1);
  asm1 = TMP_SALLOC_LIMBS (n + 1);
  as2  = TMP_SALLOC_LIMBS (n + 1);
  ash  = TMP_SALLOC_LIMBS (n + 1);
  asmh = TMP_SALLOC_LIMBS (n + 1);

  bs1  = TMP_SALLOC_LIMBS (n + 1);
  bsm1 = TMP_SALLOC_LIMBS (n + 1);
  bs2  = TMP_SALLOC_LIMBS (n + 1);
  bsh  = TMP_SALLOC_LIMBS (n + 1);
  bsmh = TMP_SALLOC_LIMBS (n + 1);

  gp = pp;
  hp = pp + n + 1;

  /* Compute as1 and asm1.  */
  gp[n]  = mpn_add_n (gp, a0, a2, n);
  gp[n] += mpn_add   (gp, gp, n, a4, s);
  hp[n]  = mpn_add_n (hp, a1, a3, n);
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#else
  mpn_add_n (as1, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_sub_n (asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#endif

  /* Compute as2.  */
#if !HAVE_NATIVE_mpn_addlsh_n
  ash[n] = mpn_lshift (ash, a2, n, 2);			/*        4a2       */
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a3, a4, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
  as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy  = mpn_lshift (as2, a4, s, 1);
  cy += mpn_add_n (as2, a3, as2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 4 * cy + mpn_lshift (as2, as2, n, 2);
  cy += mpn_add_n (as2, a1, as2, n);
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  as2[n] = cy + mpn_add_n (as2, a0, as2, n);
  mpn_add_n (as2, ash, as2, n + 1);
#endif

  /* Compute ash and asmh.  */
#if HAVE_NATIVE_mpn_addlsh_n
  cy  = mpn_addlsh_n (gp, a2, a0, n, 2);		/* 4a0  +  a2       */
  cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2);	/* 16a0 + 4a2 +  a4 */ /* FIXME s */
  gp[n] = cy;
  cy  = mpn_addlsh_n (hp, a3, a1, n, 2);		/*  4a1 +  a3       */
  cy = 2 * cy + mpn_lshift (hp, hp, n, 1);		/*  8a1 + 2a3       */
  hp[n] = cy;
#else
  gp[n] = mpn_lshift (gp, a0, n, 4);			/* 16a0             */
  mpn_add (gp, gp, n + 1, a4, s);			/* 16a0 +        a4 */
  mpn_add_n (gp, ash, gp, n+1);				/* 16a0 + 4a2 +  a4 */
  cy  = mpn_lshift (hp, a1, n, 3);			/*  8a1             */
  cy += mpn_lshift (ash, a3, n, 1);			/*        2a3       */
  cy += mpn_add_n (hp, ash, hp, n);			/*  8a1 + 2a3       */
  hp[n] = cy;
#endif
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#else
  mpn_add_n (ash, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_sub_n (asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#endif

  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_addsub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
Example #19
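/* Quotient-only division: {qp, nn - dn + 1} = floor ({np, nn} / {dp, dn}).
   The divisor need not be normalized; scratch supplies working space and may
   coincide with np. */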
void
mpn_div_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
	    }
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
	    }
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* |________________________|
                |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);


      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
Example #20
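/* Toom-4 multiplication of two n-limb operands: {rp, 2n} = {up, n} * {vp, n},
   splitting each operand into 4 pieces of sn = ceil(n/4) limbs. */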
void
mpn_toom4_mul_n (mp_ptr rp, mp_srcptr up,
		          mp_srcptr vp, mp_size_t n)
{
  mp_size_t ind;
  mp_limb_t cy, cy2, r30, r31;
  mp_ptr tp;
  mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1;
  TMP_DECL;

  sn = (n + 3) / 4;

  h1 = n - 3*sn;
  
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
#define b3 (vp + 3*sn)

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   TMP_MARK;

   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
#define u6 (tp + 4*t4 + 4*(sn+1))

   u6[sn] = mpn_add(u6, a1, sn, a3, h1);
   u5[sn] = mpn_add_n(u5, a2, a0, sn);
   mpn_add_n(u3, u5, u6, sn + 1);
   n4 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u4, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u4, u6, u5, sn + 1);
      n4 = -n4;
   }

   u6[sn] = mpn_add(u6, b1, sn, b3, h1);
   u5[sn] = mpn_add_n(u5, b2, b0, sn);
   mpn_add_n(r2, u5, u6, sn + 1);
   n5 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u5, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u5, u6, u5, sn + 1);
      n5 = -n5;
   }
 
   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, a2, sn, 1);
   MPN_COPY(r2, a3, h1);
   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);
   cy = mpn_addmul_1(r2, a1, h1, 4);
#endif
   if (sn > h1) 
   {
      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u5, r1, r2, sn + 1);
   n6 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(u6, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(u6, r2, r1, sn + 1);
      n6 = -n6;
   }
 
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, b3, b1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, b2, sn, 1);
   MPN_COPY(r2, b3, h1);
   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);
   cy = mpn_addmul_1(r2, b1, h1, 4);
#endif
   if (sn > h1) 
   {
      cy2 = mpn_lshift(r2 + h1, b1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u2, r1, r2, sn + 1);
   n8 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(r2, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(r2, r2, r1, sn + 1);
      n8 = -n8;
   }
    
   r30 = r3[0];
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */
   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */
   r3[1] = r31;

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(u2, a2, a3, h1);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy); 
   u2[sn] = cy;
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);     
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);     
#else
   MPN_COPY(u2, a0, sn);
   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);
   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);
   cy = mpn_addmul_1(u2, a3, h1, 8);
   if (sn > h1) cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);
   u2[sn] += cy;
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(r1, b2, b3, h1);
   if (sn > h1)
      cy = mpn_add_1(r1 + h1, b2 + h1, sn - h1, cy); 
   r1[sn] = cy;
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);     
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);     
#else
   MPN_COPY(r1, b0, sn);
   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);
   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);
   cy = mpn_addmul_1(r1, b3, h1, 8);
   if (sn > h1) cy = mpn_add_1(r1 + h1, r1 + h1, sn - h1, cy);
   r1[sn] += cy;
#endif
   
   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */
   
   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h1); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */

   TC4_DENORM(r1, n1, t4 - 1);

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != 2*n) 
   {
	  MPN_ZERO((rp + rpn), 2*n - rpn);
   }

   TMP_FREE;
}
Example #21
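/* Toom-(6,3) multiplication: {ap, an} is split into 6 pieces of n limbs and
   {bp, bn} into 3; the product goes to {pp, an + bn}, with scratch holding
   intermediate values. */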
void
mpn_toom63_mul (mp_ptr pp,
                mp_srcptr ap, mp_size_t an,
                mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
{
    mp_size_t n, s, t;
    mp_limb_t cy;
    int sign;

    /***************************** decomposition *******************************/
#define a5  (ap + 5 * n)
#define b0  (bp + 0 * n)
#define b1  (bp + 1 * n)
#define b2  (bp + 2 * n)

    ASSERT (an >= bn);
    n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);

    s = an - 5 * n;
    t = bn - 2 * n;

    ASSERT (0 < s && s <= n);
    ASSERT (0 < t && t <= n);
    /* WARNING! it assumes s+t>=n */
    ASSERT ( s + t >= n );
    ASSERT ( s + t > 4);
    /* WARNING! it assumes n>1 */
    ASSERT ( n > 2);

#define   r8    pp				/* 2n   */
#define   r7    scratch				/* 3n+1 */
#define   r5    (pp + 3*n)			/* 3n+1 */
#define   v0    (pp + 3*n)			/* n+1 */
#define   v1    (pp + 4*n+1)			/* n+1 */
#define   v2    (pp + 5*n+2)			/* n+1 */
#define   v3    (pp + 6*n+3)			/* n+1 */
#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
#define   r1    (pp + 7*n)			/* s+t <= 2*n */
#define   ws    (scratch + 6 * n + 2)		/* ??? */

    /* Also allocate 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
       need all of them when DO_mpn_sublsh_n uses a scratch area.  */
    /*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */

    /********************** evaluation and recursive calls *********************/
    /* $\pm4$ */
    sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
    pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
    /* FIXME: use addlsh */
    v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
    if ( n == t )
        v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
    else
        v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
    sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
    TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
    mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);

    /* $\pm1$ */
    sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
    /* Compute bs1 and bsm1. Code taken from toom33 */
    cy = mpn_add (ws, b0, n, b2, t);
#if HAVE_NATIVE_mpn_add_n_sub_n
    if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
    {
        cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
        v3[n] = cy >> 1;
        v1[n] = 0;
        sign = ~sign;
    }
Example #22
/* returns 0 if result exact, non-zero otherwise */
int
mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mp_rnd_t rnd_mode)
{
  long int xn, yn, dif, sh, i;
  mp_limb_t *xp, *yp, *tmp, c, d;
  mp_exp_t exp;
  int inexact, middle = 1;
  TMP_DECL(marker);

  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ))
    {
      if (MPFR_IS_NAN(x))
	{
	  MPFR_SET_NAN(y);
	  MPFR_RET_NAN;
	}
      else if (MPFR_IS_INF(x))
	{
	  MPFR_SET_INF(y);
	  MPFR_SET_SAME_SIGN(y, x);
	  MPFR_RET(0);
	}
      else
	{
          MPFR_ASSERTD(MPFR_IS_ZERO(x));
	  if (u == 0)/* 0/0 is NaN */
	    {
	      MPFR_SET_NAN(y);
	      MPFR_RET_NAN;
	    }
	  else
	    {
	      MPFR_SET_ZERO(y);
	      MPFR_RET(0);
	    }
	}
    }

  if (MPFR_UNLIKELY(u == 0))
    {
      /* x/0 is Inf */
      MPFR_SET_INF(y);
      MPFR_SET_SAME_SIGN(y, x);
      MPFR_RET(0);
    }

  MPFR_CLEAR_FLAGS(y);

  MPFR_SET_SAME_SIGN(y, x);

  TMP_MARK(marker);
  xn = MPFR_LIMB_SIZE(x);
  yn = MPFR_LIMB_SIZE(y);

  xp = MPFR_MANT(x);
  yp = MPFR_MANT(y);
  exp = MPFR_GET_EXP (x);

  dif = yn + 1 - xn;

  /* we need to store yn+1 = xn + dif limbs of the quotient */
  /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */
  tmp = (mp_limb_t*) TMP_ALLOC((yn + 1) * BYTES_PER_MP_LIMB);

  c = (mp_limb_t) u;
  MPFR_ASSERTN(u == c);
  if (dif >= 0)
    c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */
  else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */
    c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c);

  inexact = (c != 0);

  /* First pass in estimating next bit of the quotient, in case of RNDN    *
   * In case we just have the right number of bits (postpone this ?),      *
   * we need to check whether the remainder is more or less than half      *
   * the divisor. The test must be performed with a subtraction, so as     *
   * to prevent carries.                                                   */

  if (rnd_mode == GMP_RNDN)
    {
      if (c < (mp_limb_t) u - c) /* We have u > c */
	middle = -1;
      else if (c > (mp_limb_t) u - c)
	middle = 1;
      else
	middle = 0; /* exactly in the middle */
    }

  /* If we believe that we are right in the middle or exact, we should check
     that we did not neglect any word of x (division large / 1 -> small). */

  for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++)
    if (xp[i])
      inexact = middle = 1; /* larger than middle */

  /*
     If the high limb of the result is 0 (xp[xn-1] < u), remove it.
     Otherwise, compute the left shift to be performed to normalize.
     In the latter case, we discard some low bits computed. They
     contain information useful for the rounding, hence the updating
     of middle and inexact.
  */

  if (tmp[yn] == 0)
    {
      MPN_COPY(yp, tmp, yn);
      exp -= BITS_PER_MP_LIMB;
      sh = 0;
    }
  else
    {
      count_leading_zeros (sh, tmp[yn]);

      /* shift left to normalize */
      if (sh)
        {
          mp_limb_t w = tmp[0] << sh;

          mpn_lshift (yp, tmp + 1, yn, sh);
          yp[0] += tmp[0] >> (BITS_PER_MP_LIMB - sh);

          if (w > (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1)))
            { middle = 1; }
          else if (w < (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1)))
            { middle = -1; }
          else
            { middle = (c != 0); }

          inexact = inexact || (w != 0);
          exp -= sh;
        }
      else
        { /* this happens only if u == 1 and xp[xn-1] >=
             1<<(BITS_PER_MP_LIMB-1). It might be better to handle the
             u == 1 case separately ?
          */

          MPN_COPY (yp, tmp + 1, yn);
        }
    }
Example #23
/* Obtain a sequence of random numbers.  */
static void
randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
{
    unsigned long int rbitpos;
    int chunk_nbits;
    mp_ptr tp;
    mp_size_t tn;
    gmp_rand_lc_struct *p;
    TMP_DECL;

    p = (gmp_rand_lc_struct *) RNG_STATE (rstate);

    TMP_MARK;

    chunk_nbits = p->_mp_m2exp / 2;
    tn = BITS_TO_LIMBS (chunk_nbits);

    tp = TMP_ALLOC_LIMBS (tn);

    rbitpos = 0;
    while (rbitpos + chunk_nbits <= nbits)
    {
        mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;

        if (rbitpos % GMP_NUMB_BITS != 0)
        {
            mp_limb_t savelimb, rcy;
            /* Target of new chunk is not bit aligned.  Use temp space
               and align things by shifting it up.  */
            lc (tp, rstate);
            savelimb = r2p[0];
            rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
            r2p[0] |= savelimb;
            /* bogus */
            if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
                    > GMP_NUMB_BITS)
                r2p[tn] = rcy;
        }
        else
        {
            /* Target of new chunk is bit aligned.  Let `lc' put bits
               directly into our target variable.  */
            lc (r2p, rstate);
        }
        rbitpos += chunk_nbits;
    }

    /* Handle last [0..chunk_nbits) bits.  */
    if (rbitpos != nbits)
    {
        mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
        int last_nbits = nbits - rbitpos;
        tn = BITS_TO_LIMBS (last_nbits);
        lc (tp, rstate);
        if (rbitpos % GMP_NUMB_BITS != 0)
        {
            mp_limb_t savelimb, rcy;
            /* Target of new chunk is not bit aligned.  Use temp space
               and align things by shifting it up.  */
            savelimb = r2p[0];
            rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
            r2p[0] |= savelimb;
            if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
                r2p[tn] = rcy;
        }
        else
        {
            MPN_COPY (r2p, tp, tn);
        }
        /* Mask off top bits if needed.  */
        if (nbits % GMP_NUMB_BITS != 0)
            rp[nbits / GMP_NUMB_BITS]
            &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
    }

    TMP_FREE;
}