Example #1
int
_arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error,
                                                const arf_t x, mp_size_t wn)
{
    mp_srcptr xp;
    mp_size_t xn;
    int negative;
    slong exp;

    ARF_GET_MPN_READONLY(xp, xn, x);
    exp = ARF_EXP(x);
    negative = ARF_SGNBIT(x);

    if (exp <= -1)
    {
        /* todo: just zero top */
        flint_mpn_zero(w, wn);

        *error = _arf_get_integer_mpn(w, xp, xn, exp + wn * FLINT_BITS);

        if (!negative)
        {
            fmpz_zero(q);
        }
        else
        {
            if (wn > ARB_LOG_TAB2_LIMBS)
                return 0;

            mpn_sub_n(w, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - wn, w, wn);
            *error += 1;    /* log(2) has 1 ulp error */
            fmpz_set_si(q, -1);
        }

        return 1; /* success */
    }
    else
    {
        mp_ptr qp, rp, np;
        mp_srcptr dp;
        mp_size_t qn, rn, nn, dn, tn, alloc;
        TMP_INIT;

        tn = ((exp + 2) + FLINT_BITS - 1) / FLINT_BITS;

        dn = wn + tn;           /* denominator */
        nn = wn + 2 * tn;       /* numerator */
        qn = nn - dn + 1;       /* quotient */
        rn = dn;                /* remainder */

        if (dn > ARB_LOG_TAB2_LIMBS)
            return 0;

        TMP_START;

        alloc = qn + rn + nn;
        qp = TMP_ALLOC_LIMBS(alloc);
        rp = qp + qn;
        np = rp + rn;

        dp = arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - dn;

        /* todo: prove that zeroing is unnecessary */
        flint_mpn_zero(np, nn);

        _arf_get_integer_mpn(np, xp, xn, exp + dn * FLINT_BITS);

        mpn_tdiv_qr(qp, rp, 0, np, nn, dp, dn);

        if (!negative)
        {
            flint_mpn_copyi(w, rp + tn, wn);
            *error = 2;
        }
        else
        {
            if (mpn_add_1(qp, qp, qn, 1))
            {
                /* I believe this cannot happen (should prove it) */
                flint_printf("mod log(2): unexpected carry\n");
                abort();
            }

            mpn_sub_n(w, dp + tn, rp + tn, wn);
            *error = 3;
        }

        /* read the exponent */
        while (qn > 1 && qp[qn-1] == 0)
            qn--;

        if (qn == 1)
        {
            if (!negative)
                fmpz_set_ui(q, qp[0]);
            else
                fmpz_neg_ui(q, qp[0]);
        }
        else
        {
            fmpz_set_mpn_large(q, qp, qn, negative);
        }

        TMP_END;

        return 1;
    }
}
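The routine above reduces x to w = x - q*log(2) with 0 <= w < log(2), tracking an explicit error in ulp. A minimal double-precision sketch of the same reduction (illustrative only; it has none of the fixed-point error accounting of the limb-level code):

#include <math.h>
#include <stdio.h>

/* reduce x modulo log(2): x = q*log(2) + w with 0 <= w < log(2) */
static double reduce_mod_log2(double x, long *q)
{
    const double lg2 = 0.69314718055994530942;
    *q = (long) floor(x / lg2);
    return x - (double) *q * lg2;
}

int main(void)
{
    long q;
    double w = reduce_mod_log2(10.0, &q);
    printf("10 = %ld*log(2) + %.17g\n", q, w);   /* q = 14, w ~= 0.296 */
    return 0;
}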
Example #2
void
mpz_mfac_uiui (mpz_ptr x, unsigned long n, unsigned long m)
{
  ASSERT (n <= GMP_NUMB_MAX);
  ASSERT (m != 0);

  if (n < 3 || n - 3 < m - 1) { /* (n < 3 || n - 1 <= m || m == 0) */
    PTR (x)[0] = n + (n == 0);
    SIZ (x) = 1;
  } else { /* m < n - 1 < GMP_NUMB_MAX */
    mp_limb_t g, sn;
    mpz_t     t;

    sn = n;
    g = mpn_gcd_1 (&sn, 1, m);
    if (g != 1) { n/=g; m/=g; }

    if (m <= 2) { /* fac or 2fac */
      if (m == 1) {
	if (g > 2) {
	  mpz_init (t);
	  mpz_fac_ui (t, n);
	  sn = n;
	} else {
	  if (g == 2)
	    mpz_2fac_ui (x, n << 1);
	  else
	    mpz_fac_ui (x, n);
	  return;
	}
      } else { /* m == 2 */
	if (g != 1) {
	  mpz_init (t);
	  mpz_2fac_ui (t, n);
	  sn = n / 2 + 1;
	} else {
	  mpz_2fac_ui (x, n);
	  return;
	}
      }
    } else { /* m >= 3, gcd(n,m) = 1 */
      mp_limb_t *factors;
      mp_limb_t prod, max_prod, j;
      TMP_DECL;

      sn = n / m + 1;

      j = 0;
      prod = n;
      n -= m;
      max_prod = GMP_NUMB_MAX / n;

      TMP_MARK;
      factors = TMP_ALLOC_LIMBS (sn / log_n_max (n) + 2);

      for (; n > m; n -= m)
	FACTOR_LIST_STORE (n, prod, max_prod, factors, j);

      factors[j++] = n;
      factors[j++] = prod;

      if (g > 1) {
	mpz_init (t);
	mpz_prodlimbs (t, factors, j);
      } else
	mpz_prodlimbs (x, factors, j);

      TMP_FREE;
    }

    if (g > 1) {
      mpz_t p;

      mpz_init (p);
      mpz_ui_pow_ui (p, g, sn); /* g^sn */
      mpz_mul (x, p, t);
      mpz_clear (p);
      mpz_clear (t);
    }
  }
}
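A usage sketch of the public entry point (assuming GMP >= 5.1, where mpz_mfac_uiui is declared in gmp.h); it computes the m-multi-factorial n!^(m) = n(n-m)(n-2m)...:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t x;
    mpz_init(x);

    mpz_mfac_uiui(x, 10, 3);          /* 10!^(3) = 10 * 7 * 4 * 1 = 280 */
    gmp_printf("10!^(3) = %Zd\n", x);

    mpz_mfac_uiui(x, 9, 2);           /* double factorial: 9!! = 945 */
    gmp_printf("9!!     = %Zd\n", x);

    mpz_clear(x);
    return 0;
}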
Example #3
mp_limb_t
mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
  mp_size_t q_orig, qn, sh, sl, i;
  mp_limb_t qh, cy, cy2;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn >= 6);
  ASSERT (nn >= dn + 3);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }
  q_orig = qn;

  qh = mpn_cmp(np + nn - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);

  np += nn - dn - qn;
  nn = dn + qn;

  /* Reduce until dn - 1 >= qn */
  while (dn - 1 < qn)
  {
     sh = MIN(dn, qn - dn + 1);
     if (sh <= DC_DIV_QR_THRESHOLD) cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
     else cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
     qn -= sh; nn -= sh; 
  }

  cy = np[nn - 1];

  /* split into two parts */
  sh = qn/2; sl = qn - sh;

  /* Rare case where truncation ruins normalisation */
  if (cy > dp[dn - 1] || (cy == dp[dn - 1] 
     && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0))
     {
        __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn);
        return qh;
     }

  if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0)
     __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh);
  else
  {
     if (sh < SB_DIVAPPR_Q_CUTOFF)
        mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
     else
        mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
  }

  cy = np[nn - sh];

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(sl + 2);

  mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh);
  cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2);

  TMP_FREE;

  while ((mp_limb_signed_t) cy < 0)
  {
      
     qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */
     
     /*
        correct the remainder, noting that the "digits" of the quotient are
        not in base B but in a base that varies with the truncation, so the
        correction needs a fixup
     */
     cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2); 

     for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++)
        cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 2, sl + 2, dp[dn - sl - 3 - i]);
  }
   
  if (cy != 0) /* special case: unable to canonicalise */
     __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl);
  else
  {
     if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0)
        __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl);
     else
     {
        if (sl < SB_DIVAPPR_Q_CUTOFF)
           mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv);
        else
           mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv);
     }

  }

  return qh;
}
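mpn_dc_divappr_q is internal to the division code; an ordinary caller reaches division through documented entry points such as mpn_tdiv_qr, which computes a full (exact) quotient and remainder. A minimal sketch of that documented interface:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    /* divide the 2-limb number {np, 2} by the 1-limb divisor {dp, 1} */
    mp_limb_t np[2] = { 123456789, 42 };      /* value 42*B + 123456789 */
    mp_limb_t dp[1] = { 1000 };
    mp_limb_t qp[2], rp[1];

    mpn_tdiv_qr(qp, rp, 0, np, 2, dp, 1);     /* the qxn argument must be 0 */
    gmp_printf("q = %Nd, r = %Nd\n", qp, (mp_size_t) 2, rp, (mp_size_t) 1);
    return 0;
}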
Example #4
void
mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
{
  mp_ptr rootp, up, remp;
  mp_size_t us, un, rootn, remn;
  TMP_DECL;

  us = SIZ(u);

  /* even roots of negatives provoke an exception */
  if (us < 0 && (nth & 1) == 0)
    SQRT_OF_NEGATIVE;

  /* root extraction interpreted as c^(1/nth) means a zeroth root should
     provoke a divide by zero, do this even if c==0 */
  if (nth == 0)
    DIVIDE_BY_ZERO;

  if (us == 0)
    {
      if (root != NULL)
	SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  un = ABS (us);
  rootn = (un - 1) / nth + 1;

  TMP_MARK;

  /* FIXME: Perhaps disallow root == NULL */
  if (root != NULL && u != root)
    rootp = MPZ_REALLOC (root, rootn);
  else
    rootp = TMP_ALLOC_LIMBS (rootn);

  if (u != rem)
    remp = MPZ_REALLOC (rem, un);
  else
    remp = TMP_ALLOC_LIMBS (un);

  up = PTR(u);

  if (nth == 1)
    {
      MPN_COPY (rootp, up, un);
      remn = 0;
    }
  else
    {
      remn = mpn_rootrem (rootp, remp, up, un, (mp_limb_t) nth);
    }

  if (root != NULL)
    {
      SIZ(root) = us >= 0 ? rootn : -rootn;
      if (u == root)
	MPN_COPY (up, rootp, rootn);
      else if (u == rem)
	MPN_COPY (up, remp, remn);
    }

  SIZ(rem) = us < 0 && remn > 0 ? -remn : remn;
  TMP_FREE;
}
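A usage sketch of the documented function, taking the truncated nth root and the remainder:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t u, root, rem;
    mpz_inits(u, root, rem, NULL);

    mpz_set_ui(u, 100);
    mpz_rootrem(root, rem, u, 3);     /* 100 = 4^3 + 36 */
    gmp_printf("cbrt(100) = %Zd, remainder = %Zd\n", root, rem);

    mpz_clears(u, root, rem, NULL);
    return 0;
}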
Example #5
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
    mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N,
    int sinonly, int alternating)
{
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;
    int cosorsin;

    TMP_INIT;
    TMP_START;

    if (2 * N >= FACTORIAL_TAB_SIZE - 1)
    {
        flint_printf("_arb_sin_cos_taylor_rs: N too large!\n");
        abort();
    }

    if (N <= 1)
    {
        if (N == 0)
        {
            flint_mpn_zero(ysin, xn);
            if (!sinonly) flint_mpn_zero(ycos, xn);
            error[0] = 0;
        }
        else if (N == 1)
        {
            flint_mpn_copyi(ysin, x, xn);
            if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES);
            error[0] = 1;
        }
    }
    else
    {
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2);     /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- | */
        /* xpow = |  <temp>  | x^m | x^(m-1) | ... | x^2 | x | */

#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        mpn_sqr(XPOW_WRITE(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);

        for (k = 4; k <= m; k += 2)
        {
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);
        }

        for (cosorsin = sinonly; cosorsin < 2; cosorsin++)
        {
            flint_mpn_zero(s, xn + 1);

            /* todo: skip one nonscalar multiplication (use x^m)
               when starting on x^0 */
            power = (N - 1) % m;

            for (k = N - 1; k >= 0; k--)
            {
                c = factorial_tab_numer[2 * k + cosorsin];
                new_denom = factorial_tab_denom[2 * k + cosorsin];
                old_denom = factorial_tab_denom[2 * k + cosorsin + 2];

                /* change denominators */
                if (new_denom != old_denom && k < N - 1)
                {
                    if (alternating && (k % 2 == 0))
                        s[xn] += old_denom;

                    mpn_divrem_1(s, 0, s, xn + 1, old_denom);

                    if (alternating && (k % 2 == 0))
                        s[xn] -= 1;
                }

                if (power == 0)
                {
                    /* add c * x^0 -- only top limb is affected */
                    if (alternating & k)
                        s[xn] -= c;
                    else
                        s[xn] += c;

                    /* Outer polynomial evaluation: multiply by x^m */
                    if (k != 0)
                    {
                        mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                        flint_mpn_copyi(s, t + xn, xn + 1);
                    }

                    power = m - 1;
                }
                else
                {
                    if (alternating & k)
                        s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c);
                    else
                        s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);

                    power--;
                }
            }

            /* finally divide by denominator */
            if (cosorsin == 0)
            {
                mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]);

                /* perturb down to a number < 1 if necessary. note that this
                   does not invalidate the error bound: 1 - ulp is either
                   1 ulp too small or must be closer to the exact value */
                if (t[xn] == 0)
                    flint_mpn_copyi(ycos, t, xn);
                else
                    flint_mpn_store(ycos, xn, LIMB_ONES);
            }
            else
            {
                mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]);
                mpn_mul(t, s, xn + 1, x, xn);
                flint_mpn_copyi(ysin, t + xn, xn);
            }
        }

        /* error bound (ulp) */
        error[0] = 2;
    }

    TMP_END;
}
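The choice m ~ sqrt(N) above is rectangular splitting (Smith / Paterson-Stockmeyer style): powers x^1..x^m are precomputed, the series is evaluated as a polynomial in x^m with cheap inner coefficients, and only the outer steps need a full multiplication. A toy double-precision illustration of the scheme (hypothetical helper, no fixed-point error tracking):

#include <math.h>
#include <stdio.h>

/* evaluate c[0] + c[1]*x + ... + c[N-1]*x^(N-1) by rectangular splitting
   (assumes N small enough that m = ceil(sqrt(N)) < 64) */
static double eval_rs(const double *c, int N, double x)
{
    int m = 1, i, j;
    double xpow[64], s = 0.0;

    while (m * m < N)                       /* m ~= sqrt(N) */
        m++;
    xpow[0] = 1.0;                          /* precompute x^0 .. x^m */
    for (i = 1; i <= m; i++)
        xpow[i] = xpow[i - 1] * x;

    for (i = (N - 1) / m; i >= 0; i--)      /* outer: Horner in y = x^m */
    {
        double inner = 0.0;
        for (j = m - 1; j >= 0; j--)        /* inner: scalar coefficients only */
            if (i * m + j < N)
                inner += c[i * m + j] * xpow[j];
        s = s * xpow[m] + inner;
    }
    return s;
}

int main(void)
{
    double c[16], f = 1.0;
    int k;
    for (k = 0; k < 16; k++) { c[k] = 1.0 / f; f *= k + 1; }   /* exp series */
    printf("%.15f vs %.15f\n", eval_rs(c, 16, 0.5), exp(0.5));
    return 0;
}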
Example #6
size_t
mpz_inp_raw (mpz_ptr x, FILE *fp)
{
  unsigned char  csize_bytes[4];
  mp_size_t      csize, abs_xsize, i;
  size_t         abs_csize;
  char           *cp;
  mp_ptr         xp, sp, ep;
  mp_limb_t      slimb, elimb;

  if (fp == 0)
    fp = stdin;

  /* 4 bytes for size */
  if (fread (csize_bytes, sizeof (csize_bytes), 1, fp) != 1)
    return 0;

  csize =
    (  (mp_size_t) csize_bytes[0] << 24)
    + ((mp_size_t) csize_bytes[1] << 16)
    + ((mp_size_t) csize_bytes[2] << 8)
    + ((mp_size_t) csize_bytes[3]);

  /* Sign extend if necessary.
     Could write "csize -= ((csize & 0x80000000L) << 1)", but that tickles a
     bug in gcc 3.0 for powerpc64 on AIX.  */
  if (sizeof (csize) > 4 && csize & 0x80000000L)
    csize -= 0x80000000L << 1;

  abs_csize = ABS (csize);

  /* round up to a multiple of limbs */
  abs_xsize = BITS_TO_LIMBS (abs_csize*8);

  if (abs_xsize != 0)
    {
      xp = MPZ_NEWALLOC (x, abs_xsize);

      /* Get limb boundaries right in the read, for the benefit of the
	 non-nails case.  */
      xp[0] = 0;
      cp = (char *) (xp + abs_xsize) - abs_csize;
      if (fread (cp, abs_csize, 1, fp) != 1)
	return 0;

      if (GMP_NAIL_BITS == 0)
	{
	  /* Reverse limbs to least significant first, and byte swap.  If
	     abs_xsize is odd then on the last iteration elimb and slimb are
	     the same.  It doesn't seem worth extra code to handle that case
	     separately just to save an NTOH.  */
	  sp = xp;
	  ep = xp + abs_xsize-1;
	  for (i = 0; i < (abs_xsize+1)/2; i++)
	    {
	      NTOH_LIMB_FETCH (elimb, ep);
	      NTOH_LIMB_FETCH (slimb, sp);
	      *sp++ = elimb;
	      *ep-- = slimb;
	    }
	}
      else
	{
	  /* It ought to be possible to do the transformation in-place, but
	     for now it's easier to use an extra temporary area.  */
	  mp_limb_t  byte, limb;
	  int	     bits;
	  mp_size_t  tpos;
	  mp_ptr     tp;
	  TMP_DECL;

	  TMP_MARK;
	  tp = TMP_ALLOC_LIMBS (abs_xsize);
	  limb = 0;
	  bits = 0;
	  tpos = 0;
	  for (i = abs_csize-1; i >= 0; i--)
	    {
	      byte = (unsigned char) cp[i];
	      limb |= (byte << bits);
	      bits += 8;
	      if (bits >= GMP_NUMB_BITS)
		{
		  ASSERT (tpos < abs_xsize);
		  tp[tpos++] = limb & GMP_NUMB_MASK;
		  bits -= GMP_NUMB_BITS;
		  ASSERT (bits < 8);
		  limb = byte >> (8 - bits);
		}
	    }
	  if (bits != 0)
	    {
	      ASSERT (tpos < abs_xsize);
	      tp[tpos++] = limb;
	    }
	  ASSERT (tpos == abs_xsize);

	  MPN_COPY (xp, tp, abs_xsize);
	  TMP_FREE;
	}

      /* strip any possible high zero limbs and set the size, with the sign
	 taken from csize */
      MPN_NORMALIZE (xp, abs_xsize);
    }

  SIZ(x) = (csize >= 0 ? abs_xsize : -abs_xsize);
  return abs_csize + 4;
}
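A round-trip sketch with the matching writer (mpz_out_raw and mpz_inp_raw are both documented GMP I/O functions; tmpfile() stands in for a real stream):

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t a, b;
    FILE *fp = tmpfile();

    mpz_init_set_str(a, "-123456789012345678901234567890", 10);
    mpz_init(b);

    mpz_out_raw(fp, a);      /* 4-byte size header followed by magnitude bytes */
    rewind(fp);
    mpz_inp_raw(b, fp);      /* reads the value back */

    gmp_printf("round trip ok: %d\n", mpz_cmp(a, b) == 0);

    fclose(fp);
    mpz_clear(a);
    mpz_clear(b);
    return 0;
}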
Example #7
/* 
   Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is
   either correct or one too large. We require dp to be normalised and inv
   to be a precomputed inverse given by mpn_invert.
*/
mp_limb_t 
mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, 
                              mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);
   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }
   
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy + 1);

   /*
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }.  Then
      DX < B^{2*dn} <= D(X+1).  Now let N' = { np + dn - 1, dn + 1 }, so that
      N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      and therefore
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B.
      There is either one integer in this range, or two.  However, in the
      latter case the left hand bound is either an integer or < 2/B below one.
   */
    
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }
  
   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) 
   {
	   /* Special case, multiply out to get accurate quotient */
	   ret -= mpn_sub_1(qp, qp, dn, 1);
      if (UNLIKELY(ret == ~CNST_LIMB(0)))
         ret += mpn_add_1(qp, qp, dn, 1);
      
      /* ret is now guaranteed to be 0 */
      ASSERT(ret == 0);
       
      mpn_mul_n(tp, qp, dp, dn);
      mpn_sub_n(tp, np, tp, dn+1);
      while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)
	   {
		   ret += mpn_add_1(qp, qp, dn, 1);
		   tp[dn] -= mpn_sub_n(tp, tp, dp, dn);
	   }
       
      /* Not possible for ret == 2 as we have qp*dp <= np */
      ASSERT(ret + ret2 < 2);
   }

   TMP_FREE;

   return ret + ret2;
}
Example #8
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.

   Iterates

     r' <-- r - r * (a^{k-1} r^k - 1) / k

   If

     a^{k-1} r^k = 1 (mod 2^m),

   then

     a^{k-1} r'^k = 1 (mod 2^{2m}),

   Compute the update term as

     r' = r - (a^{k-1} r^{k+1} - r) / k

   where we still have cancellation of low limbs.

 */
void
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_size_t sizes[GMP_LIMB_BITS * 2];
  mp_ptr akm1, tp, rnp, ep, scratch;
  mp_limb_t a0, r0, km1, kp1h, kinv;
  mp_size_t rn;
  unsigned i;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);
  ASSERT (k >= 3);

  TMP_MARK;

  akm1 = TMP_ALLOC_LIMBS (4*n);
  tp = akm1 + n;

  km1 = k-1;
  /* FIXME: Could arrange the iteration so we don't need to compute
     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     that we can use wraparound also for a*r, since the low half is
     unchanged from the previous iteration. Or possibly mulmid. Also,
     a r = a^{1/k}, so we get that value too, for free? */
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */

  a0 = ap[0];
  binvert_limb (kinv, k);

  /* 4 bits: a^{1/k - 1} (mod 16):

	a % 8
	1 3 5 7
   k%4 +-------
     1 |1 1 1 1
     3 |1 9 9 1
  */
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
#if GMP_NUMB_BITS > 32
  {
    unsigned prec = 32;
    do
      {
	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
	prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (n == 1)
    {
      TMP_FREE;
      return;
    }

  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
  kp1h = k/2 + 1;

  /* FIXME: Special case for two limb iteration. */
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
  ep = rnp + n;

  /* FIXME: Possible to do this on the fly with some bit fiddling. */
  for (i = 0; n > 1; n = (n + 1)/2)
    sizes[i++] = n;

  rn = 1;

  while (i-- > 0)
    {
      /* Compute x^{k+1}. */
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
			       final iteration.*/
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);

      /* Multiply by a^{k-1}. Can use wraparound; low part equals
	 r. */

      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
      ASSERT (mpn_cmp (ep, rp, rn) == 0);

      ASSERT (sizes[i] <= 2*rn);
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
      rn = sizes[i];
    }
  TMP_FREE;
}
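A single-word analogue of the iteration above, working modulo 2^64 with ordinary uint64_t wraparound arithmetic (illustrative sketch only; the limb code lifts the same Newton/Hensel step to n limbs):

#include <stdint.h>
#include <stdio.h>

static uint64_t pow2_64(uint64_t x, uint64_t e)   /* x^e mod 2^64 */
{
    uint64_t r = 1;
    while (e) { if (e & 1) r *= x; x *= x; e >>= 1; }
    return r;
}

static uint64_t inv2_64(uint64_t k)               /* 1/k mod 2^64, k odd */
{
    uint64_t v = k;                               /* correct to 3 bits */
    for (int i = 0; i < 5; i++)
        v *= 2 - k * v;                           /* Newton: doubles precision */
    return v;
}

int main(void)
{
    uint64_t a = 12345, k = 5;                    /* both must be odd */
    uint64_t kinv = inv2_64(k);
    uint64_t akm1 = pow2_64(a, k - 1);            /* a^(k-1) */
    uint64_t r = 1;                               /* a^(k-1)*r^k = 1 (mod 8) already */

    for (int i = 0; i < 6; i++)                   /* 3 -> 6 -> ... -> 96 bits */
        r = kinv * r * (k + 1 - akm1 * pow2_64(r, k));

    /* r = a^(1/k - 1), so a*r is a k-th root of a modulo 2^64 */
    printf("check: %d\n", pow2_64(a * r, k) == a);
    return 0;
}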
Example #9
File: log.c Project: isuruf/arb
void
arb_log_arf(arb_t z, const arf_t x, slong prec)
{
    if (arf_is_special(x))
    {
        if (arf_is_pos_inf(x))
            arb_pos_inf(z);
        else
            arb_indeterminate(z);
    }
    else if (ARF_SGNBIT(x))
    {
        arb_indeterminate(z);
    }
    else if (ARF_IS_POW2(x))
    {
        if (fmpz_is_one(ARF_EXPREF(x)))
        {
            arb_zero(z);
        }
        else
        {
            fmpz_t exp;
            fmpz_init(exp);
            _fmpz_add_fast(exp, ARF_EXPREF(x), -1);
            arb_const_log2(z, prec + 2);
            arb_mul_fmpz(z, z, exp, prec);
            fmpz_clear(exp);
        }
    }
    else if (COEFF_IS_MPZ(*ARF_EXPREF(x)))
    {
        arb_log_arf_huge(z, x, prec);
    }
    else
    {
        slong exp, wp, wn, N, r, closeness_to_one;
        mp_srcptr xp;
        mp_size_t xn, tn;
        mp_ptr tmp, w, t, u;
        mp_limb_t p1, q1bits, p2, q2bits, error, error2, cy;
        int negative, inexact, used_taylor_series;
        TMP_INIT;

        exp = ARF_EXP(x);
        negative = 0;

        ARF_GET_MPN_READONLY(xp, xn, x);

        /* compute a c >= 0 such that |x-1| <= 2^(-c) if c > 0 */
        closeness_to_one = 0;

        if (exp == 0)
        {
            slong i;

            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(~xp[xn - 1]);

            if (closeness_to_one == FLINT_BITS)
            {
                for (i = xn - 2; i > 0 && xp[i] == LIMB_ONES; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(~xp[i]));
            }
        }
        else if (exp == 1)
        {
            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(xp[xn - 1] & (~LIMB_TOP));

            if (closeness_to_one == FLINT_BITS)
            {
                slong i;

                for (i = xn - 2; xp[i] == 0; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(xp[i]));
            }

            closeness_to_one--;
        }

        /* if |t-1| <= 0.5               */
        /* |log(1+t) - t| <= t^2         */
        /* |log(1+t) - (t-t^2/2)| <= t^3 */
        if (closeness_to_one > prec + 1)
        {
            inexact = arf_sub_ui(arb_midref(z), x, 1, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -2 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
            return;
        }
        else if (2 * closeness_to_one > prec + 1)
        {
            arf_t t, u;
            arf_init(t);
            arf_init(u);
            arf_sub_ui(t, x, 1, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul(u, t, t, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul_2exp_si(u, u, -1);
            inexact = arf_sub(arb_midref(z), t, u, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -3 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
            arf_clear(t);
            arf_clear(u);
            return;
        }

        /* Absolute working precision (NOT rounded to a limb multiple) */
        wp = prec + closeness_to_one + 5;

        /* Too high precision to use table */
        if (wp > ARB_LOG_TAB2_PREC)
        {
            arf_log_via_mpfr(arb_midref(z), x, prec, ARB_RND);
            arf_mag_set_ulp(arb_radref(z), arb_midref(z), prec);
            return;
        }

        /* Working precision in limbs */
        wn = (wp + FLINT_BITS - 1) / FLINT_BITS;

        TMP_START;

        tmp = TMP_ALLOC_LIMBS(4 * wn + 3);
        w = tmp;        /* requires wn+1 limbs */
        t = w + wn + 1; /* requires wn+1 limbs */
        u = t + wn + 1; /* requires 2wn+1 limbs */

        /* read x-1 */
        if (xn <= wn)
        {
            flint_mpn_zero(w, wn - xn);
            mpn_lshift(w + wn - xn, xp, xn, 1);
            error = 0;
        }
        else
        {
            mpn_lshift(w, xp + xn - wn, wn, 1);
            error = 1;
        }

        /* First table-based argument reduction */
        if (wp <= ARB_LOG_TAB1_PREC)
            q1bits = ARB_LOG_TAB11_BITS;
        else
            q1bits = ARB_LOG_TAB21_BITS;

        p1 = w[wn-1] >> (FLINT_BITS - q1bits);

        /* Special case: covers logarithms of small integers */
        if (xn == 1 && (w[wn-1] == (p1 << (FLINT_BITS - q1bits))))
        {
            p2 = 0;
            flint_mpn_zero(t, wn);
            used_taylor_series = 0;
            N = r = 0; /* silence compiler warning */
        }
        else
        {
            /* log(1+w) = log(1+p/q) + log(1 + (qw-p)/(p+q)) */
            w[wn] = mpn_mul_1(w, w, wn, UWORD(1) << q1bits) - p1;
            mpn_divrem_1(w, 0, w, wn + 1, p1 + (UWORD(1) << q1bits));
            error += 1;

            /* Second table-based argument reduction (fused with log->atanh
               conversion) */
            if (wp <= ARB_LOG_TAB1_PREC)
                q2bits = ARB_LOG_TAB11_BITS + ARB_LOG_TAB12_BITS;
            else
                q2bits = ARB_LOG_TAB21_BITS + ARB_LOG_TAB22_BITS;

            p2 = w[wn-1] >> (FLINT_BITS - q2bits);

            u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits);
            flint_mpn_zero(u, wn);
            flint_mpn_copyi(t, u + wn, wn + 1);
            t[wn] += p2 + (UWORD(1) << (q2bits + 1));
            u[2 * wn] -= p2;
            mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1);

            /* propagated error from 1 ulp error: 2 atanh'(1/3) = 2.25 */
            error += 3;

            /* |w| <= 2^-r */
            r = _arb_mpn_leading_zeros(w, wn);

            /* N >= (wp-r)/(2r) */
            N = (wp - r + (2*r-1)) / (2*r);
            N = FLINT_MAX(N, 0);

            /* Evaluate Taylor series */
            _arb_atan_taylor_rs(t, &error2, w, wn, N, 0);
            /* Multiply by 2 */
            mpn_lshift(t, t, wn, 1);
            /* Taylor series evaluation error (multiply by 2) */
            error += error2 * 2;

            used_taylor_series = 1;
        }

        /* Size of output number */
        tn = wn;

        /* First table lookup */
        if (p1 != 0)
        {
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab11[p1] + ARB_LOG_TAB1_LIMBS - tn, tn);
            else
                mpn_add_n(t, t, arb_log_tab21[p1] + ARB_LOG_TAB2_LIMBS - tn, tn);
            error++;
        }

        /* Second table lookup */
        if (p2 != 0)
        {
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab12[p2] + ARB_LOG_TAB1_LIMBS - tn, tn);
            else
                mpn_add_n(t, t, arb_log_tab22[p2] + ARB_LOG_TAB2_LIMBS - tn, tn);
            error++;
        }

        /* add exp * log(2) */
        exp--;

        if (exp > 0)
        {
            cy = mpn_addmul_1(t, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, exp);
            t[tn] = cy;
            tn += (cy != 0);
            error += exp;
        }
        else if (exp < 0)
        {
            t[tn] = 0;
            u[tn] = mpn_mul_1(u, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, -exp);

            if (mpn_cmp(t, u, tn + 1) >= 0)
            {
                mpn_sub_n(t, t, u, tn + 1);
            }
            else
            {
                mpn_sub_n(t, u, t, tn + 1);
                negative = 1;
            }

            error += (-exp);

            tn += (t[tn] != 0);
        }

        /* The accumulated arithmetic error */
        mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS);

        /* Truncation error from the Taylor series */
        if (used_taylor_series)
            mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1) + 1);

        /* Set the midpoint */
        inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec);
        if (inexact)
            arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);

        TMP_END;
    }
}
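A usage sketch, assuming arb_log_arf is exported through arb.h as in the Arb source tree (most callers use arb_log on an arb_t instead):

#include "arb.h"

int main(void)
{
    arf_t x;
    arb_t z;

    arf_init(x);
    arb_init(z);

    arf_set_ui(x, 3);
    arb_log_arf(z, x, 53);      /* ball enclosing log(3) at 53-bit precision */
    arb_printd(z, 15);
    flint_printf("\n");

    arf_clear(x);
    arb_clear(z);
    flint_cleanup();
    return 0;
}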
Example #10
mp_bitcnt_t
mpn_remove (mp_ptr wp, mp_size_t *wn,
	    mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn,
	    mp_bitcnt_t cap)
{
  mp_ptr    pwpsp[LOG];
  mp_size_t pwpsn[LOG];
  mp_size_t npowers;
  mp_ptr tp, qp, np, pp, qp2;
  mp_size_t pn, nn, qn, i;
  mp_bitcnt_t pwr;
  TMP_DECL;

  ASSERT (un > 0);
  ASSERT (vn > 0);
  ASSERT (vp[0] % 2 != 0);	/* 2-adic division wants odd numbers */
  ASSERT (vn > 1 || vp[0] > 1);	/* else we would loop indefinitely */

  TMP_MARK;

  tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
  qp = TMP_ALLOC_LIMBS (un + 1);	/* quotient, alternating */
  qp2 = TMP_ALLOC_LIMBS (un + 1);	/* quotient, alternating */
  pp = vp;
  pn = vn;

  MPN_COPY (qp, up, un);
  qn = un;

  npowers = 0;
  while (qn >= pn)
    {
      qp[qn] = 0;
      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
      if (!mpn_zero_p (tp, pn))
	break;			/* could not divide by V^npowers */

      MP_PTR_SWAP (qp, qp2);
      qn = qn - pn;
      qn += qp[qn] != 0;

      pwpsp[npowers] = pp;
      pwpsn[npowers] = pn;
      npowers++;

      if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
	break;

      nn = 2 * pn - 1;		/* next power will be at least this large */
      if (nn > qn)
	break;			/* next power would be overlarge */

      if (npowers == 1)		/* Alloc once, but only if it's needed */
	np = TMP_ALLOC_LIMBS (qn + LOG);	/* powers of V */
      else
	np += pn;

      mpn_sqr (np, pp, pn);
      pn = nn + (np[nn] != 0);
      pp = np;
    }

  pwr = ((mp_bitcnt_t) 1 << npowers) - 1;

  for (i = npowers - 1; i >= 0; i--)
    {
      pn = pwpsn[i];
      if (qn < pn)
	continue;

      if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
	continue;		/* V^i would bring us past cap */

      qp[qn] = 0;
      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
      if (!mpn_zero_p (tp, pn))
	continue;		/* could not divide by V^i */

      MP_PTR_SWAP (qp, qp2);
      qn = qn - pn;
      qn += qp[qn] != 0;

      pwr += (mp_bitcnt_t) 1 << i;
    }

  MPN_COPY (wp, qp, qn);
  *wn = qn;

  TMP_FREE;

  return pwr;
}
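mpn_remove is internal; the documented interface with the same job is mpz_remove, which strips every factor f from op and returns the multiplicity:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t w, u, f;
    mp_bitcnt_t k;

    mpz_init(w);
    mpz_init_set_ui(u, 720);          /* 720 = 2^4 * 45 */
    mpz_init_set_ui(f, 2);

    k = mpz_remove(w, u, f);          /* w = 45, k = 4 */
    gmp_printf("720 = 2^%lu * %Zd\n", (unsigned long) k, w);

    mpz_clears(w, u, f, NULL);
    return 0;
}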
Example #11
void
mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
{
  mp_size_t ql;
  mp_size_t ns, ds, nl, dl;
  mp_ptr np, dp, qp, rp;
  TMP_DECL;

  ns = SIZ (num);
  ds = SIZ (den);
  nl = ABS (ns);
  dl = ABS (ds);
  ql = nl - dl + 1;

  if (UNLIKELY (dl == 0))
    DIVIDE_BY_ZERO;

  rp = MPZ_REALLOC (rem, dl);

  if (ql <= 0)
    {
      if (num != rem)
	{
	  np = PTR (num);
	  MPN_COPY (rp, np, nl);
	  SIZ (rem) = SIZ (num);
	}
      return;
    }

  TMP_MARK;
  qp = TMP_ALLOC_LIMBS (ql);
  np = PTR (num);
  dp = PTR (den);

  /* FIXME: We should think about how to handle the temporary allocation.
     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
     allocate temp space.  */

  /* Copy denominator to temporary space if it overlaps with the remainder.  */
  if (dp == rp)
    {
      mp_ptr tp;
      tp = TMP_ALLOC_LIMBS (dl);
      MPN_COPY (tp, dp, dl);
      dp = tp;
    }
  /* Copy numerator to temporary space if it overlaps with the remainder.  */
  if (np == rp)
    {
      mp_ptr tp;
      tp = TMP_ALLOC_LIMBS (nl);
      MPN_COPY (tp, np, nl);
      np = tp;
    }

  mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);

  MPN_NORMALIZE (rp, dl);

  SIZ (rem) = ns >= 0 ? dl : -dl;
  TMP_FREE;
}
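A usage sketch: mpz_tdiv_r is the remainder of truncating division, so the result takes the sign of the numerator:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t r, n, d;
    mpz_inits(r, n, d, NULL);

    mpz_set_si(n, -7);
    mpz_set_ui(d, 3);
    mpz_tdiv_r(r, n, d);                 /* -7 = -2*3 - 1, so r = -1 */
    gmp_printf("(-7) rem 3 = %Zd\n", r);

    mpz_clears(r, n, d, NULL);
    return 0;
}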
Example #12
File: set_q.c Project: Cl3Kener/gmp
void
mpf_set_q (mpf_t r, mpq_srcptr q)
{
  mp_srcptr np, dp;
  mp_size_t prec, nsize, dsize, qsize, prospective_qsize, tsize, zeros;
  mp_size_t sign_quotient, high_zero;
  mp_ptr qp, tp;
  mp_exp_t exp;
  TMP_DECL;

  ASSERT (SIZ(&q->_mp_den) > 0);  /* canonical q */

  nsize = SIZ (&q->_mp_num);
  dsize = SIZ (&q->_mp_den);

  if (UNLIKELY (nsize == 0))
    {
      SIZ (r) = 0;
      EXP (r) = 0;
      return;
    }

  TMP_MARK;

  prec = PREC (r);
  qp = PTR (r);

  sign_quotient = nsize;
  nsize = ABS (nsize);
  np = PTR (&q->_mp_num);
  dp = PTR (&q->_mp_den);

  prospective_qsize = nsize - dsize + 1;  /* q from using given n,d sizes */
  exp = prospective_qsize;                /* ie. number of integer limbs */
  qsize = prec + 1;                       /* desired q */

  zeros = qsize - prospective_qsize;      /* n zeros to get desired qsize */
  tsize = nsize + zeros;                  /* size of intermediate numerator */
  tp = TMP_ALLOC_LIMBS (tsize + 1);       /* +1 for mpn_div_q's scratch */

  if (zeros > 0)
    {
      /* pad n with zeros into temporary space */
      MPN_ZERO (tp, zeros);
      MPN_COPY (tp+zeros, np, nsize);
      np = tp;                            /* mpn_div_q allows this overlap */
    }
  else
    {
      /* shorten n to get desired qsize */
      np -= zeros;
    }

  ASSERT (tsize-dsize+1 == qsize);
  mpn_div_q (qp, np, tsize, dp, dsize, tp);

  /* strip possible zero high limb */
  high_zero = (qp[qsize-1] == 0);
  qsize -= high_zero;
  exp -= high_zero;

  EXP (r) = exp;
  SIZ (r) = sign_quotient >= 0 ? qsize : -qsize;

  TMP_FREE;
}
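A usage sketch; note the ASSERT above: the mpq_t argument must be in canonical form (call mpq_canonicalize first if in doubt):

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpq_t q;
    mpf_t f;

    mpq_init(q);
    mpq_set_ui(q, 1, 3);          /* 1/3 is already canonical */
    mpf_init2(f, 128);            /* 128-bit mantissa */

    mpf_set_q(f, q);
    gmp_printf("1/3 ~= %.30Ff\n", f);

    mpf_clear(f);
    mpq_clear(q);
    return 0;
}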
Example #13
void
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
{
    mp_ptr xp, tp, qp, gp, this_gp;
    mp_srcptr bp, ep, mp;
    mp_size_t bn, es, en, mn, xn;
    mp_limb_t invm, c;
    mpir_ui enb;
    mp_size_t i, K, j, l, k;
    int m_zero_cnt, e_zero_cnt;
    int sh;
    int use_redc;
#if HANDLE_NEGATIVE_EXPONENT
    mpz_t new_b;
#endif
#if REDUCE_EXPONENT
    mpz_t new_e;
#endif
    TMP_DECL;

    mp = PTR(m);
    mn = ABSIZ (m);
    if (mn == 0)
        DIVIDE_BY_ZERO;

    TMP_MARK;

    es = SIZ (e);
    if (es <= 0)
    {
        if (es == 0)
        {
            /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on
               whether m equals 1.  */
            SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
            PTR(r)[0] = 1;
            TMP_FREE;	/* we haven't really allocated anything here */
            return;
        }
#if HANDLE_NEGATIVE_EXPONENT
        MPZ_TMP_INIT (new_b, mn + 1);

        if (! mpz_invert (new_b, b, m))
            DIVIDE_BY_ZERO;
        b = new_b;
        es = -es;
#else
        DIVIDE_BY_ZERO;
#endif
    }
    en = es;

#if REDUCE_EXPONENT
    /* Reduce exponent by dividing it by phi(m) when m is small.  */
    if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
    {
        MPZ_TMP_INIT (new_e, 2);
        mpz_mod_ui (new_e, e, phi (mp[0]));
        e = new_e;
    }
#endif

    use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
    if (use_redc)
    {
        /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
        modlimb_invert (invm, mp[0]);
        invm = -invm;
    }
    else
    {
        /* Normalize m (i.e. make its most significant bit set) as required by
        division functions below.  */
        count_leading_zeros (m_zero_cnt, mp[mn - 1]);
        m_zero_cnt -= GMP_NAIL_BITS;
        if (m_zero_cnt != 0)
        {
            mp_ptr new_mp;
            new_mp = TMP_ALLOC_LIMBS (mn);
            mpn_lshift (new_mp, mp, mn, m_zero_cnt);
            mp = new_mp;
        }
    }

    /* Determine optimal value of k, the number of exponent bits we look at
       at a time.  */
    count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
    e_zero_cnt -= GMP_NAIL_BITS;
    enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
    k = 1;
    K = 2;
    while (2 * enb > K * (2 + k * (3 + k)))
    {
        k++;
        K *= 2;
        if (k == 10)			/* cap allocation */
            break;
    }

    tp = TMP_ALLOC_LIMBS (2 * mn);
    qp = TMP_ALLOC_LIMBS (mn + 1);

    gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);

    /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
    bn = ABSIZ (b);
    bp = PTR(b);
    /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
       for speed or correctness to do this when b and m have the same number of
       limbs, perhaps remove mpn_cmp call.  */
    if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
    {
        /* Reduce possibly huge base while moving it to gp[0].  Use a function
        call to reduce, since we don't want the quotient allocation to
         live until function return.  */
        if (use_redc)
        {
            reduce (tp + mn, bp, bn, mp, mn);	/* b mod m */
            MPN_ZERO (tp, mn);
            mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormalized! */
        }
        else
        {
            reduce (gp, bp, bn, mp, mn);
        }
    }
    else
    {
        /* |b| < m.  We pad out operands to become mn limbs, which simplifies
           the rest of the function, but slows things down when |b| << m.  */
        if (use_redc)
        {
            MPN_ZERO (tp, mn);
            MPN_COPY (tp + mn, bp, bn);
            MPN_ZERO (tp + mn + bn, mn - bn);
            mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
        }
        else
        {
            MPN_COPY (gp, bp, bn);
            MPN_ZERO (gp + bn, mn - bn);
        }
    }

    /* Compute xx^i for odd g < 2^i.  */

    xp = TMP_ALLOC_LIMBS (mn);
    mpn_sqr (tp, gp, mn);
    if (use_redc)
        mpn_redc_1 (xp, tp, mp, mn, invm);		/* xx = x^2*R^n */
    else
        mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
    this_gp = gp;
    for (i = 1; i < K / 2; i++)
    {
        mpn_mul_n (tp, this_gp, xp, mn);
        this_gp += mn;
        if (use_redc)
            mpn_redc_1 (this_gp,tp, mp, mn, invm);	/* g[i] = x^(2i+1)*R^n */
        else
            mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
    }

    /* Start the real stuff.  */
    ep = PTR (e);
    i = en - 1;				/* current index */
    c = ep[i];				/* current limb */
    sh = GMP_NUMB_BITS - e_zero_cnt;	/* significant bits in ep[i] */
    sh -= k;				/* index of lower bit of ep[i] to take into account */
    if (sh < 0)
    {   /* k-sh extra bits are needed */
        if (i > 0)
        {
            i--;
            c <<= (-sh);
            sh += GMP_NUMB_BITS;
            c |= ep[i] >> sh;
        }
    }
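Only part of mpz_powm appears above; its documented use is simply modular exponentiation, r = b^e mod m:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t r, b, e, m;
    mpz_inits(r, b, e, m, NULL);

    mpz_set_ui(b, 4);
    mpz_set_ui(e, 13);
    mpz_set_ui(m, 497);
    mpz_powm(r, b, e, m);                 /* 4^13 mod 497 = 445 */
    gmp_printf("4^13 mod 497 = %Zd\n", r);

    mpz_clears(r, b, e, m, NULL);
    return 0;
}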
Example #14
/* Multiply {up, un} by {vp, vn} and write the result to
   {rp, un + vn} assuming vn > 3*ceil(un/4).

   Note that rp gets un + vn limbs stored, even if the actual
   result only needs un + vn - 1.
*/
void
mpn_toom4_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,
		          mp_srcptr vp, mp_size_t vn)
{
  mp_size_t ind;
  mp_limb_t cy, cy2, r30, r31;
  mp_ptr tp;
  mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1, h2;
  TMP_DECL;

  sn = (un + 3) / 4;

  h1 = un - 3*sn;
  h2 = vn - 3*sn;

  ASSERT (vn > 3*sn);
  
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
#define b3 (vp + 3*sn)

   t4 = 2*sn+2; // room to multiply two integers of sn + 1 limbs each

   TMP_MARK;

   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
#define u6 (tp + 4*t4 + 4*(sn+1))

   u6[sn] = mpn_add(u6, a1, sn, a3, h1);
   u5[sn] = mpn_add_n(u5, a2, a0, sn);
   mpn_add_n(u3, u5, u6, sn + 1);
   n4 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u4, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u4, u6, u5, sn + 1);
      n4 = -n4;
   }

   u6[sn] = mpn_add(u6, b1, sn, b3, h2);
   u5[sn] = mpn_add_n(u5, b2, b0, sn);
   mpn_add_n(r2, u5, u6, sn + 1);
   n5 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u5, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u5, u6, u5, sn + 1);
      n5 = -n5;
   }
 
   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, a2, sn, 1);
   MPN_COPY(r2, a3, h1);
   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);
   cy = mpn_addmul_1(r2, a1, h1, 4);
#endif
   if (sn > h1) 
   {
      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u5, r1, r2, sn + 1);
   n6 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(u6, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(u6, r2, r1, sn + 1);
      n6 = -n6;
   }
 
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, b3, b1, h2, 2);
#else
   r1[sn] = mpn_lshift(r1, b2, sn, 1);
   MPN_COPY(r2, b3, h2);
   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);
   cy = mpn_addmul_1(r2, b1, h2, 4);
#endif
   if (sn > h2) 
   {
      cy2 = mpn_lshift(r2 + h2, b1 + h2, sn - h2, 2);
      cy = cy2 + mpn_add_1(r2 + h2, r2 + h2, sn - h2, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u2, r1, r2, sn + 1);
   n8 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(r2, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(r2, r2, r1, sn + 1);
      n8 = -n8;
   }
    
   r30 = r3[0];
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */
   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */
   r3[1] = r31;

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(u2, a2, a3, h1);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy); 
   u2[sn] = cy;
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);     
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);     
#else
   MPN_COPY(u2, a0, sn);
   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);
   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);
   cy = mpn_addmul_1(u2, a3, h1, 8);
   if (sn > h1) cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);
   u2[sn] += cy;
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(r1, b2, b3, h2);
   if (sn > h2)
      cy = mpn_add_1(r1 + h2, b2 + h2, sn - h2, cy); 
   r1[sn] = cy;
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);     
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);     
#else
   MPN_COPY(r1, b0, sn);
   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);
   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);
   cy = mpn_addmul_1(r1, b3, h2, 8);
   if (sn > h2) cy = mpn_add_1(r1 + h2, r1 + h2, sn - h2, cy);
   r1[sn] += cy;
#endif
   
   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */
   
   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h2); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */

   TC4_DENORM(r1, n1, t4 - 1);

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != un + vn) 
   {
	  MPN_ZERO((rp + rpn), un + vn - rpn);
   }

   TMP_FREE;
}
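mpn_toom4_mul is one of the internal multiplication routines selected above certain size thresholds; portable code calls the documented mpn_mul, which picks the algorithm itself. A minimal sketch:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    /* {up, 2} * {vp, 2}; the product always gets un + vn = 4 limbs stored */
    mp_limb_t up[2] = { 123456789, 1 };
    mp_limb_t vp[2] = { 987654321, 2 };
    mp_limb_t rp[4];

    mpn_mul(rp, up, 2, vp, 2);            /* requires un >= vn, no overlap */
    gmp_printf("product = %Nd\n", rp, (mp_size_t) 4);
    return 0;
}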
Example #15
void
mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
{
  mp_srcptr op1_ptr, op2_ptr;
  mp_size_t op1_size, op2_size;
  mp_ptr res_ptr;
  mp_size_t res_size, res_alloc;
  TMP_DECL;

  TMP_MARK;
  op1_size = SIZ(op1);
  op2_size = SIZ(op2);

  op1_ptr = PTR(op1);
  op2_ptr = PTR(op2);
  res_ptr = PTR(res);

  if (op1_size >= 0)
    {
      if (op2_size >= 0)
	{
	  if (op1_size >= op2_size)
	    {
	      if (ALLOC(res) < op1_size)
		{
		  _mpz_realloc (res, op1_size);
		  /* No overlapping possible: op1_ptr = PTR(op1); */
		  op2_ptr = PTR(op2);
		  res_ptr = PTR(res);
		}

	      if (res_ptr != op1_ptr)
		MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
			  op1_size - op2_size);
	      if (LIKELY (op2_size != 0))
		mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
	      res_size = op1_size;
	    }
	  else
	    {
	      if (ALLOC(res) < op2_size)
		{
		  _mpz_realloc (res, op2_size);
		  op1_ptr = PTR(op1);
		  /* No overlapping possible: op2_ptr = PTR(op2); */
		  res_ptr = PTR(res);
		}

	      if (res_ptr != op2_ptr)
		MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
			  op2_size - op1_size);
	      if (LIKELY (op1_size != 0))
		mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	      res_size = op2_size;
	    }

	  MPN_NORMALIZE (res_ptr, res_size);
	  SIZ(res) = (int) res_size; // (int) added by PM
	  return;
	}
      else /* op2_size < 0 */
	{
	  /* Fall through to the code at the end of the function.  */
	}
    }
  else
    {
      if (op2_size < 0)
	{
	  mp_ptr opx, opy;

	  /* Both operands are negative, the result will be positive.
	      (-OP1) ^ (-OP2) =
	     = ~(OP1 - 1) ^ ~(OP2 - 1) =
	     = (OP1 - 1) ^ (OP2 - 1)  */

	  op1_size = -op1_size;
	  op2_size = -op2_size;

	  /* Possible optimization: Decrease mpn_sub precision,
	     as we won't use the entire res of both.  */
	  TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
	  mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
	  op1_ptr = opx;

	  mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
	  op2_ptr = opy;

	  if (op1_size > op2_size)
	    MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);

	  res_alloc = op2_size;
	  res_ptr = MPZ_REALLOC (res, res_alloc);

	  MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
		    op2_size - op1_size);
	  mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	  res_size = op2_size;

	  MPN_NORMALIZE (res_ptr, res_size);
	  SIZ(res) = (int) res_size; // (int) added by PM
	  TMP_FREE;
	  return;
	}
      else
	{
	  /* We should compute -OP1 ^ OP2.  Swap OP1 and OP2 and fall
	     through to the code that handles OP1 ^ -OP2.  */
	  MPZ_SRCPTR_SWAP (op1, op2);
	  MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
	}
    }

  {
    mp_ptr opx;
    mp_limb_t cy;

    /* Operand 2 negative, so will be the result.
       -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
       = ~(OP1 ^ ~(OP2 - 1)) + 1 =
       = (OP1 ^ (OP2 - 1)) + 1      */

    op2_size = -op2_size;

    opx = TMP_ALLOC_LIMBS (op2_size);
    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
    op2_ptr = opx;

    res_alloc = MAX (op1_size, op2_size) + 1;
    if (ALLOC(res) < res_alloc)
      {
	_mpz_realloc (res, res_alloc);
	op1_ptr = PTR(op1);
	/* op2_ptr points to temporary space.  */
	res_ptr = PTR(res);
      }

    if (op1_size > op2_size)
      {
	MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
	mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
	res_size = op1_size;
      }
    else
      {
	MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
	if (LIKELY (op1_size != 0))
	  mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	res_size = op2_size;
      }

    cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
    res_ptr[res_size] = cy;
    res_size += (cy != 0);

    MPN_NORMALIZE (res_ptr, res_size);
    SIZ(res) = (int) -res_size; // (int) added by PM
    TMP_FREE;
  }
}
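A usage sketch; as the comments above spell out, negative operands follow two's-complement semantics:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t r, a, b;
    mpz_inits(r, a, b, NULL);

    mpz_set_si(a, -6);                    /* ...11010 in two's complement */
    mpz_set_ui(b, 3);                     /* ...00011 */
    mpz_xor(r, a, b);
    gmp_printf("-6 ^ 3 = %Zd\n", r);      /* prints -7 */

    mpz_clears(r, a, b, NULL);
    return 0;
}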
Example #16
void
mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
{
  mpir_ui g_zero_bits, u_zero_bits, v_zero_bits;
  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
  mp_ptr tp;
  mp_ptr up;
  mp_size_t usize;
  mp_ptr vp;
  mp_size_t vsize;
  mp_size_t gsize;
  TMP_DECL;

  up = PTR(u);
  usize = ABSIZ (u);
  vp = PTR(v);
  vsize = ABSIZ (v);
  /* GCD(0, V) == V.  */
  if (usize == 0)
    {
      SIZ (g) = vsize;
      if (g == v)
	return;
      MPZ_REALLOC (g, vsize);
      MPN_COPY (PTR (g), vp, vsize);
      return;
    }

  /* GCD(U, 0) == U.  */
  if (vsize == 0)
    {
      SIZ (g) = usize;
      if (g == u)
	return;
      MPZ_REALLOC (g, usize);
      MPN_COPY (PTR (g), up, usize);
      return;
    }

  if (usize == 1)
    {
      SIZ (g) = 1;
      PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]);
      return;
    }

  if (vsize == 1)
    {
      SIZ(g) = 1;
      PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]);
      return;
    }

  TMP_MARK;

  /*  Eliminate low zero bits from U and V and move to temporary storage.  */
  while (*up == 0)
    up++;
  u_zero_limbs = up - PTR(u);
  usize -= u_zero_limbs;
  count_trailing_zeros (u_zero_bits, *up);
  tp = up;
  up = TMP_ALLOC_LIMBS (usize);
  if (u_zero_bits != 0)
    {
      mpn_rshift (up, tp, usize, u_zero_bits);
      usize -= up[usize - 1] == 0;
    }
  else
    MPN_COPY (up, tp, usize);

  while (*vp == 0)
    vp++;
  v_zero_limbs = vp - PTR (v);
  vsize -= v_zero_limbs;
  count_trailing_zeros (v_zero_bits, *vp);
  tp = vp;
  vp = TMP_ALLOC_LIMBS (vsize);
  if (v_zero_bits != 0)
    {
      mpn_rshift (vp, tp, vsize, v_zero_bits);
      vsize -= vp[vsize - 1] == 0;
    }
  else
    MPN_COPY (vp, tp, vsize);

  if (u_zero_limbs > v_zero_limbs)
    {
      g_zero_limbs = v_zero_limbs;
      g_zero_bits = v_zero_bits;
    }
  else if (u_zero_limbs < v_zero_limbs)
    {
      g_zero_limbs = u_zero_limbs;
      g_zero_bits = u_zero_bits;
    }
  else  /*  Equal.  */
    {
      g_zero_limbs = u_zero_limbs;
      g_zero_bits = MIN (u_zero_bits, v_zero_bits);
    }

  /*  Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */
  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
    ? mpn_gcd (vp, vp, vsize, up, usize)
    : mpn_gcd (vp, up, usize, vp, vsize);

  /*  Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits).  */
  gsize = vsize + g_zero_limbs;
  if (g_zero_bits != 0)
    {
      mp_limb_t cy_limb;
      gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;
      MPZ_REALLOC (g, gsize);
      MPN_ZERO (PTR (g), g_zero_limbs);

      tp = PTR(g) + g_zero_limbs;
      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
      if (cy_limb != 0)
	tp[vsize] = cy_limb;
    }
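Only part of mpz_gcd appears above; usage is straightforward, and the result is always non-negative:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t g, u, v;
    mpz_init(g);
    mpz_init_set_ui(u, 462);
    mpz_init_set_si(v, -1071);

    mpz_gcd(g, u, v);                          /* gcd(462, 1071) = 21 */
    gmp_printf("gcd(462, -1071) = %Zd\n", g);

    mpz_clears(g, u, v, NULL);
    return 0;
}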
Example #17
REGPARM_ATTR (1) static void
mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
{
  mp_size_t  xsize, ysize, tsize, wsize, wsize_signed;
  mp_ptr     wp, tp;
  mp_limb_t  c, high;
  TMP_DECL (marker);

  /* w unaffected if x==0 or y==0 */
  xsize = SIZ(x);
  ysize = SIZ(y);
  if (xsize == 0 || ysize == 0)
    return;

  /* make x the bigger of the two */
  if (ABS(ysize) > ABS(xsize))
    {
      MPZ_SRCPTR_SWAP (x, y);
      MP_SIZE_T_SWAP (xsize, ysize);
    }

  sub ^= ysize;
  ysize = ABS(ysize);

  /* use mpn_addmul_1/mpn_submul_1 if possible */
  if (ysize == 1)
    {
      mpz_aorsmul_1 (w, x, PTR(y)[0], sub);
      return;
    }

  sub ^= xsize;
  xsize = ABS(xsize);

  wsize_signed = SIZ(w);
  sub ^= wsize_signed;
  wsize = ABS(wsize_signed);

  tsize = xsize + ysize;
  MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
  wp = PTR(w);

  if (wsize_signed == 0)
    {
      /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,
         since we know x,y!=0 but w==0.  */
      high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
      tsize -= (high == 0);
      SIZ(w) = (sub >= 0 ? tsize : -tsize);
      return;
    }

  TMP_MARK (marker);
  tp = TMP_ALLOC_LIMBS (tsize);

  high = mpn_mul (tp, PTR(x),xsize, PTR(y),ysize);
  tsize -= (high == 0);
  ASSERT (tp[tsize-1] != 0);
  if (sub >= 0)
    {
      mp_srcptr up    = wp;
      mp_size_t usize = wsize;

      if (usize < tsize)
        {
          up    = tp;
          usize = tsize;
          tp    = wp;
          tsize = wsize;

          wsize = usize;
        }

      c = mpn_add (wp, up,usize, tp,tsize);
      wp[wsize] = c;
      wsize += (c != 0);
    }
  else
    {
      mp_srcptr up    = wp;
      mp_size_t usize = wsize;

      if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
        {
          up    = tp;
          usize = tsize;
          tp    = wp;
          tsize = wsize;

          wsize = usize;
          wsize_signed = -wsize_signed;
        }

      ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
      wsize = usize;
      MPN_NORMALIZE (wp, wsize);
    }

  SIZ(w) = (wsize_signed >= 0 ? wsize : -wsize);

  TMP_FREE (marker);
}
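mpz_aorsmul is the shared engine behind the documented mpz_addmul (w += x*y) and mpz_submul (w -= x*y); a usage sketch of those entry points:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t w, x, y;
    mpz_init_set_ui(w, 100);
    mpz_init_set_ui(x, 7);
    mpz_init_set_ui(y, 6);

    mpz_addmul(w, x, y);              /* w = 100 + 7*6 = 142 */
    mpz_submul(w, x, x);              /* w = 142 - 7*7 =  93 */
    gmp_printf("w = %Zd\n", w);

    mpz_clears(w, x, y, NULL);
    return 0;
}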
Example #18
int
mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
{
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_ptr tp;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
  ASSERT ( (bp[0] | ap[0]) & 1);

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);

  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
    {
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p = CHOOSE_P (n);
      mp_size_t dc_scratch;

      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      hgcd_scratch = mpn_hgcd_itch (n - p);
      update_scratch = p + n - 1;

      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (dc_scratch > scratch)
	scratch = dc_scratch;
    }

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(scratch);

  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = 2*n/3;
      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      mp_size_t nn;
      mpn_hgcd_matrix_init (&M, n - p, tp);

      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
			    tp + matrix_scratch);
      if (nn > 0)
	{
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);
	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
	}
      else
	{
	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
	  if (!n)
	    {
	      TMP_FREE;
	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
	    }
	}
    }

  while (n > 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      if (mask & GMP_NUMB_HIGHBIT)
	{
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else
	{
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_hgcd2 step */
      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
	{
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
	  if (!n)
	    {
	      TMP_FREE;
	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
	    }
	}
    }

  if (bits >= 16)
    MP_PTR_SWAP (ap, bp);

  ASSERT (bp[0] & 1);

  if (n == 1)
    {
      mp_limb_t al, bl;
      al = ap[0];
      bl = bp[0];

      TMP_FREE;
      if (bl == 1)
	return 1 - 2*(bits & 1);
      else
	return mpn_jacobi_base (al, bl, bits << 1);
    }

  else
    {
      int res = mpn_jacobi_2 (ap, bp, bits & 1);
      TMP_FREE;
      return res;
    }
}
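mpn_jacobi_n is internal; the documented interface is mpz_jacobi (with mpz_kronecker covering even or negative second arguments):

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mpz_t a, b;
    mpz_init_set_ui(a, 6);
    mpz_init_set_ui(b, 35);               /* b must be odd and positive */

    /* Jacobi symbol (6/35) = (2/35)(3/35) = -1 */
    printf("(6/35) = %d\n", mpz_jacobi(a, b));

    mpz_clear(a);
    mpz_clear(b);
    return 0;
}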
Example #19
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_size_t ualloc = n + 1;

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;

  mp_ptr tp;

  TMP_DECL;

  ASSERT (an >= n);
  ASSERT (n > 0);
  ASSERT (bp[n-1] > 0);

  TMP_MARK;

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);

  /* For initial division */
  scratch = an - n + 1;
  if (scratch > talloc)
    talloc = scratch;

  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      /* For hgcd loop. */
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p1 = CHOOSE_P_1 (n);
      mp_size_t p2 = CHOOSE_P_2 (n);
      mp_size_t min_p = MIN(p1, p2);
      mp_size_t max_p = MAX(p1, p2);
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
      update_scratch = max_p + n - 1;

      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
	talloc = scratch;

      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
	 copies of a and b. */
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
	+ 3*GCDEXT_DC_THRESHOLD;

      if (scratch > talloc)
	talloc = scratch;

      /* Cofactors u0 and u1 */
      talloc += 2*(n+1);
    }

  tp = TMP_ALLOC_LIMBS(talloc);

  if (an > n)
    {
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);

      if (mpn_zero_p (ap, n))
	{
	  MPN_COPY (gp, bp, n);
	  *usizep = 0;
	  TMP_FREE;
	  return n;
	}
    }

  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }

  MPN_ZERO (tp, 2*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usizep;

  {
    /* For the first hgcd call, there are no u updates, and it makes
       some sense to use a different choice for p. */

    /* FIXME: We could trim use of temporary storage, since u0 and u1
       are not used yet. For the hgcd call, we could swap in the u0
       and u1 pointers for the relevant matrix elements. */

    struct hgcd_matrix M;
    mp_size_t p = CHOOSE_P_1 (n);
    mp_size_t nn;

    mpn_hgcd_matrix_init (&M, n - p, tp);
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
    if (nn > 0)
      {
	ASSERT (M.n <= (n - p - 1)/2);
	ASSERT (M.n + p <= (p + n - 1) / 2);

	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);

	MPN_COPY (u0, M.p[1][0], M.n);
	MPN_COPY (u1, M.p[1][1], M.n);
	un = M.n;
	while ( (u0[un-1] | u1[un-1] ) == 0)
	  un--;
      }
    else
      {
	/* mpn_hgcd has failed. Then either one of a or b is very
	   small, or the difference is very small. Perform one
	   subtraction followed by one division. */
	u1[0] = 1;

	ctx.u0 = u0;
	ctx.u1 = u1;
	ctx.tp = tp + n; /* ualloc */
	ctx.un = 1;

	/* Temporary storage n */
	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	if (n == 0)
	  {
	    TMP_FREE;
	    return ctx.gn;
	  }

	un = ctx.un;
	ASSERT (un < ualloc);
      }
  }

  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P_2 (n);
      mp_size_t nn;

      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
	{
	  mp_ptr t0;

	  t0 = tp + matrix_scratch;
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);

	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);

	  /* By the same analysis as for mpn_hgcd_matrix_mul */
	  ASSERT (M.n + un <= ualloc);

	  /* FIXME: This copying could be avoided by some swapping of
	   * pointers. May need more temporary storage, though. */
	  MPN_COPY (t0, u0, un);

	  /* Temporary storage ualloc */
	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);

	  ASSERT (un < ualloc);
	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
	}
      else
	{
	  /* mpn_hgcd has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = tp + n; /* ualloc */
	  ctx.un = un;

	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  if (n == 0)
	    {
	      TMP_FREE;
	      return ctx.gn;
	    }

	  un = ctx.un;
	  ASSERT (un < ualloc);
	}
    }
  /* We have A = ... a + ... b
	     B =  u0 a +  u1 b

	     a = u1  A + ... B
	     b = -u0 A + ... B

     with bounds

       |u0|, |u1| <= B / min(a, b)

     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
     in which case the only reduction done so far is a = A - k B for
     some k.

     Compute g = u a + v b = (u u1 - v u0) A + (...) B
     Here, u, v are bounded by

       |u| <= b,
       |v| <= a
  */

  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
    {
      /* Must return the smallest cofactor, +u1 or -u0 */
      int c;

      MPN_COPY (gp, ap, n);

      MPN_CMP (c, u0, u1, un);
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
	 this case we choose the cofactor + 1, corresponding to G = A
	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usizep = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usizep = un;
	}

      TMP_FREE;
      return n;
    }
  else if (UNLIKELY (u0[0] == 0) && un == 1)
    {
      mp_size_t gn;
      ASSERT (u1[0] == 1);

      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }
  else
    {
      mp_size_t u0n;
      mp_size_t u1n;
      mp_size_t lehmer_un;
      mp_size_t lehmer_vn;
      mp_size_t gn;

      mp_ptr lehmer_up;
      mp_ptr lehmer_vp;
      int negate;

      lehmer_up = tp; tp += n;

      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
      MPN_COPY (tp, ap, n);
      MPN_COPY (tp + n, bp, n);
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);

      u0n = un;
      MPN_NORMALIZE (u0, u0n);
      ASSERT (u0n > 0);

      if (lehmer_un == 0)
	{
	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
	  MPN_COPY (up, u0, u0n);
	  *usizep = -u0n;

	  TMP_FREE;
	  return gn;
	}

      lehmer_vp = tp;
      /* Compute v = (g - u a) / b */
      lehmer_vn = compute_v (lehmer_vp,
			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);

      if (lehmer_un > 0)
	negate = 0;
      else
	{
	  lehmer_un = -lehmer_un;
	  negate = 1;
	}

      u1n = un;
      MPN_NORMALIZE (u1, u1n);
      ASSERT (u1n > 0);

      ASSERT (lehmer_un + u1n <= ualloc);
      ASSERT (lehmer_vn + u0n <= ualloc);

      /* We may still have v == 0 */

      /* Compute u u1 */
      if (lehmer_un <= u1n)
	/* Should be the common case */
	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
      else
	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);

      un = u1n + lehmer_un;
      un -= (up[un - 1] == 0);

      if (lehmer_vn > 0)
	{
	  mp_limb_t cy;

	  /* Overwrites old u1 value */
	  if (lehmer_vn <= u0n)
	    /* Should be the common case */
	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
	  else
	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);

	  u1n = u0n + lehmer_vn;
	  u1n -= (u1[u1n - 1] == 0);

	  if (u1n <= un)
	    {
	      cy = mpn_add (up, up, un, u1, u1n);
	    }
	  else
	    {
	      cy = mpn_add (up, u1, u1n, up, un);
	      un = u1n;
	    }
	  up[un] = cy;
	  un += (cy != 0);

	  ASSERT (un < ualloc);
	}
      *usizep = negate ? -un : un;

      TMP_FREE;
      return gn;
    }
}
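mpn_gcdext above is the limb-level worker behind GMP's public mpz_gcdext; the sketch below shows the user-facing call, assuming only the documented GMP API (link with -lgmp). It computes g = gcd(a,b) together with cofactors s, t satisfying a*s + b*t = g.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b, g, s, t;

  mpz_inits (g, s, t, NULL);
  mpz_init_set_ui (a, 240);
  mpz_init_set_ui (b, 46);

  mpz_gcdext (g, s, t, a, b);   /* 240*s + 46*t = gcd(240,46) = 2 */
  gmp_printf ("g = %Zd, s = %Zd, t = %Zd\n", g, s, t);

  mpz_clears (a, b, g, s, t, NULL);
  return 0;
}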
Example No. 20
      /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
                  y = y1 2^(n2 GMP_NUMB_BITS) + y0 */

      /* x0 * y0 */
      mpn_mul_n (rp, xp, yp, n2);
      if (n1 != n2)
	rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);

      /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, xp + n1, yp, n2);
      mpn_add_n (rp + n1, rp + n1, tp, n2);

      /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, yp + n2, xp, n1);
      mpn_add_n (rp + n2, rp + n2, tp, n1);
      TMP_SFREE;
    }
  else
    {
      /* For really large operands, use plain mpn_mul_n but throw away upper n
	 limbs of result.  */
      mp_ptr tp;
      TMP_DECL;
      TMP_MARK;
      tp = TMP_ALLOC_LIMBS (2 * n);

      mpn_mul_n (tp, xp, yp, n);
      MPN_COPY (rp, tp, n);
      TMP_FREE;
    }
}
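The final branch above simply forms the full 2n-limb product and keeps its low half. The same truncated ("mullow") result can be reproduced with the documented mpn interface alone; a minimal sketch with arbitrary operand values:

#include <stdio.h>
#include <gmp.h>

#define N 4

int
main (void)
{
  mp_limb_t x[N], y[N], full[2 * N], low[N];
  int i;

  for (i = 0; i < N; i++)       /* arbitrary test operands */
    {
      x[i] = (mp_limb_t) 123456789 * (i + 1);
      y[i] = (mp_limb_t) 987654321 * (i + 2);
    }

  mpn_mul_n (full, x, y, N);    /* full 2N-limb product; full must not overlap x or y */
  for (i = 0; i < N; i++)       /* keep only the low N limbs */
    low[i] = full[i];

  for (i = 0; i < N; i++)
    gmp_printf ("limb %d: %Mu\n", i, low[i]);
  return 0;
}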
Example No. 21
/* mpz_mul -- Multiply two integers.

Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
#ifdef BERKELEY_MP
#include "mp.h"
#endif


void
#ifndef BERKELEY_MP
mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
#else /* BERKELEY_MP */
mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
#endif /* BERKELEY_MP */
{
  mp_size_t usize;
  mp_size_t vsize;
  mp_size_t wsize;
  mp_size_t sign_product;
  mp_ptr up, vp;
  mp_ptr wp;
  mp_ptr free_me;
  size_t free_me_size;
  mp_limb_t cy_limb;
  TMP_DECL;

  usize = SIZ (u);
  vsize = SIZ (v);
  sign_product = usize ^ vsize;
  usize = ABS (usize);
  vsize = ABS (vsize);

  if (usize < vsize)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (usize, vsize);
    }

  if (vsize == 0)
    {
      SIZ(w) = 0;
      return;
    }

#if HAVE_NATIVE_mpn_mul_2
  if (vsize <= 2)
    {
      MPZ_REALLOC (w, usize+vsize);
      wp = PTR(w);
      if (vsize == 1)
        cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
      else
        {
          cy_limb = mpn_mul_2 (wp, PTR(u), usize, PTR(v));
          usize++;
        }
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ(w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#else
  if (vsize == 1)
    {
      MPZ_REALLOC (w, usize+1);
      wp = PTR(w);
      cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ(w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#endif

  TMP_MARK;
  free_me = NULL;
  up = PTR(u);
  vp = PTR(v);
  wp = PTR(w);

  /* Ensure W has space enough to store the result.  */
  wsize = usize + vsize;
  if (ALLOC(w) < wsize)
    {
      if (wp == up || wp == vp)
	{
	  free_me = wp;
	  free_me_size = ALLOC(w);
	}
      else
	(*__gmp_free_func) (wp, ALLOC(w) * BYTES_PER_MP_LIMB);

      ALLOC(w) = wsize;
      wp = (mp_ptr) (*__gmp_allocate_func) (wsize * BYTES_PER_MP_LIMB);
      PTR(w) = wp;
    }
  else
    {
      /* Make U and V not overlap with W.  */
      if (wp == up)
	{
	  /* W and U are identical.  Allocate temporary space for U.  */
	  up = TMP_ALLOC_LIMBS (usize);
	  /* Is V identical too?  Keep it identical with U.  */
	  if (wp == vp)
	    vp = up;
	  /* Copy to the temporary space.  */
	  MPN_COPY (up, wp, usize);
	}
      else if (wp == vp)
	{
	  /* W and V are identical.  Allocate temporary space for V.  */
	  vp = TMP_ALLOC_LIMBS (vsize);
	  /* Copy to the temporary space.  */
	  MPN_COPY (vp, wp, vsize);
	}
    }

  if (up == vp)
    {
      mpn_sqr (wp, up, usize);
      cy_limb = wp[wsize - 1];
    }
  else
    {
      cy_limb = mpn_mul (wp, up, usize, vp, vsize);
    }

  wsize -= cy_limb == 0;

  SIZ(w) = sign_product < 0 ? -wsize : wsize;
  if (free_me != NULL)
    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
  TMP_FREE;
}
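A minimal usage sketch of mpz_mul, assuming only the documented GMP API (link with -lgmp). Note that the result may alias an operand; the free_me / temporary-copy logic above exists precisely to support calls such as mpz_mul (w, w, v).

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t u, v, w;

  mpz_init_set_si (u, -12345);
  mpz_init_set_ui (v, 6789);
  mpz_init (w);

  mpz_mul (w, u, v);            /* w = u * v; sign follows the operands */
  gmp_printf ("%Zd * %Zd = %Zd\n", u, v, w);

  mpz_mul (w, w, w);            /* aliasing is allowed: w = w^2 */
  gmp_printf ("squared: %Zd\n", w);

  mpz_clears (u, v, w, NULL);
  return 0;
}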