Beispiel #1
0
/* Put in rp[n..2n-1] an approximation of the n high limbs
   of {np, n}^2. The error is less than n ulps of rp[n].

   The strategy is selected by k, read from sqrhigh_ktab[n] when n is
   inside the table and set to (n+4)/2 otherwise:
     k <  0 : full square with mpn_sqr_n;
     k == 0 : mpfr_mulhigh_n_basecase (rp, np, np, n);
     k >  0 : square the k high limbs exactly, obtain the cross product
              of the l = n - k low limbs by the k..n-1 limbs with
              mpfr_mulhigh_n, and add that cross product in twice. */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t k;

  /* The fallback k = (n+4)/2 is < n only for n >= 5: n = 3 and n = 4
     both give k == n, i.e. l == 0 below, which would make the code
     address rp[l-1] = rp[-1] and pass zero-length operands.  The table
     must therefore cover every n <= 4. */
  MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 4); /* ensures k < n */
  k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
    : (n+4)/2; /* ensures that k >= (n+3)/2 */
  MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  if (k < 0)
    /* we can't use mpn_sqr_basecase here, since it requires
       n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
       is not exported by GMP */
    mpn_sqr_n (rp, np, n);
  else if (k == 0)
    mpfr_mulhigh_n_basecase (rp, np, np, n);
  else
    {
      mp_size_t l = n - k;   /* number of low limbs left out of the square */
      mp_limb_t cy;          /* carry collected from the shift and the add */

      mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
      mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
      /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
      cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
      cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
      mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
    }
}
Beispiel #2
0
/* Set r to b^el mod m, using left-to-right binary exponentiation on
   limb arrays.

   r  - destination; its limbs are only written at the very end (or in the
        el == 0 shortcut).
   b  - base; only its magnitude is used in the main loop, its sign is
        applied at the end for odd el.
   el - exponent.
   m  - modulus; only its magnitude (ABSIZ) is used.  A zero modulus
        triggers DIVIDE_BY_ZERO.

   The stored result always has a non-negative size field.  */
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  mp_ptr xp, tp, qp, mp, bp;
  mp_size_t xn, tn, mn, bn;
  int m_zero_cnt;
  int c;
  mp_limb_t e;
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ(m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  if (el == 0)
    {
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
	 depending on if MOD equals 1.  */
      SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
      PTR(r)[0] = 1;
      /* Returning here is before TMP_MARK, so there is nothing to free.  */
      return;
    }

  TMP_MARK;

  /* Normalize m (i.e. make its most significant bit set) as required by
     division functions below.  */
  count_leading_zeros (m_zero_cnt, mp[mn - 1]);
  m_zero_cnt -= GMP_NAIL_BITS;
  if (m_zero_cnt != 0)
    {
      /* Work on a shifted copy; PTR(m) itself is left untouched and is
	 re-read near the end of the function.  */
      mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
      mpn_lshift (new_mp, mp, mn, m_zero_cnt);
      mp = new_mp;
    }

  bn = ABSIZ(b);
  bp = PTR(b);
  if (bn > mn)
    {
      /* Reduce possibly huge base.  Use a function call to reduce, since we
	 don't want the quotient allocation to live until function return.  */
      mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
      reduce (new_bp, bp, bn, mp, mn);
      bp = new_bp;
      bn = mn;
      /* Canonicalize the base, since we are potentially going to multiply with
	 it quite a few times.  */
      MPN_NORMALIZE (bp, bn);
    }

  /* A zero base (given as zero, or reduced to zero above) gives result 0.  */
  if (bn == 0)
    {
      SIZ(r) = 0;
      TMP_FREE;
      return;
    }

  /* Scratch areas: tp receives products and the shifted value in the
     final adjustment, xp holds the running residue ({xp, xn}), qp receives
     quotients from mpn_tdiv_qr that are never read back.  */
  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
  xp = TMP_ALLOC_LIMBS (mn);

  qp = TMP_ALLOC_LIMBS (mn + 1);

  MPN_COPY (xp, bp, bn);
  xn = bn;

  /* Left-align the exponent and drop its top set bit (already accounted
     for by x = b).  Afterwards c is the number of exponent bits still to
     process and the next bit to process is the top bit of e.  */
  e = el;
  count_leading_zeros (c, e);
  e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
  c = BITS_PER_MP_LIMB - 1 - c;

  /* Main loop. */

  /* If m is already normalized (high bit of high limb set), and b is the
     same size, but a bigger value, and e==1, then there's no modular
     reductions done and we can end up with a result out of range at the
     end. */
  if (c == 0)
    {
      if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
        mpn_sub_n (xp, xp, mp, mn);
      goto finishup;
    }

  while (c != 0)
    {
      /* Square step: x = x^2, reduced when it has at least mn limbs.  */
      mpn_sqr_n (tp, xp, xn);
      tn = 2 * xn; tn -= tp[tn - 1] == 0;	/* drop one zero high limb */
      if (tn < mn)
	{
	  MPN_COPY (xp, tp, tn);
	  xn = tn;
	}
      else
	{
	  mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
	  xn = mn;
	}

      /* Multiply step, taken when the current (top) bit of e is set.  */
      if ((mp_limb_signed_t) e < 0)
	{
	  mpn_mul (tp, xp, xn, bp, bn);
	  tn = xn + bn; tn -= tp[tn - 1] == 0;
	  if (tn < mn)
	    {
	      MPN_COPY (xp, tp, tn);
	      xn = tn;
	    }
	  else
	    {
	      mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
	      xn = mn;
	    }
	}
      e <<= 1;
      c--;
    }

 finishup:
  /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing
     it with the original MOD.  */
  if (m_zero_cnt != 0)
    {
      mp_limb_t cy;
      /* Shift x by the same amount as m, reduce by the shifted m, then
	 shift the remainder back down.  */
      cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
      tp[xn] = cy; xn += cy != 0;

      if (xn < mn)
	{
	  MPN_COPY (xp, tp, xn);
	}
      else
	{
	  mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
	  xn = mn;
	}
      mpn_rshift (xp, xp, xn, m_zero_cnt);
    }
  MPN_NORMALIZE (xp, xn);

  /* Odd power of a negative base with a non-zero residue: store m - x so
     that the stored result is non-negative.  */
  if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
    {
      mp = PTR(m);			/* want original, unnormalized m */
      mpn_sub (xp, mp, mn, xp, xn);
      xn = mn;
      MPN_NORMALIZE (xp, xn);
    }
  /* r is written only here, after every read of b and m.  */
  MPZ_REALLOC (r, xn);
  SIZ (r) = xn;
  MPN_COPY (PTR(r), xp, xn);

  TMP_FREE;
}
Beispiel #3
0
/* Store in a the square of b, rounded according to rnd_mode.

   Singular inputs are handled first: NaN gives NaN; infinity and zero
   give a positive infinity / positive zero.  Otherwise the mantissa of b
   is squared exactly into temporary limbs, normalized, rounded into a by
   mpfr_round_raw, and the exponent is checked against __gmpfr_emax and
   __gmpfr_emin (delegating to mpfr_overflow / mpfr_underflow).

   The regular path finishes with MPFR_RET (inexact), where inexact is the
   value filled in by mpfr_round_raw.  */
int
mpfr_sqr (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
{
  int carry, inexact;
  mpfr_exp_t dbl_exp, res_exp;
  mp_limb_t *prod;
  mp_limb_t msb;
  mpfr_prec_t prec_b;
  mp_size_t limbs_b, limbs_sq;
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", b, b, rnd_mode),
                 ("y[%#R]=%R inexact=%d", a, a, inexact));

  /* NaN, infinity and zero never reach the limb arithmetic below */
  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(b)))
    {
      if (MPFR_IS_NAN(b))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      /* a square is never negative */
      MPFR_SET_POS (a);
      if (! MPFR_IS_INF(b))
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(b));
          MPFR_SET_ZERO(a);
        }
      else
        {
          MPFR_SET_INF(a);
        }
      MPFR_RET(0);
    }

  dbl_exp = 2 * MPFR_GET_EXP (b);
  prec_b = MPFR_PREC(b);

  /* PREC_MAX is /2 so doubling the precision cannot overflow */
  MPFR_ASSERTD (2 * prec_b > prec_b);

  /* limbs_b limbs hold b; the square needs limbs_sq limbs, which is
     either 2*limbs_b or 2*limbs_b-1 */
  limbs_b = MPFR_LIMB_SIZE(b);
  limbs_sq = 1 + (2 * prec_b - 1) / GMP_NUMB_BITS;

  MPFR_TMP_MARK(marker);
  prod = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) 2 * limbs_b * BYTES_PER_MP_LIMB);

  /* exact square of the mantissa, 2*limbs_b limbs wide; its top limb is
     >= 2^(GMP_NUMB_BITS-2), so at most one leading zero bit remains */
  mpn_sqr_n (prod, MPFR_MANT(b), limbs_b);

  /* most significant bit of the square: 1 if already normalized */
  msb = prod[2 * limbs_b - 1] >> (GMP_NUMB_BITS - 1);

  /* skip the lowest limb when only 2*limbs_b-1 limbs are significant */
  prod += 2 * limbs_b - limbs_sq; /* +0 or +1 */
  if (MPFR_UNLIKELY(msb == 0))
    /* normalize in place; limbs_sq never exceeds the 2*limbs_b limbs
       that were allocated */
    mpn_lshift (prod, prod, limbs_sq, 1);

  carry = mpfr_round_raw (MPFR_MANT (a), prod, 2 * prec_b, 0,
                          MPFR_PREC (a), rnd_mode, &inexact);
  /* a non-zero return means the rounded mantissa became a power of two,
     so store that mantissa explicitly */
  if (MPFR_UNLIKELY(carry))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);

  /* exponent of the rounded result: -1 when a normalizing shift was
     needed, +1 when rounding carried out */
  res_exp = dbl_exp + (mpfr_exp_t) (msb - 1 + carry);

  if (MPFR_UNLIKELY( res_exp > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);

  if (MPFR_UNLIKELY( res_exp < __gmpfr_emin))
    {
      /* In rounding to nearest, round to zero when the exponent of the
         exact result (before rounding, i.e. ignoring carry) is below
         __gmpfr_emin - 1, or when the exact result is a power of 2.
         NOTE(review): mpfr_powerof2_raw (b) runs after MPFR_MANT (a) has
         been written; if a and b may alias, confirm this inspects the
         intended mantissa. */
      if (rnd_mode == MPFR_RNDN
          && (dbl_exp + (mpfr_exp_t) msb < __gmpfr_emin
              || mpfr_powerof2_raw (b)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
    }

  MPFR_SET_EXP (a, res_exp);
  MPFR_SET_POS (a);
  MPFR_RET (inexact);
}
Beispiel #4
0
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
   Requires that mp[n-1..0] is odd.
   Requires that ep[en-1..0] is > 1.
   Uses scratch space tp[3n..0], i.e., 3n+1 words.

   Fixed-window exponentiation: a table of 2^windowsize powers of the base
   is precomputed, then the exponent is consumed windowsize bits at a time
   from the most significant end, with windowsize squarings and one table
   multiplication per window.  Every product is passed through MPN_REDC_X.
   (Presumably redcify/MPN_REDC_X implement Montgomery conversion and
   reduction -- their definitions are outside this function; confirm.)  */
void
mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
	      mp_srcptr ep, mp_size_t en,
	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t mip[2];
  int cnt;
  long ebi;
  int windowsize, this_windowsize;
  mp_limb_t expbits;
  mp_ptr pp, this_pp, last_pp;
  long i;
  int redc_x;
  TMP_DECL;

  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

  TMP_MARK;

  /* ebi = number of significant bits in the exponent.  */
  count_leading_zeros (cnt, ep[en - 1]);
  ebi = en * GMP_LIMB_BITS - cnt;

  windowsize = win_size (ebi);

  /* Set up mip, the negated inverse of the low limb(s) of mp, in the
     one-limb or two-limb flavour.
     NOTE(review): when HAVE_NATIVE_mpn_addmul_2 is not defined there is no
     else branch here, so for n at or above REDC_2_THRESHOLD mip and redc_x
     are not assigned in this function -- confirm the threshold is forced
     elsewhere in that configuration.  redc_x is not read directly in this
     function; presumably MPN_REDC_X uses it -- verify.  */
  if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
    {
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
      redc_x = 1;
    }
#if defined (HAVE_NATIVE_mpn_addmul_2)
  else
    {
      mpn_binvert (mip, mp, 2, tp);
      mip[0] = -mip[0]; mip[1] = ~mip[1];
      redc_x = 2;
    }
#endif
#if 0
  mpn_binvert (mip, mp, n, tp);
  redc_x = 0;
#endif

  /* Table of 2^windowsize entries, n limbs each.  */
  pp = TMP_ALLOC_LIMBS (n << windowsize);

  /* Entry 0 is redcify applied to the one-limb value 1; that 1 is parked
     temporarily in the first limb of entry 1, which the next redcify call
     overwrites with the converted base.  */
  this_pp = pp;
  this_pp[n] = 1;
  redcify (this_pp, this_pp + n, 1, mp, n);
  this_pp += n;
  redcify (this_pp, bp, bn, mp, n);

  /* Precompute powers of b and put them in the temporary area at pp.  */
  /* Each new entry is the previous entry times entry 1, then reduced.  */
  for (i = (1 << windowsize) - 2; i > 0; i--)
    {
      last_pp = this_pp;
      this_pp += n;
      mpn_mul_n (tp, last_pp, pp + n, n);
      MPN_REDC_X (this_pp, tp, mp, n, mip);
    }

  /* The first window initializes rp directly from the table.  */
  expbits = getbits (ep, ebi, windowsize);
  ebi -= windowsize;
  if (ebi < 0)
    ebi = 0;

  MPN_COPY (rp, pp + n * expbits, n);

  while (ebi != 0)
    {
      expbits = getbits (ep, ebi, windowsize);
      ebi -= windowsize;
      this_windowsize = windowsize;
      /* Fewer than windowsize bits remained: ebi is negative by the
	 shortfall, so this shortens the final window accordingly.  */
      if (ebi < 0)
	{
	  this_windowsize += ebi;
	  ebi = 0;
	}

      /* One squaring per bit of the current window.  */
      do
	{
	  mpn_sqr_n (tp, rp, n);
	  MPN_REDC_X (rp, tp, mp, n, mip);
	  this_windowsize--;
	}
      while (this_windowsize != 0);

      /* Multiply by the table entry selected by the window bits.  With
	 WANT_CACHE_SECURITY the entry is first copied out by mpn_tabselect
	 into tp + 2*n instead of being indexed directly.  */
#if WANT_CACHE_SECURITY
      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
      mpn_mul_n (tp, rp, tp + 2*n, n);
#else
      mpn_mul_n (tp, rp, pp + n * expbits, n);
#endif
      MPN_REDC_X (rp, tp, mp, n, mip);
    }

  /* One more reduction of rp extended with n zero high limbs (presumably
     this converts the result out of the redcify representation --
     confirm), followed by a single conditional subtraction so that the
     final rp is below mp.  */
  MPN_COPY (tp, rp, n);
  MPN_ZERO (tp + n, n);
  MPN_REDC_X (rp, tp, mp, n, mip);
  if (mpn_cmp (rp, mp, n) >= 0)
    mpn_sub_n (rp, rp, mp, n);
  TMP_FREE;
}
Beispiel #5
0
/* Raise {bp, bn} to the power exp by left-to-right binary exponentiation.

   rp and tp are used alternately as destination and source of each
   squaring (and, for bn > 1, of each multiplication by the base).  The
   buffer that receives the very first square is chosen from the bit
   pattern of exp so that the final value ends up in the rp supplied by
   the caller.

   Returns the number of limbs of the result.  For exp == 0 the result is
   the single limb 1; for exp == 1 it is a copy of {bp, bn}.  */
mp_size_t
mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
{
    mp_limb_t bits;
    int lead, left;
    mp_size_t rn;
    int parity;

    /* Trivial exponents need no squaring at all.  */
    if (exp == 0)
    {
        rp[0] = 1;
        return 1;
    }
    if (exp == 1)
    {
        MPN_COPY (rp, bp, bn);
        return bn;
    }

    /* One pass over the bits of exp: lead ends up as the number of leading
       zero bits, parity as the parity of the number of set bits.  Simple
       code, tuned for small exp; for large exp the bignum operations
       dominate the running time anyway.  */
    parity = 0;
    lead = GMP_LIMB_BITS;
    bits = exp;
    while (bits != 0)
    {
        parity ^= bits & 1;
        lead--;
        bits >>= 1;
    }

    /* Left-align the exponent; its top set bit is now the limb's high bit
       and is accounted for by the initial square below.  */
    exp <<= lead;

    if (bn == 1)
    {
        /* Single-limb base: multiplications by the base are done in place
           with mpn_mul_1, so only squarings swap the buffers.  */
        mp_limb_t bl = bp[0];

        if ((lead & 1) != 0)
        {
            MP_PTR_SWAP (rp, tp);
        }

        mpn_sqr_n (rp, bp, bn);
        rn = 2 * bn;
        if (rp[rn - 1] == 0)
        {
            rn--;
        }

        left = GMP_LIMB_BITS - lead - 1;
        while (1)
        {
            exp <<= 1;
            if ((exp & GMP_LIMB_HIGHBIT) != 0)
            {
                mp_limb_t carry = mpn_mul_1 (rp, rp, rn, bl);

                rp[rn] = carry;
                if (carry != 0)
                {
                    rn++;
                }
            }

            left--;
            if (left == 0)
            {
                break;
            }

            mpn_sqr_n (tp, rp, rn);
            rn = 2 * rn;
            if (tp[rn - 1] == 0)
            {
                rn--;
            }
            MP_PTR_SWAP (rp, tp);
        }
    }
    else
    {
        /* Multi-limb base: both squarings and multiplications swap the
           buffers, so the starting buffer depends on parity as well.  */
        if (((parity ^ lead) & 1) == 0)
        {
            MP_PTR_SWAP (rp, tp);
        }

        mpn_sqr_n (rp, bp, bn);
        rn = 2 * bn;
        if (rp[rn - 1] == 0)
        {
            rn--;
        }

        left = GMP_LIMB_BITS - lead - 1;
        while (1)
        {
            exp <<= 1;
            if ((exp & GMP_LIMB_HIGHBIT) != 0)
            {
                /* mpn_mul's return value is tested for a zero top limb */
                mp_limb_t high = mpn_mul (tp, rp, rn, bp, bn);

                rn += bn;
                if (high == 0)
                {
                    rn--;
                }
                MP_PTR_SWAP (rp, tp);
            }

            left--;
            if (left == 0)
            {
                break;
            }

            mpn_sqr_n (tp, rp, rn);
            rn = 2 * rn;
            if (tp[rn - 1] == 0)
            {
                rn--;
            }
            MP_PTR_SWAP (rp, tp);
        }
    }

    return rn;
}
Beispiel #6
0
/* For now, also disable REDC when MOD is even, as the inverse can't handle
   that.  At some point, we might want to make the code faster for that case,
   perhaps using CRR.  */

#ifndef POWM_THRESHOLD
#define POWM_THRESHOLD  ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
#endif

#define HANDLE_NEGATIVE_EXPONENT 1
#undef REDUCE_EXPONENT

void
#ifndef BERKELEY_MP
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
#else /* BERKELEY_MP */
pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
#endif /* BERKELEY_MP */
{
  mp_ptr xp, tp, qp, gp, this_gp;
  mp_srcptr bp, ep, mp;
  mp_size_t bn, es, en, mn, xn;
  mp_limb_t invm, c;
  unsigned long int enb;
  mp_size_t i, K, j, l, k;
  int m_zero_cnt, e_zero_cnt;
  int sh;
  int use_redc;
#if HANDLE_NEGATIVE_EXPONENT
  mpz_t new_b;
#endif
#if REDUCE_EXPONENT
  mpz_t new_e;
#endif
  TMP_DECL (marker);

  mp = PTR(m);
  mn = ABSIZ (m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  TMP_MARK (marker);

  es = SIZ (e);
  if (es <= 0)
    {
      if (es == 0)
	{
	  /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
	     m equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  TMP_FREE (marker);	/* we haven't really allocated anything here */
	  return;
	}
#if HANDLE_NEGATIVE_EXPONENT
      MPZ_TMP_INIT (new_b, mn + 1);

      if (! mpz_invert (new_b, b, m))
	DIVIDE_BY_ZERO;
      b = new_b;
      es = -es;
#else
      DIVIDE_BY_ZERO;
#endif
    }
  en = es;

#if REDUCE_EXPONENT
  /* Reduce exponent by dividing it by phi(m) when m small.  */
  if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
    {
      MPZ_TMP_INIT (new_e, 2);
      mpz_mod_ui (new_e, e, phi (mp[0]));
      e = new_e;
    }
#endif

  use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
  if (use_redc)
    {
      /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
      modlimb_invert (invm, mp[0]);
      invm = -invm;
    }
  else
    {
      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp;
	  new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}
    }

  /* Determine optimal value of k, the number of exponent bits we look at
     at a time.  */
  count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
  e_zero_cnt -= GMP_NAIL_BITS;
  enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
  k = 1;
  K = 2;
  while (2 * enb > K * (2 + k * (3 + k)))
    {
      k++;
      K *= 2;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
  qp = TMP_ALLOC_LIMBS (mn + 1);

  gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);

  /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
  bn = ABSIZ (b);
  bp = PTR(b);
  /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
     for speed or correctness to do this when b and m have the same number of
     limbs, perhaps remove mpn_cmp call.  */
  if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
    {
      /* Reduce possibly huge base while moving it to gp[0].  Use a function
	 call to reduce, since we don't want the quotient allocation to
	 live until function return.  */
      if (use_redc)
	{
	  reduce (tp + mn, bp, bn, mp, mn);	/* b mod m */
	  MPN_ZERO (tp, mn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
	}
      else
	{
	  reduce (gp, bp, bn, mp, mn);
	}
    }
  else
    {
      /* |b| < m.  We pad out operands to become mn limbs,  which simplifies
	 the rest of the function, but slows things down when the |b| << m.  */
      if (use_redc)
	{
	  MPN_ZERO (tp, mn);
	  MPN_COPY (tp + mn, bp, bn);
	  MPN_ZERO (tp + mn + bn, mn - bn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
	}
      else
	{
	  MPN_COPY (gp, bp, bn);
	  MPN_ZERO (gp + bn, mn - bn);
	}
    }

  /* Compute xx^i for odd g < 2^i.  */

  xp = TMP_ALLOC_LIMBS (mn);
  mpn_sqr_n (tp, gp, mn);
  if (use_redc)
    redc (xp, mp, mn, invm, tp);		/* xx = x^2*R^n */
  else
    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
  this_gp = gp;
  for (i = 1; i < K / 2; i++)
    {
      mpn_mul_n (tp, this_gp, xp, mn);
      this_gp += mn;
      if (use_redc)
	redc (this_gp, mp, mn, invm, tp);	/* g[i] = x^(2i+1)*R^n */
      else
	mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
    }

  /* Start the real stuff.  */
  ep = PTR (e);
  i = en - 1;				/* current index */
  c = ep[i];				/* current limb */
  sh = GMP_NUMB_BITS - e_zero_cnt;	/* significant bits in ep[i] */
  sh -= k;				/* index of lower bit of ep[i] to take into account */
  if (sh < 0)
    {					/* k-sh extra bits are needed */
      if (i > 0)
	{
	  i--;
	  c <<= (-sh);
	  sh += GMP_NUMB_BITS;
	  c |= ep[i] >> sh;
	}
    }