Exemple #1
0
/* Make one pass over src[0..size-1], low limb first, for an odd divisor d.

   In every step the carry bit c and the running value h are subtracted
   from the current limb (two SUBC_LIMB steps whose borrows are summed to
   form the next c).  The difference is multiplied by the limb inverse of d
   produced by binvert_limb, and the high limb of that quotient limb times
   d becomes the next h.

     src   limbs to process
     size  number of limbs, at least 1
     d     divisor, must be odd
     h     initial value of the running high limb

   Returns the final h plus the final borrow.  */
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
{
  mp_limb_t  limb, diff, quot, dinv, lo_unused, dshift, borrow_a, borrow_b;
  mp_limb_t  c = 0;
  mp_size_t  idx = 0;

  ASSERT (size >= 1);
  ASSERT (d & 1);

  binvert_limb (dinv, d);
  /* Shift d past the nail bits so the high limb of the product lines up. */
  dshift = d << GMP_NAIL_BITS;

  while (idx < size)
    {
      ASSERT (c==0 || c==1);

      limb = src[idx];

      /* diff = limb - c, then quot = diff - h; collect both borrows. */
      SUBC_LIMB (borrow_a, diff, limb, c);
      SUBC_LIMB (borrow_b, quot, diff, h);
      c = borrow_a + borrow_b;

      /* Quotient limb, then the high limb of quot*d is carried forward. */
      quot = (quot * dinv) & GMP_NUMB_MASK;
      umul_ppmm (h, lo_unused, quot, dshift);

      idx++;
    }

  return h + c;
}
Exemple #2
0
/* Fill rp[0..n-1] from up[0..n-1] by a base-case Hensel division of 1 by
   up followed by precision-doubling Newton steps; presumably the result
   is the inverse of {up,n} modulo B^n -- TODO confirm against the callers.

     rp       result area, n limbs
     up       operand, n limbs (its low limb is passed to binvert_limb)
     n        number of limbs
     scratch  work area; holds the operand "1" for the base case and the
              products formed during the Newton steps

   The precisions are produced by halving n (rounding up) until the size is
   no longer above BINV_NEWTON_THRESHOLD; they are then replayed from the
   smallest to the largest.  */
void
mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_size_t steps[NPOWS];
  mp_size_t *step_top = steps;
  mp_size_t have, want;
  mp_ptr prod = scratch;
  mp_limb_t low_inv;

  /* Record the precisions from the largest down; 'have' ends up as the
     base case size.  */
  have = n;
  while (ABOVE_THRESHOLD (have, BINV_NEWTON_THRESHOLD))
    {
      *step_top++ = have;
      have = (have + 1) >> 1;
    }

  /* Base case: divide the 'have'-limb value 1 by up with one of the
     quadratic/divide-and-conquer bdiv routines.  FIXME: We should call some
     divide-and-conquer lsb division function here for an operand
     subrange.  */
  MPN_ZERO (prod, have);
  prod[0] = 1;
  binvert_limb (low_inv, up[0]);
  if (BELOW_THRESHOLD (have, DC_BDIV_Q_THRESHOLD))
    mpn_sb_bdiv_q (rp, prod, have, up, have, -low_inv);
  else
    mpn_dc_bdiv_q (rp, prod, have, up, have, -low_inv);

  /* Newton steps: each one extends rp from 'have' to 'want' limbs.  */
  while (have < n)
    {
      want = *--step_top;

#if WANT_FFT
      if (ABOVE_THRESHOLD (want, 2 * MUL_FFT_MODF_THRESHOLD))
	{
	  int k;
	  mp_size_t m, j;

	  k = mpn_fft_best_k (want, 0);
	  m = mpn_fft_next_size (want, k);
	  mpn_mul_fft (prod, m, up, want, rp, have, k);

	  /* If the low 'have' limbs of the product differ from the value 1,
	     add 1 to the part above them (presumably a wraparound
	     correction for the FFT product).  */
	  j = have;
	  while (--j >= 0)
	    {
	      if (prod[j] > (j == 0))
		{
		  mpn_add_1 (prod + have, prod + have, want - have, 1);
		  break;
		}
	    }
	}
      else
#endif
	mpn_mul (prod, up, want, rp, have);

      /* New high limbs of rp = -(rp * high part of the product).  */
      mpn_mullow_n (rp + have, rp, prod + have, want - have);
      mpn_neg_n (rp + have, rp + have, want - have);

      have = want;
    }
}
Exemple #3
0
/* Randomized test driver for the bdiv (Hensel division) routines.

   Usage: an optional first argument overrides the number of iterations
   (default COUNT).  It must parse completely as a positive integer;
   otherwise an error is printed to stderr and the program returns 1.

   Every iteration draws a random numerator n and a random nonzero divisor
   d with n having at least as many limbs as d, forces d odd, sometimes
   reshapes the operands into special patterns, and then runs each
   applicable bdiv variant.  Guard limbs just outside the quotient and
   remainder areas, and one limb past the requested scratch size, hold
   random values that are asserted to be unchanged after each call.  The
   results are handed to check_one (defined outside this function).

   The loop counter 'test' is not declared here; it comes from file
   scope.  */
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, tz;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dp, qp, rp;
  mp_limb_t rh;
  mp_limb_t t;
  mp_limb_t dinv;
  int count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  /* Optional iteration count on the command line. */
  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }


  /* Upper limits, in bits, for the divisor and the numerator. */
  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (tz);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  /* The "+ 1" offset leaves one allocated limb below index 0, so that
     qp[-1] and rp[-1] can serve as guard limbs.  */
  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;

  /* Scratch starts at a single limb and is grown on demand below. */
  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = 0; test < count;)
    {
      /* Random operand sizes in bits; dbits never exceeds nbits or
	 maxdbits.  */
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
      if (maxdbits > nbits)
	dbits = random_word (rands) % nbits + 1;
      else
	dbits = random_word (rands) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      /* Regenerate until d is nonzero and n has at least as many limbs
	 as d.  */
      do
	{
	  RANDFUNC (n, rands, nbits);
	  do
	    {
	      RANDFUNC (d, rands, dbits);
	    }
	  while (mpz_sgn (d) == 0);

	  np = PTR (n);
	  dp = PTR (d);
	  nn = SIZ (n);
	  dn = SIZ (d);
	}
      while (nn < dn);

      /* Force the divisor to be odd. */
      dp[0] |= 1;

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      /* Occasionally use an all-ones low divisor limb. */
      if (t % 17 == 0)
	dp[0] = GMP_NUMB_MAX;

      /* Occasionally give the numerator a special shape; values of the
	 selector other than 0, 1 and 2 leave the operands as generated.  */
      switch ((int) t % 16)
	{
	case 0:
	  /* Zero the low clearn+1 limbs of n. */
	  clearn = random_word (rands) % nn;
	  for (i = 0; i <= clearn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  /* High dn limbs of n = d minus a random word. */
	  mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	case 2:
	  /* High dn limbs of n = d plus a random word. */
	  mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	}

      test++;

      /* Limb inverse of the low divisor limb; the pi1 routines below are
	 passed its negation.  */
      binvert_limb (dinv, dp[0]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      /* Plant guard values around the quotient area and below the
	 remainder area.  */
      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      /* Guard value written one limb past the requested scratch size. */
      ran = random_word (rands);

      /* The schoolbook (sbpi1) variants are only exercised when
	 (nn - dn) * dn is below 1e5.  */
      if ((double) (nn - dn) * dn < 1e5)
	{
	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_qr */
	      MPN_ZERO (qp, nn - dn);
	      MPN_ZERO (rp, dn);
	      MPN_COPY (rp, np, nn);
	      rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr");
	    }

	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_q */
	      MPN_COPY (rp, np, nn);
	      MPN_ZERO (qp, nn - dn);
	      mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q");
	    }
	}

      /* The divide-and-conquer variants are only run for dn >= 4 and a
	 quotient of at least 2 limbs.  */
      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_qr */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr");
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_q */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
	}

      if (nn > dn)
	{
	  /* Test mpn_bdiv_qr */
	  itch = mpn_bdiv_qr_itch (nn, dn);
	  /* Grow scratch to itch limbs plus one guard limb. */
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_ZERO (qp, nn - dn);
	  MPN_ZERO (rp, dn);
	  /* Guard limb directly above the dn-limb remainder. */
	  rp[dn] = rran1;
	  rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);

	  check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
	}

      /* The mu variants below are skipped unless both the quotient and the
	 divisor have at least 2 limbs.  */
      if (nn - dn < 2 || dn < 2)
	continue;

      /* Test mpn_mu_bdiv_qr */
      itch = mpn_mu_bdiv_qr_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn);
      MPN_ZERO (rp, dn);
      rp[dn] = rran1;
      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
      check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr");

      /* Test mpn_mu_bdiv_q */
      itch = mpn_mu_bdiv_q_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn + 1);
      mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q");
    }

  /* Release everything in reverse order of acquisition. */
  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (tz);

  tests_end ();
  return 0;
}
Exemple #4
0
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.

   Iterates

     r' <-- r - r * (a^{k-1} r^k - 1) / k

   If

     a^{k-1} r^k = 1 (mod 2^m),

   then

     a^{k-1} r'^k = 1 (mod 2^{2m}),

   Compute the update term as

     r' = r - (a^{k-1} r^{k+1} - r) / k

   where we still have cancellation of low limbs.

   Parameters:
     rp  result area, n limbs
     ap  operand a, n limbs; ap[0] must be odd
     n   number of limbs, n > 0
     k   odd exponent, k >= 3

   Temporary space (4n limbs, plus 2n+1 limbs when n > 1) is obtained with
   TMP_ALLOC_LIMBS and released before returning.

 */
void
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_size_t sizes[GMP_LIMB_BITS * 2];
  mp_ptr akm1, tp, rnp, ep;
  mp_limb_t a0, r0, km1, kp1h, kinv;
  mp_size_t rn;
  unsigned i;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);
  ASSERT (k >= 3);

  TMP_MARK;

  /* One allocation: n limbs for a^{k-1}, followed by 3n limbs of scratch. */
  akm1 = TMP_ALLOC_LIMBS (4*n);
  tp = akm1 + n;

  km1 = k-1;
  /* FIXME: Could arrange the iteration so we don't need to compute
     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     that we can use wraparound also for a*r, since the low half is
     unchanged from the previous iteration. Or possibly mulmid. Also,
     a r = a^{1/k}, so we get that value too, for free? */
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */

  a0 = ap[0];
  binvert_limb (kinv, k);

  /* 4 bits: a^{1/k - 1} (mod 16):

	a % 8
	1 3 5 7
   k%4 +-------
     1 |1 1 1 1
     3 |1 9 9 1
  */
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
  /* Single-limb Newton steps; each one doubles the number of correct low
     bits.  The early steps pass a masked k to powlimb.  */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
#if GMP_NUMB_BITS > 32
  {
    /* Keep doubling until the whole limb is covered. */
    unsigned prec = 32;
    do
      {
	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
	prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (n == 1)
    {
      TMP_FREE;
      return;
    }

  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
  kp1h = k/2 + 1;

  /* FIXME: Special case for two limb iteration. */
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
  ep = rnp + n;

  /* Record the precisions n, ceil(n/2), ... (all > 1); note that this
     consumes n.  FIXME: Possible to do this on the fly with some bit
     fiddling. */
  for (i = 0; n > 1; n = (n + 1)/2)
    sizes[i++] = n;

  rn = 1;

  /* Replay the precisions from the smallest to the largest; rn is the
     number of limbs of rp that are already final.  */
  while (i-- > 0)
    {
      /* Compute x^{k+1}. */
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
			       final iteration. */
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);

      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */

      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
      ASSERT (mpn_cmp (ep, rp, rn) == 0);

      ASSERT (sizes[i] <= 2*rn);
      /* New high limbs: -(high part of a^{k-1} r^{k+1}) / k. */
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
      rn = sizes[i];
    }
  TMP_FREE;
}
Exemple #5
0
/* Process src[0..size-1], low limb first, for an odd divisor d and an
   initial carry orig_c, and return the final carry.

     src     limbs to process
     size    number of limbs, at least 1
     d       divisor, must be odd
     orig_c  initial carry

   A single limb is handled with a plain % operation.  Otherwise all limbs
   but the last go through the inverse-multiply step, and the last limb is
   finished with a subtract and add-back when it does not exceed d.  The
   closing assertion states the result range: below d when orig_c < d,
   otherwise at most d.  */
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
                     mp_limb_t orig_c)
{
  mp_limb_t  limb, hi, lo, dinv, discard, dshift, ret;
  mp_limb_t  c = orig_c;
  mp_size_t  pos;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (c);

  /* One limb: an ordinary remainder, negated mod d when limb > c.  */
  if (size == 1)
    {
      limb = src[0];
      if (limb > c)
	{
	  hi = (limb - c) % d;
	  return hi != 0 ? d - hi : hi;
	}
      return (c - limb) % d;
    }

  binvert_limb (dinv, d);
  dshift = d << GMP_NAIL_BITS;

  /* All limbs except the most significant one.  The body always runs at
     least once, exactly like a do/while.  */
  pos = 0;
  for (;;)
    {
      limb = src[pos];
      SUBC_LIMB (c, lo, limb, c);
      lo = (lo * dinv) & GMP_NUMB_MASK;
      umul_ppmm (hi, discard, lo, dshift);
      c += hi;

      if (++pos >= size-1)
	break;
    }

  limb = src[pos];
  if (limb > d)
    {
      /* Can't skip a divide, just do the loop code once more. */
      SUBC_LIMB (c, lo, limb, c);
      lo = (lo * dinv) & GMP_NUMB_MASK;
      umul_ppmm (hi, discard, lo, dshift);
      c += hi;
      ret = c;
    }
  else
    {
      /* With high<=d the final step can be a subtract and addback.  If c==0
	 then the addback will restore to l>=0.  If c==d then will get l==d
	 if s==0, but that's ok per the function definition.  */
      ret = c - limb;
      if (c < limb)
	ret += d;
    }

  ASSERT (orig_c < d ? ret < d : ret <= d);
  return ret;
}
Exemple #6
0
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
   Requires that mp[n-1..0] is odd.  FIXME: is this true?
   Requires that ep[en-1..0] is > 1.
   Uses scratch space at tp of 3n+1 limbs.

   NOTE(review): the body addresses tp + 4*n (table of powers, one n-limb
   entry per window value) and tp + 6*n (scratch handed to redcify), so the
   scratch area must evidently be larger than the 3n+1 limbs stated above --
   confirm the real requirement with the callers.  Likewise, the first
   assertion only checks that the exponent is nonzero.

   Outline: a fixed-window exponentiation on values kept in the form
   produced by redcify, using mpn_redc_1_sec after every product, and a
   final conditional subtraction of mp.  */
void
mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
	      mp_srcptr ep, mp_size_t en,
	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t minv;
  int cnt;
  mp_bitcnt_t ebi;
  int windowsize, this_windowsize;
  mp_limb_t expbits;
  mp_ptr pp, this_pp;
  long i;
  int cnd;

  ASSERT (en > 1 || (en == 1 && ep[0] > 0));
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

  /* ebi = number of significant bits in the exponent. */
  count_leading_zeros (cnt, ep[en - 1]);
  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;

  windowsize = win_size (ebi);

  /* Negated limb inverse of the low modulus limb, as passed to
     mpn_redc_1_sec.  */
  binvert_limb (minv, mp[0]);
  minv = -minv;

  /* The table of powers lives inside the caller's scratch area. */
  pp = tp + 4 * n;

  /* Entry 0: redcify applied to the one-limb value 1, which is staged
     temporarily in the slot of entry 1.  Entry 1: redcify applied to b.  */
  this_pp = pp;
  this_pp[n] = 1;
  redcify (this_pp, this_pp + n, 1, mp, n, tp + 6 * n);
  this_pp += n;
  redcify (this_pp, bp, bn, mp, n, tp + 6 * n);

  /* Precompute powers of b and put them in the temporary area at pp.  */
  for (i = (1 << windowsize) - 2; i > 0; i--)
    {
      /* next entry = reduce (current entry * entry 1) */
      mpn_mul_basecase (tp, this_pp, n, pp + n, n);
      this_pp += n;
      mpn_redc_1_sec (this_pp, tp, mp, n, minv);
    }

  /* Fetch the topmost window of exponent bits; ebi is unsigned, hence the
     explicit clamp at zero.  */
  expbits = getbits (ep, ebi, windowsize);
  if (ebi < windowsize)
    ebi = 0;
  else
    ebi -= windowsize;

  /* Initial value = table entry selected by the top window. */
#if WANT_CACHE_SECURITY
  mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
#else
  MPN_COPY (rp, pp + n * expbits, n);
#endif

  while (ebi != 0)
    {
      expbits = getbits (ep, ebi, windowsize);
      this_windowsize = windowsize;
      /* The last window may be shorter than windowsize. */
      if (ebi < windowsize)
	{
	  this_windowsize -= windowsize - ebi;
	  ebi = 0;
	}
      else
	ebi -= windowsize;

      /* One squaring per bit of this window. */
      do
	{
	  mpn_local_sqr (tp, rp, n, tp + 2 * n);
	  mpn_redc_1_sec (rp, tp, mp, n, minv);
	  this_windowsize--;
	}
      while (this_windowsize != 0);

      /* Multiply by the table entry for this window; the multiplication is
	 performed for every window value, including 0.  */
#if WANT_CACHE_SECURITY
      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);
#else
      mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
#endif
      mpn_redc_1_sec (rp, tp, mp, n, minv);
    }

  /* One more reduction of rp extended with n zero limbs (presumably this
     converts the result out of the redcify representation).  */
  MPN_COPY (tp, rp, n);
  MPN_ZERO (tp + n, n);
  mpn_redc_1_sec (rp, tp, mp, n, minv);
  /* cnd is the borrow of rp - mp; the conditional subtract is requested
     exactly when there was no borrow.  */
  cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
  mpn_subcnd_n (rp, rp, mp, n, !cnd);
}
Exemple #7
0
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
   Requires that mp[n-1..0] is odd.
   Requires that ep[en-1..0] is > 1.
   Uses scratch space tp[3n..0], i.e., 3n+1 words.

   Outline: a fixed-window exponentiation.  The table of powers is obtained
   with TMP_ALLOC_LIMBS (n << windowsize limbs) and released before
   returning.  Every product is reduced through MPN_REDC_X, a macro defined
   outside this block; it presumably dispatches on the local redc_x --
   confirm against the macro definition.  */
void
mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
	      mp_srcptr ep, mp_size_t en,
	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t mip[2];
  int cnt;
  long ebi;
  int windowsize, this_windowsize;
  mp_limb_t expbits;
  mp_ptr pp, this_pp, last_pp;
  long i;
  int redc_x;
  TMP_DECL;

  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

  TMP_MARK;

  /* ebi = number of significant bits in the exponent. */
  count_leading_zeros (cnt, ep[en - 1]);
  ebi = en * GMP_LIMB_BITS - cnt;

  windowsize = win_size (ebi);

  /* Choose the reduction flavour and prepare the negated inverse of the
     low one or two modulus limbs.
     NOTE(review): when n is not below REDC_2_THRESHOLD and
     HAVE_NATIVE_mpn_addmul_2 is not defined, neither branch runs and
     mip/redc_x stay unset; presumably the threshold is arranged elsewhere
     so that this cannot happen -- confirm.  */
  if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
    {
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
      redc_x = 1;
    }
#if defined (HAVE_NATIVE_mpn_addmul_2)
  else
    {
      mpn_binvert (mip, mp, 2, tp);
      mip[0] = -mip[0]; mip[1] = ~mip[1];
      redc_x = 2;
    }
#endif
#if 0
  mpn_binvert (mip, mp, n, tp);
  redc_x = 0;
#endif

  /* Table of powers: one n-limb entry per window value. */
  pp = TMP_ALLOC_LIMBS (n << windowsize);

  /* Entry 0: redcify applied to the one-limb value 1, which is staged
     temporarily in the slot of entry 1.  Entry 1: redcify applied to b.  */
  this_pp = pp;
  this_pp[n] = 1;
  redcify (this_pp, this_pp + n, 1, mp, n);
  this_pp += n;
  redcify (this_pp, bp, bn, mp, n);

  /* Precompute powers of b and put them in the temporary area at pp.  */
  for (i = (1 << windowsize) - 2; i > 0; i--)
    {
      /* next entry = reduce (previous entry * entry 1) */
      last_pp = this_pp;
      this_pp += n;
      mpn_mul_n (tp, last_pp, pp + n, n);
      MPN_REDC_X (this_pp, tp, mp, n, mip);
    }

  /* Fetch the topmost window of exponent bits; ebi is signed here, so a
     negative remainder is clamped to zero.  */
  expbits = getbits (ep, ebi, windowsize);
  ebi -= windowsize;
  if (ebi < 0)
    ebi = 0;

  /* Initial value = table entry selected by the top window.
     NOTE(review): this lookup indexes the table directly even when
     WANT_CACHE_SECURITY is set, unlike the lookups inside the loop.  */
  MPN_COPY (rp, pp + n * expbits, n);

  while (ebi != 0)
    {
      expbits = getbits (ep, ebi, windowsize);
      ebi -= windowsize;
      this_windowsize = windowsize;
      /* The last window may be shorter than windowsize; ebi is negative by
	 exactly the number of missing bits.  */
      if (ebi < 0)
	{
	  this_windowsize += ebi;
	  ebi = 0;
	}

      /* One squaring per bit of this window. */
      do
	{
	  mpn_sqr_n (tp, rp, n);
	  MPN_REDC_X (rp, tp, mp, n, mip);
	  this_windowsize--;
	}
      while (this_windowsize != 0);

      /* Multiply by the table entry for this window; the multiplication is
	 performed for every window value, including 0.  */
#if WANT_CACHE_SECURITY
      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
      mpn_mul_n (tp, rp, tp + 2*n, n);
#else
      mpn_mul_n (tp, rp, pp + n * expbits, n);
#endif
      MPN_REDC_X (rp, tp, mp, n, mip);
    }

  /* One more reduction of rp extended with n zero limbs (presumably this
     converts the result out of the redcify representation), followed by a
     final subtraction of mp when rp >= mp.  */
  MPN_COPY (tp, rp, n);
  MPN_ZERO (tp + n, n);
  MPN_REDC_X (rp, tp, mp, n, mip);
  if (mpn_cmp (rp, mp, n) >= 0)
    mpn_sub_n (rp, rp, mp, n);
  TMP_FREE;
}
Exemple #8
0
/* Divide {up,usize} by {vp,vsize} from the low end, producing d quotient
   bits.

   The whole quotient limbs (d / GMP_NUMB_BITS of them) are stored at qp,
   and the corresponding multiples of v are subtracted from u in place.
   When d is not a multiple of GMP_NUMB_BITS, the remaining low quotient
   bits are returned as the function value instead of being stored;
   otherwise 0 is returned.

     qp     quotient limbs; may coincide with up as permitted by the
            MPN_SAME_OR_INCR2_P assertion
     up     dividend, updated in place
     usize  limbs at up, at least 1
     vp     divisor; vp[0] is passed to binvert_limb
     vsize  limbs at vp, at least 1
     d      number of quotient bits, at most usize * GMP_NUMB_BITS  */
mp_limb_t
mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
	     mp_srcptr vp, mp_size_t vsize, unsigned long int d)
{
  mp_limb_t v_inv;
  mp_limb_t qlimb, borrow;

  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize * GMP_NUMB_BITS >= d);
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  /* 1/V mod 2^GMP_NUMB_BITS. */
  binvert_limb (v_inv, vp[0]);

  /* Fast code for two cases previously used by the accel part of mpn_gcd.
     (Could probably remove this now it's inlined there.) */
  if (usize == 2 && vsize == 2 &&
      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
    {
      mp_limb_t hi, lo;

      qlimb = (up[0] * v_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, lo, qlimb, vp[0] << GMP_NAIL_BITS);
      up[0] = 0;
      up[1] -= hi + qlimb*vp[1];
      qp[0] = qlimb;

      if (d != 2*GMP_NUMB_BITS)
	return 0;

      /* Second quotient limb. */
      qlimb = (up[1] * v_inv) & GMP_NUMB_MASK;
      up[1] = 0;
      qp[1] = qlimb;
      return 0;
    }

  /* Main loop: one full quotient limb per round.  */
  for (; d >= GMP_NUMB_BITS; d -= GMP_NUMB_BITS)
    {
      qlimb = (up[0] * v_inv) & GMP_NUMB_MASK;
      borrow = mpn_submul_1 (up, vp, MIN (usize, vsize), qlimb);
      /* Propagate the borrow into the limbs of u above v. */
      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, borrow);

      up++;
      usize--;
      *qp++ = qlimb;
    }

  if (d == 0)
    return 0;

  /* Partial limb: only the low d quotient bits are wanted.  */
  qlimb = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
  if (qlimb == 0)
    return 0;

  if (qlimb == 1)
    borrow = mpn_sub_n (up, up, vp, MIN (usize, vsize));
  else
    borrow = mpn_submul_1 (up, vp, MIN (usize, vsize), qlimb);

  if (usize > vsize)
    mpn_sub_1 (up + vsize, up + vsize, usize - vsize, borrow);

  return qlimb;
}
Exemple #9
0
/* Process src[0..size-1], low limb first, for an odd divisor d and an
   initial carry orig_c, and return the final carry.

     src     limbs to process
     size    number of limbs, at least 1
     d       divisor, must be odd
     orig_c  initial carry

   Three code paths:
     - size == 1 uses a plain % operation;
     - d <= 0xFFFFFFFF uses umul_ppmm_half_lowequal for the high product;
     - otherwise d is split into 32-bit halves for umul_ppmm_lowequal.
   The two multi-limb paths are structurally identical apart from the
   multiply macro.  Those macros and ASSERT_RETVAL are defined outside this
   block.  */
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
{
  mp_limb_t  c = orig_c;
  mp_limb_t  s, l, q, h, inverse;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (c);

  /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
  if (size == 1)
    {
      s = src[0];
      if (s > c)
	{
	  /* Remainder of s-c, negated mod d. */
	  l = s-c;
	  h = l % d;
	  if (h != 0)
	    h = d - h;
	}
      else
	{
	  l = c-s;
	  h = l % d;
	}
      return h;
    }

  binvert_limb (inverse, d);

  if (d <= 0xFFFFFFFF)
    {
      /* Software-pipelined loop: the next limb is fetched while the
         current one is processed.  It runs size-1 times, so after the loop
         s holds the most significant limb, not yet processed.  */
      s = *src++;
      size--;
      do
        {
          SUBC_LIMB (c, l, s, c);
          s = *src++;
          q = l * inverse;
          umul_ppmm_half_lowequal (h, q, d, l);
          c += h;
          size--;
        }
      while (size != 0);

      if (s <= d)
        {
          /* With high s <= d the final step can be a subtract and addback.
             If c==0 then the addback will restore to l>=0.  If c==d then
             will get l==d if s==0, but that's ok per the function
             definition.  */

          l = c - s;
          /* l > c means the subtraction wrapped around, i.e. c < s. */
          l += (l > c ? d : 0);

          ASSERT_RETVAL (l);
          return l;
        }
      else
        {
          /* Can't skip a divide, just do the loop code once more. */
          SUBC_LIMB (c, l, s, c);
          q = l * inverse;
          umul_ppmm_half_lowequal (h, q, d, l);
          c += h;

          ASSERT_RETVAL (c);
          return c;
        }
    }
  else
    {
      /* d does not fit in 32 bits: pass its halves to the multiply macro. */
      mp_limb_t  dl = LOW32 (d);
      mp_limb_t  dh = HIGH32 (d);
      /* NOTE(review): i is never referenced in this block. */
      long i;

      s = *src++;
      size--;
      do
        {
          SUBC_LIMB (c, l, s, c);
          s = *src++;
          q = l * inverse;
          umul_ppmm_lowequal (h, q, d, dh, dl, l);
          c += h;
          size--;
        }
      while (size != 0);

      if (s <= d)
        {
          /* With high s <= d the final step can be a subtract and addback.
             If c==0 then the addback will restore to l>=0.  If c==d then
             will get l==d if s==0, but that's ok per the function
             definition.  */

          l = c - s;
          /* l > c means the subtraction wrapped around, i.e. c < s. */
          l += (l > c ? d : 0);

          ASSERT_RETVAL (l);
          return l;
        }
      else
        {
          /* Can't skip a divide, just do the loop code once more. */
          SUBC_LIMB (c, l, s, c);
          q = l * inverse;
          umul_ppmm_lowequal (h, q, d, dh, dl, l);
          c += h;

          ASSERT_RETVAL (c);
          return c;
        }
    }
}
Exemple #10
0
/* Newton iteration on {yp,bn} for an odd exponent k, leaving the result in
   rp[0..bn-1]; judging by the name this is the inverse k-th root of y
   modulo B^bn -- TODO confirm against the callers.

     rp  result area, bn limbs
     yp  operand y, bn limbs
     bn  number of limbs, bn > 0
     k   exponent, must be odd
     tp  scratch: bn limbs for (k+1)*r, bn limbs for the power, and the
         rest (from tp + 2*bn) is handed to mpn_powlo

   The low limb is computed with single-limb arithmetic, then the precision
   is raised along the chain ..., ceil(bn/4), ceil(bn/2), bn, applying

     r <- ((k+1)*r - y*r^(k+1)) / k

   at each precision.  */
void
mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
{
  mp_size_t steps[GMP_LIMB_BITS + 1];
  mp_ptr pow_area, powlo_scratch;
  mp_limb_t kinv, kp1, root, ylow;
  int nsteps;

  ASSERT (bn > 0);
  ASSERT ((k & 1) != 0);

  /* Carve up the scratch area using the full size bn.  */
  pow_area = tp + bn;
  powlo_scratch = tp + 2 * bn;
  kp1 = k + 1;

  binvert_limb (kinv, k);

  /* 4-bit initial approximation:

   y%16 | 1  3  5  7  9 11 13 15,
    k%4 +-------------------------+kp1%4
     1  | 1 11 13  7  9  3  5 15  |  2
     3  | 1  3  5  7  9 11 13 15  |  0

  */
  ylow = yp[0];

  root = ylow ^ (((ylow << 1) ^ (ylow << 2)) & (kp1 << 2) & 8);	/* 4 bits */
  root = kinv * (kp1 * root - ylow * powlimb(root, kp1 & 0x7f));	/* 8 bits */
  root = kinv * (kp1 * root - ylow * powlimb(root, kp1 & 0x7fff));	/* 16 bits */
#if GMP_NUMB_BITS > 16
  {
    /* Keep doubling the precision until the whole limb is covered.  */
    unsigned prec;
    for (prec = 16; prec < GMP_NUMB_BITS; prec *= 2)
      root = kinv * (kp1 * root - ylow * powlimb(root, kp1));
  }
#endif

  rp[0] = root;
  if (bn == 1)
    return;

  /* This initialization doesn't matter for the result (any garbage is
     cancelled in the iteration), but proper initialization makes
     valgrind happier. */
  MPN_ZERO (rp+1, bn-1);

  /* Record the precisions from the full size downwards...  */
  nsteps = 0;
  while (bn > 1)
    {
      steps[nsteps++] = bn;
      bn = (bn + 1) >> 1;
    }

  /* ...and replay them from the smallest to the largest.  */
  while (nsteps-- > 0)
    {
      bn = steps[nsteps];

      /* tp = (k+1) * r */
      mpn_mul_1 (tp, rp, bn, kp1);

      /* rp = y * r^(k+1), low bn limbs */
      mpn_powlo (pow_area, rp, &kp1, 1, bn, powlo_scratch);
      mpn_mullo_n (rp, yp, pow_area, bn);

      /* rp = ((k+1)*r - y*r^(k+1)) / k */
      mpn_sub_n (pow_area, tp, rp, bn);
      mpn_pi1_bdiv_q_1 (rp, pow_area, bn, k, kinv, 0);
    }
}