void
my__gmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
	ASSERT_ALWAYS (qxn == 0);

	ASSERT (nn >= 0);
	ASSERT (dn >= 0);
	ASSERT (dn == 0 || dp[dn - 1] != 0);
	ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
	ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));

	int adjust;
	gmp_pi1_t dinv;
	TMP_DECL;
	TMP_MARK;
								 /* conservative tests for quotient size */
	adjust = np[nn - 1] >= dp[dn - 1];
	mp_ptr n2p, d2p;
	mp_limb_t cy;
	int cnt;

	qp[nn - dn] = 0;			 /* zero high quotient limb */
	count_leading_zeros (cnt, dp[dn - 1]);
	cnt -= GMP_NAIL_BITS;
	d2p = TMP_ALLOC_LIMBS (dn);
	mpn_lshift (d2p, dp, dn, cnt);

	for (int i=0; i<dn; i+=1)
	{
		printf("d2p %08x\n", *( (int*) (((void*)(d2p))+(i*4))));
	}


	n2p = TMP_ALLOC_LIMBS (nn + 1);
	cy = mpn_lshift (n2p, np, nn, cnt);
	for (int i=0; i<nn; i+=1)
	{
		printf("n2p %08x\n", *( (int*) (((void*)(n2p))+(i*4))));
	}
	n2p[nn] = cy;
	nn += adjust;

        printf("d2p[dn-1] = %08lx\nd2p[dn-2] = %08lx\n", d2p[dn-1], d2p[dn-2]);
	invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
        printf("dinv %08lx\n", dinv.inv32);
	my_mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
	for (int i=0; i<nn; i+=1)
	{
		printf("inside qp %08x\n", *( (int*) (((void*)(qp))+(i*4))));
	}
	n2p[nn] = cy;

	mpn_rshift (rp, n2p, dn, cnt);
	TMP_FREE;
	return;

}
int main() {

gmp_pi1_t a;
unsigned long in[] = {0x7c2e09b7,0x847c9b5d};
invert_pi1(a, in[1], in[0]);
printf("%08lx\n", a.inv32);
return 0;

}
Example #3
0
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	/* n > 1 here */
	i = n;
	do
	  xp[--i] = GMP_NUMB_MAX;
	while (i);
	mpn_com (xp + n, dp, n);
	if (n == 2) {
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	} else {
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
    }
  else { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
	/* Code to detect and correct the "off by one" approximation. */
	mpn_mul_n (scratch, ip, dp, n);
	e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
	if (LIKELY(e)) /* The high part can not give a carry by itself. */
	  e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
	/* If the value was wrong (no carry), correct it (increment). */
	e ^= CNST_LIMB (1);
	MPN_INCR_U (ip, n, e);
      }
  }
}
Example #4
0
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else {
    TMP_DECL;

    TMP_MARK;
    if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
      {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	for (i = n - 1; i >= 0; i--)
	  xp[i] = GMP_NUMB_MAX;
	mpn_com (xp + n, dp, n);
	if (n == 2) {
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	} else {
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
      }
    else { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
	/* Code to detect and correct the "off by one" approximation. */
	mpn_mul_n (scratch, ip, dp, n);
	ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	if (! mpn_add (scratch, scratch, 2*n, dp, n))
	  MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
      }
    }
    TMP_FREE;
  }
}
Example #5
0
mp_limb_t
mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
                mp_size_t n)
{
  mp_size_t k, l;
  mp_limb_t qh, cy;
  mpfr_limb_ptr tp;
  MPFR_TMP_DECL(marker);

  MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 15); /* so that 2*(n/3) >= (n+4)/2 */
  k = MPFR_LIKELY (n < MPFR_DIVHIGH_TAB_SIZE) ? divhigh_ktab[n] : 2*(n/3);

  if (k == 0)
#if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
  {
    mpfr_pi1_t dinv2;
    invert_pi1 (dinv2, dp[n - 1], dp[n - 2]);
    return __gmpn_sbpi1_divappr_q (qp, np, n + n, dp, n, dinv2.inv32);
  }
#else /* use our own code for base-case short division */
    return mpfr_divhigh_n_basecase (qp, np, dp, n);
#endif
  else if (k == n)
Example #6
0
/* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
   with the most significant limb of the quotient as return value (0 or 1).
   Assumes the most significant bit of D is set. Clobbers N.

   The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
*/
static mp_limb_t
mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
                         mpfr_limb_srcptr dp, mp_size_t n)
{
  mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
  mpfr_pi1_t dinv2;

  np += n;

  if ((qh = (mpn_cmp (np, dp, n) >= 0)))
    mpn_sub_n (np, np, dp, n);

  /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */

  d1 = dp[n - 1];

  if (n == 1)
    {
      invert_limb (dinv, d1);
      umul_ppmm (q1, q0, np[0], dinv);
      qp[0] = np[0] + q1;
      return qh;
    }

  /* now n >= 2 */
  d0 = dp[n - 2];
  invert_pi1 (dinv2, d1, d0);
  /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
  while (n > 1)
    {
      /* Invariant: it remains to reduce n limbs from N (in addition to the
         initial low n limbs).
         Since n >= 2 here, necessarily we had n >= 2 initially, which means
         that in addition to the limb np[n-1] to reduce, we have at least 2
         extra limbs, thus accessing np[n-3] is valid. */

      /* warning: we can have np[n-1]=d1 and np[n-2]=d0, but since {np,n} < D,
         the largest possible partial quotient is B-1 */
      if (MPFR_UNLIKELY(np[n - 1] == d1 && np[n - 2] == d0))
        q2 = ~ (mp_limb_t) 0;
      else
        udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
                      d1, d0, dinv2.inv32);
      /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
         we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
         thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
         and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
         thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
         which proves that at most one correction is needed */
      q0 = mpn_submul_1 (np - 1, dp, n, q2);
      if (MPFR_UNLIKELY(q0 > np[n - 1]))
        {
          mpn_add_n (np - 1, np - 1, dp, n);
          q2 --;
        }
      qp[--n] = q2;
      dp ++;
    }

  /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
     q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
        <= floor((np[0]*B+np[1])/d1)
     thus q1 is not larger than the true quotient.
     q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
     For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
     thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
     (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
     d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
     thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.

     For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
     np[0]*B/d1 - 2.

     In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
     q - 4 <= q1 <= q
  */
  umul_ppmm (q1, q0, np[0], dinv2.inv32);
  qp[0] = np[0] + q1;

  return qh;
}
Example #7
0
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, q, r, tz, junk;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dup, dnp, qp, rp, junkp;
  mp_limb_t t;
  gmp_pi1_t dinv;
  long count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }

  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (q);
  mpz_init (r);
  mpz_init (tz);
  mpz_init (junk);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  dnp = TMP_ALLOC_LIMBS (maxdn);

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = -300; test < count; test++)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;

      if (test < 0)
	dbits = (test + 300) % (nbits - 1) + 1;
      else
	dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	RANDFUNC (d, rands, dbits);
      while (mpz_sgn (d) == 0);
      dn = SIZ (d);
      dup = PTR (d);
      MPN_COPY (dnp, dup, dn);
      dnp[dn - 1] |= GMP_NUMB_HIGHBIT;

      if (test % 2 == 0)
	{
	  RANDFUNC (n, rands, nbits);
	  nn = SIZ (n);
	  ASSERT_ALWAYS (nn >= dn);
	}
      else
	{
	  do
	    {
	      RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1));
	      RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2));
	      mpz_mul (n, q, d);
	      mpz_add (n, n, r);
	      nn = SIZ (n);
	    }
	  while (nn > maxnn || nn < dn);
	}

      ASSERT_ALWAYS (nn <= maxnn);
      ASSERT_ALWAYS (dn <= maxdn);

      mpz_urandomb (junk, rands, nbits);
      junkp = PTR (junk);

      np = PTR (n);

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	{
	  dnp[dn - 1] = GMP_NUMB_MAX;
	  dup[dn - 1] = GMP_NUMB_MAX;
	}

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = clearn; i < nn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	}

      if (dn >= 2)
	invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  /* Test mpn_sbpi1_div_qr */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
	    }

	  /* Test mpn_sbpi1_divappr_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
	    }

	  /* Test mpn_sbpi1_div_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
	    }

	  /* Test mpn_sb_div_qr_sec */
	  itch = 3 * nn + 4;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  if (nn >= dn)
	    MPN_COPY (qp, junkp, nn - dn + 1);
	  mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0);

	  /* Test mpn_sb_div_r_sec */
	  itch = nn + 2 * dn + 2;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  mpn_sb_div_r_sec (rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  /* Note: Since check_one cannot cope with random-only functions, we
	     pass qp[] from the previous function, mpn_sb_div_qr_sec.  */
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0);
	}

      /* Test mpn_dcpi1_div_qr */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
	}

      /* Test mpn_dcpi1_divappr_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
	}

      /* Test mpn_dcpi1_div_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
	}

     /* Test mpn_mu_div_qr */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_qr_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
	}

      /* Test mpn_mu_divappr_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_divappr_q_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
	}

      /* Test mpn_mu_div_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_q_itch (nn, dn, 0);
	  if (itch + 1> alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
	}

      if (1)
	{
	  itch = nn + 1;
	  if (itch + 1> alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  mpn_div_q (qp, np, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
	}

      if (dn >= 2 && nn >= 2)
	{
	  mp_limb_t qh;

	  /* mpn_divrem_2 */
	  MPN_COPY (rp, np, nn);
	  qp[nn - 2] = qp[nn-1] = qran1;

	  qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);

	  /* Missing: divrem_2 with fraction limbs. */

	  /* mpn_div_qr_2 */
	  qp[nn - 2] = qran1;

	  qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
	}
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (q);
  mpz_clear (r);
  mpz_clear (tz);
  mpz_clear (junk);

  tests_end ();
  return 0;
}
Example #8
0
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  if (el < 20)
    {
      mp_ptr xp, tp, mp, bp, scratch;
      mp_size_t xn, tn, mn, bn;
      int m_zero_cnt;
      int c;
      mp_limb_t e, m2;
      gmp_pi1_t dinv;
      TMP_DECL;

      mp = PTR(m);
      mn = ABSIZ(m);
      if (UNLIKELY (mn == 0))
	DIVIDE_BY_ZERO;

      if (el == 0)
	{
	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
	     M equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  return;
	}

      TMP_MARK;

      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}

      m2 = mn == 1 ? 0 : mp[mn - 2];
      invert_pi1 (dinv, mp[mn - 1], m2);

      bn = ABSIZ(b);
      bp = PTR(b);
      if (bn > mn)
	{
	  /* Reduce possibly huge base.  Use a function call to reduce, since we
	     don't want the quotient allocation to live until function return.  */
	  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
	  reduce (new_bp, bp, bn, mp, mn, &dinv);
	  bp = new_bp;
	  bn = mn;
	  /* Canonicalize the base, since we are potentially going to multiply with
	     it quite a few times.  */
	  MPN_NORMALIZE (bp, bn);
	}

      if (bn == 0)
	{
	  SIZ(r) = 0;
	  TMP_FREE;
	  return;
	}

      tp = TMP_ALLOC_LIMBS (2 * mn + 1);
      xp = TMP_ALLOC_LIMBS (mn);
      scratch = TMP_ALLOC_LIMBS (mn + 1);

      MPN_COPY (xp, bp, bn);
      xn = bn;

      e = el;
      count_leading_zeros (c, e);
      e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
      c = GMP_LIMB_BITS - 1 - c;

      if (c == 0)
	{
	  /* If m is already normalized (high bit of high limb set), and b is
	     the same size, but a bigger value, and e==1, then there's no
	     modular reductions done and we can end up with a result out of
	     range at the end. */
	  if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
	    mpn_sub_n (xp, xp, mp, mn);
	}
      else
	{
	  /* Main loop. */
	  do
	    {
	      mpn_sqr (tp, xp, xn);
	      tn = 2 * xn; tn -= tp[tn - 1] == 0;
	      if (tn < mn)
		{
		  MPN_COPY (xp, tp, tn);
		  xn = tn;
		}
	      else
		{
		  mod (tp, tn, mp, mn, &dinv, scratch);
		  MPN_COPY (xp, tp, mn);
		  xn = mn;
		}

	      if ((mp_limb_signed_t) e < 0)
		{
		  mpn_mul (tp, xp, xn, bp, bn);
		  tn = xn + bn; tn -= tp[tn - 1] == 0;
		  if (tn < mn)
		    {
		      MPN_COPY (xp, tp, tn);
		      xn = tn;
		    }
		  else
		    {
		      mod (tp, tn, mp, mn, &dinv, scratch);
		      MPN_COPY (xp, tp, mn);
		      xn = mn;
		    }
		}
	      e <<= 1;
	      c--;
	    }
	  while (c != 0);
	}

      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
	 with the original M.  */
      if (m_zero_cnt != 0)
	{
	  mp_limb_t cy;
	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
	  tp[xn] = cy; xn += cy != 0;

	  if (xn < mn)
	    {
	      MPN_COPY (xp, tp, xn);
	    }
	  else
	    {
	      mod (tp, xn, mp, mn, &dinv, scratch);
	      MPN_COPY (xp, tp, mn);
	      xn = mn;
	    }
	  mpn_rshift (xp, xp, xn, m_zero_cnt);
	}
      MPN_NORMALIZE (xp, xn);

      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
	{
	  mp = PTR(m);			/* want original, unnormalized m */
	  mpn_sub (xp, mp, mn, xp, xn);
	  xn = mn;
	  MPN_NORMALIZE (xp, xn);
	}
      MPZ_REALLOC (r, xn);
      SIZ (r) = xn;
      MPN_COPY (PTR(r), xp, xn);

      TMP_FREE;
    }
  else
    {
      /* For large exponents, fake a mpz_t exponent and deflect to the more
	 sophisticated mpz_powm.  */
      mpz_t e;
      mp_limb_t ep[LIMBS_PER_ULONG];
      MPZ_FAKE_UI (e, ep, el);
      mpz_powm (r, b, e, m);
    }
}
Example #9
0
void
mpn_div_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
	    }
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
	    }
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* |________________________|
                |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);


      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{