Example #1
int
mpf_cmp (mpf_srcptr u, mpf_srcptr v)
{
  mp_srcptr up, vp;
  mp_size_t usize, vsize;
  mp_exp_t uexp, vexp;
  int cmp;
  int usign;

  uexp = u->_mp_exp;
  vexp = v->_mp_exp;

  usize = u->_mp_size;
  vsize = v->_mp_size;

  /* 1. Are the signs different?  */
  if ((usize ^ vsize) >= 0)
    {
      /* U and V are both non-negative or both negative.  */
      if (usize == 0)
	/* vsize >= 0 */
	return -(vsize != 0);
      if (vsize == 0)
	/* usize >= 0 */
	return usize != 0;
      /* Fall out.  */
    }
  else
    {
      /* Either U or V is negative, but not both.  */
      return usize >= 0 ? 1 : -1;
    }

  /* U and V have the same sign and are both non-zero.  */

  usign = usize >= 0 ? 1 : -1;

  /* 2. Are the exponents different?  */
  if (uexp > vexp)
    return usign;
  if (uexp < vexp)
    return -usign;

  usize = ABS (usize);
  vsize = ABS (vsize);

  up = u->_mp_d;
  vp = v->_mp_d;

#define STRICT_MPF_NORMALIZATION 0
#if ! STRICT_MPF_NORMALIZATION
  /* Ignore zeroes at the low end of U and V.  */
  while (up[0] == 0)
    {
      up++;
      usize--;
    }
  while (vp[0] == 0)
    {
      vp++;
      vsize--;
    }
#endif

  if (usize > vsize)
    {
      cmp = mpn_cmp (up + usize - vsize, vp, vsize);
      if (cmp == 0)
	return usign;
    }
  else if (vsize > usize)
    {
      cmp = mpn_cmp (up, vp + vsize - usize, usize);
      if (cmp == 0)
	return -usign;
    }
  else
    {
      cmp = mpn_cmp (up, vp, usize);
      if (cmp == 0)
	return 0;
    }
  return cmp > 0 ? usign : -usign;
}
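
A minimal usage sketch of the comparison above at the public mpf level
(assumes gmp.h and linking against the library); like strcmp, mpf_cmp
returns a negative, zero, or positive result:

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpf_t x, y;
  int cmp;
  mpf_init_set_d (x, 1.5);
  mpf_init_set_d (y, -2.25);
  cmp = mpf_cmp (x, y);
  printf ("%s\n", cmp > 0 ? "x > y" : cmp < 0 ? "x < y" : "x == y");
  mpf_clear (x);
  mpf_clear (y);
  return 0;
}
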
Example #2
void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  int vm1_neg, vmh_neg;
  mp_limb_t cy;
  mp_ptr gp, hp;
  mp_ptr as1, asm1, as2, ash, asmh;
  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
  enum toom4_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  as1  = TMP_SALLOC_LIMBS (n + 1);
  asm1 = TMP_SALLOC_LIMBS (n + 1);
  as2  = TMP_SALLOC_LIMBS (n + 1);
  ash  = TMP_SALLOC_LIMBS (n + 1);
  asmh = TMP_SALLOC_LIMBS (n + 1);

  bs1  = TMP_SALLOC_LIMBS (n + 1);
  bsm1 = TMP_SALLOC_LIMBS (n + 1);
  bs2  = TMP_SALLOC_LIMBS (n + 1);
  bsh  = TMP_SALLOC_LIMBS (n + 1);
  bsmh = TMP_SALLOC_LIMBS (n + 1);

  gp = pp;
  hp = pp + n + 1;

  /* Compute as1 and asm1.  */
  gp[n]  = mpn_add_n (gp, a0, a2, n);
  gp[n] += mpn_add   (gp, gp, n, a4, s);
  hp[n]  = mpn_add_n (hp, a1, a3, n);
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#else
  mpn_add_n (as1, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_sub_n (asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#endif

  /* Compute as2.  */
#if !HAVE_NATIVE_mpn_addlsh_n
  ash[n] = mpn_lshift (ash, a2, n, 2);			/*        4a2       */
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a3, a4, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
  as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy  = mpn_lshift (as2, a4, s, 1);
  cy += mpn_add_n (as2, a3, as2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 4 * cy + mpn_lshift (as2, as2, n, 2);
  cy += mpn_add_n (as2, a1, as2, n);
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  as2[n] = cy + mpn_add_n (as2, a0, as2, n);
  mpn_add_n (as2, ash, as2, n + 1);
#endif

  /* Compute ash and asmh.  */
#if HAVE_NATIVE_mpn_addlsh_n
  cy  = mpn_addlsh_n (gp, a2, a0, n, 2);		/* 4a0  +  a2       */
  cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2);	/* 16a0 + 4a2 +  a4 */ /* FIXME s */
  gp[n] = cy;
  cy  = mpn_addlsh_n (hp, a3, a1, n, 2);		/*  4a1 +  a3       */
  cy = 2 * cy + mpn_lshift (hp, hp, n, 1);		/*  8a1 + 2a3       */
  hp[n] = cy;
#else
  gp[n] = mpn_lshift (gp, a0, n, 4);			/* 16a0             */
  mpn_add (gp, gp, n + 1, a4, s);			/* 16a0 +        a4 */
  mpn_add_n (gp, ash, gp, n+1);				/* 16a0 + 4a2 +  a4 */
  cy  = mpn_lshift (hp, a1, n, 3);			/*  8a1             */
  cy += mpn_lshift (ash, a3, n, 1);			/*        2a3       */
  cy += mpn_add_n (hp, ash, hp, n);			/*  8a1 + 2a3       */
  hp[n] = cy;
#endif
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#else
  mpn_add_n (ash, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_sub_n (asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#endif

  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_addsub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
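
A toy single-limb sketch of the evaluate-at-1/-1 pattern repeated above:
as1 = g + h gives f(1), asm1 = |g - h| gives |f(-1)|, and vm1_neg records
the sign, exactly as the mpn_add_n/mpn_sub_n branches do on n+1 limb
operands (hypothetical helper, not the MPIR API):

#include <stdint.h>
#include <stdio.h>

static void
eval_pm1 (uint64_t g, uint64_t h, uint64_t *as1, uint64_t *asm1, int *vm1_neg)
{
  *as1 = g + h;                 /* f(1) = g + h */
  if (g < h)
    {
      *asm1 = h - g;            /* f(-1) < 0 */
      *vm1_neg = 1;
    }
  else
    {
      *asm1 = g - h;            /* f(-1) >= 0 */
      *vm1_neg = 0;
    }
}

int
main (void)
{
  uint64_t as1, asm1;
  int neg;
  eval_pm1 (3, 5, &as1, &asm1, &neg);
  printf ("f(1)=%llu |f(-1)|=%llu neg=%d\n",
          (unsigned long long) as1, (unsigned long long) asm1, neg);
  return 0;
}
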
Example #3
void
check_n (void)
{
  {
    int  n = -1;
    check_one ("blah", "%nblah", &n);
    ASSERT_ALWAYS (n == 0);
  }

  {
    int  n = -1;
    check_one ("hello ", "hello %n", &n);
    ASSERT_ALWAYS (n == 6);
  }

  {
    int  n = -1;
    check_one ("hello  world", "hello %n world", &n);
    ASSERT_ALWAYS (n == 6);
  }

#define CHECK_N(type, string)                           \
  do {                                                  \
    type  x[2];                                         \
    char  fmt[128];                                     \
                                                        \
    x[0] = ~ (type) 0;                                  \
    x[1] = ~ (type) 0;                                  \
    sprintf (fmt, "%%d%%%sn%%d", string);               \
    check_one ("123456", fmt, 123, &x[0], 456);         \
                                                        \
    /* should write whole of x[0] and none of x[1] */   \
    ASSERT_ALWAYS (x[0] == 3);                          \
    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);		\
                                                        \
  } while (0)

  CHECK_N (mp_limb_t, "M");
  CHECK_N (char,      "hh");
  CHECK_N (long,      "l");
#if HAVE_LONG_LONG
  CHECK_N (long long, "L");
#endif
#if HAVE_INTMAX_T
  CHECK_N (intmax_t,  "j");
#endif
#if HAVE_PTRDIFF_T
  CHECK_N (ptrdiff_t, "t");
#endif
  CHECK_N (short,     "h");
  CHECK_N (size_t,    "z");

  {
    mpz_t  x[2];
    mpz_init_set_si (x[0], -987L);
    mpz_init_set_si (x[1],  654L);
    check_one ("123456", "%d%Zn%d", 123, x[0], 456);
    MPZ_CHECK_FORMAT (x[0]);
    MPZ_CHECK_FORMAT (x[1]);
    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);
    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);
    mpz_clear (x[0]);
    mpz_clear (x[1]);
  }

  {
    mpq_t  x[2];
    mpq_init (x[0]);
    mpq_init (x[1]);
    mpq_set_ui (x[0], -987L, 654L);
    mpq_set_ui (x[1], 4115L, 226L);
    check_one ("123456", "%d%Qn%d", 123, x[0], 456);
    MPQ_CHECK_FORMAT (x[0]);
    MPQ_CHECK_FORMAT (x[1]);
    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);
    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);
    mpq_clear (x[0]);
    mpq_clear (x[1]);
  }

  {
    mpf_t  x[2];
    mpf_init (x[0]);
    mpf_init (x[1]);
    mpf_set_ui (x[0], -987L);
    mpf_set_ui (x[1],  654L);
    check_one ("123456", "%d%Fn%d", 123, x[0], 456);
    MPF_CHECK_FORMAT (x[0]);
    MPF_CHECK_FORMAT (x[1]);
    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);
    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);
    mpf_clear (x[0]);
    mpf_clear (x[1]);
  }

  {
    mp_limb_t  a[5];
    mp_limb_t  a_want[numberof(a)];
    mp_size_t  i;

    a[0] = 123;
    check_one ("blah", "bl%Nnah", a, (mp_size_t) 0);
    ASSERT_ALWAYS (a[0] == 123);

    MPN_ZERO (a_want, numberof (a_want));
    for (i = 1; i < numberof (a); i++)
      {
        check_one ("blah", "bl%Nnah", a, i);
        a_want[0] = 2;
        ASSERT_ALWAYS (mpn_cmp (a, a_want, i) == 0);
      }
  }
}
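
A usage sketch of the %n forms this test exercises (assumes gmp.h;
gmp_printf documents %n with type modifiers, e.g. %Zn writes the count of
characters output so far into an mpz_t):

#include <gmp.h>

int
main (void)
{
  mpz_t count;
  mpz_init (count);
  gmp_printf ("hello %Zn world\n", count);  /* count becomes 6 */
  gmp_printf ("count = %Zd\n", count);
  mpz_clear (count);
  return 0;
}
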
Example #4
/* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
   with the most significant limb of the quotient as return value (0 or 1).
   Assumes the most significant bit of D is set. Clobbers N.

   The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
*/
static mp_limb_t
mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
                         mpfr_limb_srcptr dp, mp_size_t n)
{
  mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
  mpfr_pi1_t dinv2;

  np += n;

  if ((qh = (mpn_cmp (np, dp, n) >= 0)))
    mpn_sub_n (np, np, dp, n);

  /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */

  d1 = dp[n - 1];

  if (n == 1)
    {
      invert_limb (dinv, d1);
      umul_ppmm (q1, q0, np[0], dinv);
      qp[0] = np[0] + q1;
      return qh;
    }

  /* now n >= 2 */
  d0 = dp[n - 2];
  invert_pi1 (dinv2, d1, d0);
  /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
  while (n > 1)
    {
      /* Invariant: it remains to reduce n limbs from N (in addition to the
         initial low n limbs).
         Since n >= 2 here, necessarily we had n >= 2 initially, which means
         that in addition to the limb np[n-1] to reduce, we have at least 2
         extra limbs, thus accessing np[n-3] is valid. */

      /* Warning: we can have np[n-1]>d1 or (np[n-1]=d1 and np[n-2]>=d0) here,
         since we truncate the divisor at each step, but since {np,n} < D
         originally, the largest possible partial quotient is B-1. */
      if (MPFR_UNLIKELY(np[n-1] > d1 || (np[n-1] == d1 && np[n-2] >= d0)))
        q2 = MPFR_LIMB_MAX;
      else
        udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
                      d1, d0, dinv2.inv32);
      /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
         we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
         thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
         and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
         thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
         which proves that at most one correction is needed */
      q0 = mpn_submul_1 (np - 1, dp, n, q2);
      if (MPFR_UNLIKELY(q0 > np[n - 1]))
        {
          mpn_add_n (np - 1, np - 1, dp, n);
          q2 --;
        }
      qp[--n] = q2;
      dp ++;
    }

  /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
     q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
        <= floor((np[0]*B+np[1])/d1)
     thus q1 is not larger than the true quotient.
     q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
     For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
     thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
     (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
     d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
     thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.

     For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
     np[0]*B/d1 - 2.

     In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
     q - 4 <= q1 <= q
  */
  umul_ppmm (q1, q0, np[0], dinv2.inv32);
  qp[0] = np[0] + q1;

  return qh;
}
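
A toy model (base B = 2^16, plain integers, illustrative only) of the
truncated-divisor estimate the loop above relies on: estimating with only
the top divisor digit d1 never under-shoots the true quotient digit, and
over-shoots by at most 2 when the divisor is normalised:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

int
main (void)
{
  const uint32_t B = 1u << 16;
  uint32_t d1 = 0x9abc, d0 = 0x1234;           /* normalised: d1 >= B/2 */
  uint64_t d = (uint64_t) d1 * B + d0;
  uint64_t n = (uint64_t) 0x7fff * d + 12345;  /* any dividend < B*d */
  uint64_t q = n / d;                          /* true quotient digit */
  uint64_t q_est = n / ((uint64_t) d1 * B);    /* truncated-divisor estimate */
  assert (q_est >= q && q_est - q <= 2);
  printf ("q = %llu, q_est = %llu\n",
          (unsigned long long) q, (unsigned long long) q_est);
  return 0;
}
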
Example #5
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, 
		    mp_limb_t dip, mp_limb_t d1ip, mp_ptr tp)
{
  mp_limb_t qh, cy;
  mp_ptr q_hi;
  mp_size_t m;
  mp_limb_t ret = 0;

  ASSERT (n >= 6);

  /* if the top n limbs of np are >= dp, high limb of quotient is 1 */
  if (mpn_cmp(np + n, dp, n) >= 0)
  {
     ret = 1;
     mpn_sub_n(np + n, np + n, dp, n);
  }

  /* top n limbs of np are now < dp */

  m = (n + 1) / 2;
  q_hi = qp + n - m;

  /* 
     FIXME: we could probably avoid this copy if we could guarantee 
     that sb_div_appr_q/dc_divappr_q_n did not destroy the "bottom 
     half" of N */
  MPN_COPY (tp, np, 2*n);

  /* estimate the high m+1 limbs of the quotient, using a 2*m by m
     division; the quotient may be computed 1 too large as it is
     approximate, and moreover, even computed precisely it may be two
     too large due to the truncation we've done to a 2*m by m
     division... */
  if (m < DC_DIVAPPR_Q_N_THRESHOLD)
    qh = mpn_sb_divappr_q (q_hi, tp + 2*n - 2*m, 2*m,
			   dp + n - m, m, dip, d1ip);
  else
    qh = mpn_dc_divappr_q_n (q_hi, tp + 2*n - 2*m,
			     dp + n - m, m, dip, d1ip, tp + 2*n);

  /* we therefore decrease the estimate by 3... */
  qh -= mpn_sub_1 (q_hi, q_hi, m, (mp_limb_t) 3);
  
  /* ensuring it doesn't become negative */
  if (qh & GMP_NUMB_HIGHBIT)
    {
      MPN_ZERO (q_hi, m);
      qh = 0;
    }
  
  /* note qh is now always zero as the quotient we have is definitely
     correct or up to two too small, and we already normalised np */
  ASSERT (qh == 0);
  
  /* we know that {np+n-m, n+m} = q_hi * D + e0, where 0 <= e0 < C*B^n, 
     where C is a small positive constant. Estimate q_hi * D using 
     middle product, developing one additional limb, i.e. develop
     n - m + 3 limbs. The bottom limb is meaningless and the next limb
     may be too small by up to some small multiple of n, but recall 
     n << B. */
  mpn_mulmid (tp, dp, n, q_hi + 1, m - 2);

  /* do some parts of the middle product "manually": */
  tp[n - m + 2] += mpn_addmul_1 (tp, dp + m - 2, n - m + 2, q_hi[0]);
  mpn_addmul_1 (tp + 1, dp, n - m + 2, q_hi[m-1]);
  
  /* subtract that estimate from N. We note the limb at np + n - 2 
     is then meaningless, and the next limb might be too large by a
     small amount, i.e. the bottom n limbs of np are now possibly
     too large by a quantity much less than dp */
  mpn_sub_n (np + n - 2, np + n - 2, tp, n - m + 3);

  /* recursively divide to obtain low half of quotient, developing
     one more limb than we would need if everything had been exact.
     As this extra limb is out by only a small amount, rounding the
     remaining limbs based on its value and discarding the extra limb
     results in a quotient which is at most 1 too large */
  if (n - m + 2 < DC_DIVAPPR_Q_N_THRESHOLD)
    cy = mpn_sb_divappr_q (tp, np + m - 3, 2*n - 2*m + 4,
			   dp + m - 2, n - m + 2, dip, d1ip);
  else
    cy = mpn_dc_divappr_q_n (tp, np + m - 3, dp + m - 2, n - m + 2,
			     dip, d1ip, tp + n - m + 2);

  /* FIXME: The only reason this copy happens is that we elected to 
     develop one extra quotient limb in the second recursive quotient. */
  MPN_COPY (qp, tp + 1, n - m);

  /* Construct final quotient from low and hi parts... */
  ret += mpn_add_1 (qp + n - m, qp + n - m, m, tp[n-m+1]);
  ret += mpn_add_1 (qp + n - m + 1, qp + n - m + 1, m - 1, cy);
  if (tp[0] >= GMP_NUMB_HIGHBIT)
    ret += mpn_add_1 (qp, qp, n, 1);   /* ...rounding quotient up */

  /* As the final quotient may be 1 too large, we may have ret == 2
     (it is very unlikely, but can be triggered relatively easily at
     random when dp = 0x80000...0000); then Q must be 2000.... and we
     should instead return 1ffff.... */
  if (ret == 2)
    {
      ret -= mpn_sub_1 (qp, qp, n, 1);
      ASSERT (ret == 1);
    }

  return ret;
}
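
A sketch of the final ret == 2 fix-up above with plain 64-bit limbs
(hypothetical helper, not the MPIR API): subtracting 1 from a quotient of
the form 2000... (carry limb 2, low limbs 0) borrows through and leaves
1fff..., carry limb 1:

#include <stdint.h>
#include <stdio.h>

static uint64_t
sub_1 (uint64_t *p, int n, uint64_t b)  /* returns the borrow out */
{
  int i;
  for (i = 0; i < n && b; i++)
    {
      uint64_t t = p[i];
      p[i] = t - b;
      b = t < b;
    }
  return b;
}

int
main (void)
{
  uint64_t q[2] = { 0, 0 };  /* low limbs of 2*B^2 */
  uint64_t ret = 2;          /* top "carry" limb   */
  ret -= sub_1 (q, 2, 1);    /* borrow propagates into the carry */
  printf ("ret = %llu, q = %016llx %016llx\n", (unsigned long long) ret,
          (unsigned long long) q[1], (unsigned long long) q[0]);
  return 0;
}
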
Example #6
static int
do_test (void)
{
  mp1 ex, x, xt, e2, e3;
  int i;
  int errors = 0;
  int failures = 0;
  mp1 maxerror;
  int maxerror_s = 0;
  const double sf = pow (2, mpbpl);

  /* assert(mpbpl == mp_bits_per_limb); */
  assert(FRAC / mpbpl * mpbpl == FRAC);

  memset (maxerror, 0, sizeof (mp1));
  memset (xt, 0, sizeof (mp1));
  xt[(FRAC - N2) / mpbpl] = (mp_limb_t)1 << (FRAC - N2) % mpbpl;

  for (i = 0; i < (1 << N2); ++i)
    {
      int e2s, e3s, j;
      double de2;

      mpn_mul_1 (x, xt, SZ, i);
      exp2_mpn (ex, x);
      de2 = exp2 (i / (double) (1 << N2));
      for (j = SZ - 1; j >= 0; --j)
	{
	  e2[j] = (mp_limb_t) de2;
	  de2 = (de2 - e2[j]) * sf;
	}
      if (mpn_cmp (ex, e2, SZ) >= 0)
	mpn_sub_n (e3, ex, e2, SZ);
      else
	mpn_sub_n (e3, e2, ex, SZ);

      e2s = mpn_bitsize (e2, SZ);
      e3s = mpn_bitsize (e3, SZ);
      if (e3s >= 0 && e2s - e3s < 54)
	{
#if PRINT_ERRORS
	  printf ("%06x ", i * (0x100000 / (1 << N2)));
	  print_mpn_fp (ex, (FRAC / 4) + 1, 16);
	  putchar ('\n');
	  fputs ("       ",stdout);
	  print_mpn_fp (e2, (FRAC / 4) + 1, 16);
	  putchar ('\n');
	  printf (" %c     ",
		  e2s - e3s < 54 ? e2s - e3s == 53 ? 'e' : 'F' : 'P');
	  print_mpn_fp (e3, (FRAC / 4) + 1, 16);
	  putchar ('\n');
#endif
	  errors += (e2s - e3s == 53);
	  failures += (e2s - e3s < 53);
	}
      if (e3s >= maxerror_s
	  && mpn_cmp (e3, maxerror, SZ) > 0)
	{
	  memcpy (maxerror, e3, sizeof (mp1));
	  maxerror_s = e3s;
	}
    }

  /* Check exp_mpn against precomputed value of exp(1).  */
  memset (x, 0, sizeof (mp1));
  x[FRAC / mpbpl] = (mp_limb_t)1 << FRAC % mpbpl;
  exp_mpn (ex, x);
  if (mpn_cmp (ex, mp_exp1, SZ) >= 0)
    mpn_sub_n (e3, ex, mp_exp1, SZ);
  else
    mpn_sub_n (e3, mp_exp1, ex, SZ);

  printf ("%d failures; %d errors; error rate %0.2f%%\n", failures, errors,
	  errors * 100.0 / (double) (1 << N2));
  fputs ("maximum error:   ", stdout);
  print_mpn_fp (maxerror, (FRAC / 4) + 1, 16);
  putchar ('\n');
  fputs ("error in exp(1): ", stdout);
  print_mpn_fp (e3, (FRAC / 4) + 1, 16);
  putchar ('\n');

  return failures == 0 ? 0 : 1;
}
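
A self-contained sketch of the double-to-limbs decomposition used in the
inner loop above: repeatedly take the integer part and scale the remainder
by 2^mpbpl (here 64; the fraction limb of exp2(0.5) shows sqrt(2)):

#include <stdint.h>
#include <stdio.h>
#include <math.h>

int
main (void)
{
  const double sf = pow (2, 64);
  double de2 = exp2 (0.5);  /* value to convert, here in [0, 2^64) */
  uint64_t e2[2];
  int j;
  for (j = 1; j >= 0; --j)  /* most significant limb first */
    {
      e2[j] = (uint64_t) de2;
      de2 = (de2 - (double) e2[j]) * sf;
    }
  printf ("%016llx.%016llx\n",
          (unsigned long long) e2[1], (unsigned long long) e2[0]);
  return 0;
}
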
Example #7
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t init_scratch, orig_n = n;
  mp_size_t scratch, un, u0n, u1n;
  mp_limb_t t;
  mp_ptr tp, u0, u1;
  int swapped = 0;
  struct ngcd_matrix M;
  mp_size_t p;
  mp_size_t nn;
  mp_limb_signed_t a;
  int c;
  TMP_DECL;
  
  ASSERT (an >= n);
  
  if (an == 1)
    {
      if (!n)
        {
          /* shouldn't ever occur, but we include it for completeness */
          gp[0] = ap[0];
          s0p[0] = 1;
          *s0size = 1;
          return 1;
        }

      gp[0] = mpn_gcdinv_1 (&a, ap[0], bp[0]);
      if (a < (mp_limb_signed_t) 0)
        {
          s0p[0] = -a;
          (*s0size) = -1;
        }
      else
        {
          s0p[0] = a;
          (*s0size) = 1 - (s0p[0] == 0);
        }
      return 1;
    }

  init_scratch = MPN_NGCD_MATRIX_INIT_ITCH (n-P_SIZE(n));
  scratch = mpn_nhgcd_itch ((n+1)/2);

  /* Space needed for mpn_ngcd_matrix_adjust */
  if (scratch < 2*n)
    scratch = 2*n;
  if (scratch < an - n + 1) /* the first division can sometimes be selfish!! */
    scratch = an - n + 1;

  /* Space needed for cofactor adjust */
  scratch = MAX (scratch, 2*(n+1) + P_SIZE(n) + 1);

  TMP_MARK;
  
  /* 2n+2 limbs for u0 and u1, 5n+2 + MPN_GCD_LEHMER_N_ITCH(n) limbs for
     Lehmer and copies of ap, bp and s (and finally 3n+1 for t and get_t) */
  if (5*n + 2 + MPN_GCD_LEHMER_N_ITCH(n) > init_scratch + scratch)
    tp = TMP_ALLOC_LIMBS (7*n + 4 + MPN_GCD_LEHMER_N_ITCH(n));
  else
    tp = TMP_ALLOC_LIMBS (2*(n+1) + init_scratch + scratch);
    
  if (an > n)
    {
      mp_ptr qp = tp;

      mpn_tdiv_qr (qp, ap, 0, ap, an, bp, n);
      
      an = n;
      MPN_NORMALIZE (ap, an);
      if (an == 0)
	{	  
	  MPN_COPY (gp, bp, n);
	  TMP_FREE;
	  (*s0size) = 0;
	  
	  return n;
	}
    }
    
  if (BELOW_THRESHOLD (n, GCDEXT_THRESHOLD))
    {
      n = mpn_ngcdext_lehmer (gp, s0p, s0size, ap, bp, n, tp);
      TMP_FREE;
      return n;
    }

  u0 = tp; /* Cofactor space */
  u1 = tp + n + 1;

  MPN_ZERO (tp, 2*(n+1));

  tp += 2*(n+1);

  /* First iteration, set up u0 and u1 */

  p = P_SIZE(n);

  mpn_ngcd_matrix_init (&M, n - p, tp);
  ASSERT (tp + init_scratch > M.p[1][1] + M.n);
  nn = mpn_nhgcd (ap + p, bp + p, n - p, &M, tp + init_scratch);
  if (nn > 0)
    {
      n = mpn_ngcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + init_scratch);

      /* (ap'', bp'')^T = M^-1 (ap', bp')^T
         and (ap', bp') = (1*ap + ?*bp, 0*ap + ?*bp).
         We let u0 be minus the factor of ap appearing in the expression
         for bp'', and u1 be the factor of ap appearing in the expression
         for ap''.  */

      MPN_COPY (u0, M.p[1][0], M.n);
      MPN_COPY (u1, M.p[1][1], M.n);

      un = M.n;
      /* normalise u0 and u1; both cannot be zero, as det = 1 */
      while ((u0[un-1] == 0) && (u1[un-1] == 0))
        un--;
    }
  else
    {
      mp_size_t gn;

      un = 1;
      u0[0] = 0; /* bp = 0*ap + ?*bp, thus u0 = -0 */
      u1[0] = 1; /* ap = 1*ap + ?*bp, thus u1 = 1 */

      n = mpn_ngcdext_subdiv_step (gp, &gn, s0p, u0, u1, &un, ap, bp, n, tp);
      if (n == 0)
        {
          /* never observed to occur */
          (*s0size) = un;
          ASSERT (s0p[*s0size - 1] != 0);
          TMP_FREE;
          return gn;
        }
    }

  while (ABOVE_THRESHOLD (n, GCDEXT_THRESHOLD))
    {
      struct ngcd_matrix M;
      mp_size_t p = P_SIZE(n);
      mp_size_t nn;
      
      mpn_ngcd_matrix_init (&M, n - p, tp);
      nn = mpn_nhgcd (ap + p, bp + p, n - p, &M, tp + init_scratch);
      if (nn > 0)
        {
          n = mpn_ngcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + init_scratch);

          ngcdext_cofactor_adjust (u0, u1, &un, &M, tp + init_scratch);

          /* (ap'', bp'')^T = M^-1 (ap', bp')^T
             and (ap', bp') = (u1*ap + ?*bp, -u0*ap + ?*bp).
             So we need u0' = -(-c*u1 + a*-u0) = a*u0 + c*u1
             and we need u1' = (d*u1 - b*-u0) = b*u0 + d*u1.  */

          ASSERT (un <= orig_n + 1);
        }
      else
        {
          mp_size_t gn;

          n = mpn_ngcdext_subdiv_step (gp, &gn, s0p, u0, u1, &un, ap, bp, n, tp);
          ASSERT (un <= orig_n + 1);
          if (n == 0)
            {
              (*s0size) = un;
              ASSERT (((*s0size) == 0) || (s0p[ABS (*s0size) - 1] != 0));
              TMP_FREE;
              return gn;
            }
        }
    }

  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
  ASSERT (u0[un-1] > 0 || u1[un-1] > 0);

  if (ap[n-1] < bp[n-1])
    {
      MP_PTR_SWAP (ap, bp);
      MP_PTR_SWAP (u0, u1);
      swapped = 1;
    }
   
  an = n; /* {ap, an} and {bp, bn} are normalised, {ap, an} >= {bp, bn} */
  MPN_NORMALIZE (bp, n);

  if (n == 0)
    {
      /* If bp == 0 then gp = ap, with cofactor u1 (or -u1 if we
         swapped).  This case never seems to happen.  */
      MPN_COPY (gp, ap, an);
      MPN_NORMALIZE (u1, un);
      MPN_COPY (s0p, u1, un);
      (*s0size) = un;
      if (swapped)
        (*s0size) = -(*s0size);
      TMP_FREE;
      return an;
    }

  /* If at this point we have s*ap' + t*bp' = gp, where gp is the gcd
     and (ap', bp') = (u1*ap + ?*bp, -u0*ap + ?*bp),
     then gp = s*u1*ap - t*u0*ap + ?*bp,
     and the cofactor we want is (s*u1 - t*u0).

     First there is the special case u0 = 0, u1 = 1, in which case we
     do not need to compute t...  */

  ASSERT (u1 + un <= tp);
  u0n = un;
  MPN_NORMALIZE (u0, u0n); /* {u0, u0n} is now normalised */

  if (u0n == 0) /* the u1 = 1 case is rare */
    {
      mp_size_t gn;

      gn = mpn_ngcdext_lehmer (gp, s0p, s0size, ap, bp, n, tp);
      if (swapped)
        (*s0size) = -(*s0size);
      TMP_FREE;
      return gn;
    }
  else
    {
      /* Compute the final gcd. */

      mp_size_t gn, sn, tn;
      mp_ptr s, t;
      mp_limb_t cy;
      int negate = 0;

      /* Save ap and bp first, as gcdext destroys its inputs */
      s = tp;
      tp += an;

      MPN_COPY (tp, ap, an);
      MPN_COPY (tp + an, bp, an);

      if (mpn_cmp (tp, tp + an, an) == 0)
        {
          /* The gcd is tp (== tp + an); return the smallest cofactor,
             either -u0 or u1 */
          gn = an;
          MPN_NORMALIZE (tp, gn);
          MPN_COPY (gp, tp, gn);

          MPN_CMP (c, u0, u1, un);
          if (c < (mp_limb_signed_t) 0)
            {
              MPN_COPY (s0p, u0, u0n);
              (*s0size) = -u0n;
            }
          else
            {
              MPN_NORMALIZE (u1, un);
              MPN_COPY (s0p, u1, un);
              (*s0size) = un;
            }
          TMP_FREE;
          return gn;
        }

      gn = mpn_ngcdext_lehmer (gp, s, &sn, tp, tp + an, an, tp + 2*an);

      /* Special case: s == 0, t == 1, cofactor = -u0; this case is rare */
      if (sn == 0)
        {
          MPN_COPY (s0p, u0, u0n);
          (*s0size) = -u0n;
          if (swapped)
            (*s0size) = -(*s0size);
          TMP_FREE;
          return gn;
        }

      /* We'll need the other cofactor t = (gp - s*ap)/bp */

      t = tp;
      tp += (an + 1);

      gcdext_get_t (t, &tn, gp, gn, ap, an, bp, n, s, sn, tp);

      ASSERT ((tn == 0) || (t[tn - 1] > 0)); /* {t, tn} is normalised */
      ASSERT (tn <= an + 1);

      /* We want to compute s*u1 - t*u0; if s is negative then t will
         be positive, so we'd be dealing with negative numbers.  We fix
         that here. */

      if (sn < 0)
        {
          sn = -sn;
          negate = 1;
        }

      /* Now we can deal with the special case u1 = 0 */

      u1n = un;
      MPN_NORMALIZE (u1, u1n); /* {u1, u1n} is now normalised */

      if (u1n == 0) /* this case is rare */
        {
          MPN_COPY (s0p, t, tn);
          (*s0size) = -tn;
          if (swapped ^ negate)
            (*s0size) = -(*s0size);
          TMP_FREE;
          return gn;
        }

      /* t may be zero, but we need to compute s*u1 anyway */
      if (sn >= u1n)
        mpn_mul (s0p, s, sn, u1, u1n);
      else
        mpn_mul (s0p, u1, u1n, s, sn);

      (*s0size) = sn + u1n;
      (*s0size) -= (s0p[sn + u1n - 1] == 0);

      ASSERT (s0p[*s0size - 1] > 0); /* {s0p, *s0size} is normalised now */

      if (tn == 0) /* this case is rare */
        {
          if (swapped ^ negate)
            (*s0size) = -(*s0size);
          TMP_FREE;
          return gn;
        }

      /* Now compute the rest of the cofactor, t*u0, and subtract it.
         We're done with u1 and s, which happen to be consecutive, so
         use that space. */

      ASSERT (u1 + tn + u0n <= t);

      if (tn > u0n)
        mpn_mul (u1, t, tn, u0, u0n);
      else
        mpn_mul (u1, u0, u0n, t, tn);

      u1n = tn + u0n;
      u1n -= (u1[tn + u0n - 1] == 0);

      ASSERT (u1[u1n - 1] > 0);

      /* Recall t is now negated, so s*u1 - t*u0 involves an *addition* */

      if ((*s0size) >= u1n)
        {
          cy = mpn_add (s0p, s0p, *s0size, u1, u1n);
          if (cy)
            s0p[(*s0size)++] = cy;
        }
      else
        {
          cy = mpn_add (s0p, u1, u1n, s0p, *s0size);
          (*s0size) = u1n;
          if (cy)
            s0p[(*s0size)++] = cy;
        }

      if (swapped ^ negate)
        (*s0size) = -(*s0size);
      TMP_FREE;
      return gn;
    }
}
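
A usage sketch one level up (assumes gmp.h): mpz_gcdext is the documented
interface over this kind of mpn-level extended gcd, returning g and
cofactors with a*s + b*t = g:

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b, g, s, t;
  mpz_inits (a, b, g, s, t, NULL);
  mpz_set_ui (a, 240);
  mpz_set_ui (b, 46);
  mpz_gcdext (g, s, t, a, b);
  gmp_printf ("g = %Zd, s = %Zd, t = %Zd\n", g, s, t); /* 240*s + 46*t = 2 */
  mpz_clears (a, b, g, s, t, NULL);
  return 0;
}
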
Example #8
void
mpn_toom3_sqr_n (mp_ptr c, mp_srcptr a, mp_size_t n, mp_ptr t)
{
  mp_size_t k, k1, kk1, r, twok, rr2;
  mp_limb_t cy, cc, saved, vinf0;
  mp_ptr trec;
  int sa;
  mp_ptr c1, c2, c3, c4, c5, t1, t2, t3, t4;

  ASSERT(GMP_NUMB_BITS >= 6);

  k = (n + 2) / 3; /* ceil(n/3) */
  ASSERT(GMP_NUMB_BITS >= 6);
  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */

  twok = 2 * k;
  k1 = k + 1;
  kk1 = k + k1;
  r = n - twok;   /* last chunk */
  rr2 = 2*r;

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;
  
  t1 = t + k;
  t2 = t1 + k;
  t3 = t2 + k;
  t4 = t3 + k;

  trec = t + 4 * k + 3; 

  /* put a0+a2 in {c, k+1}
     put a0+a1+a2 in {t2 + 1, k+1}
  */
  cy = mpn_add_n (c, a, a + twok, r);
  if (r < k)
    {
      __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);
    }
  t3[1] = (c1[0] = cy) + mpn_add_n (t2 + 1, c, a + k, k);

  /* compute v1 := (a0+a1+a2)^2 in {c2, 2k+1};
     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */
  TOOM3_SQR_REC (c2, t2 + 1, k1, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
					v1
  */

  /* put |a0-a1+a2| in {c,k+1} */
  sa = (c[k] != 0) ? 1 : mpn_cmp (c, a + k, k);
  c[k] = (sa >= 0) ? c[k] - mpn_sub_n (c, c, a + k, k)
		   : mpn_sub_n (c, a + k, c, k);
  
  /* compute vm1 := (a0-a1+a2)^2 in {t, 2k+1};
     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */
  TOOM3_SQR_REC (t, c, k1, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
					v1

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1
  */

  /* 
     compute a0+2a1+4a2 in {c, k+1}
  */
#if HAVE_NATIVE_mpn_addlsh1_n
  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
  if (r < k)
    {
      __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
    }
  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
#else
  c[r] = mpn_lshift1 (c, a + twok, r);
  if (r < k)
    {
      MPN_ZERO(c + r + 1, k - r);
    }
  c1[0] += mpn_add_n (c, c, a + k, k);
  mpn_double (c, k1);
  c1[0] += mpn_add_n (c, c, a, k);
#endif

#define v2 (t+2*k+1)

  /* compute v2 := (a0+2a1+4a2)^2 in {t+2k+1, 2k+1}
     v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */
  TOOM3_SQR_REC (v2, c, k1, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
					v1

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1        v2
  */

  /* compute v0 := a0^2 in {c, 2k} */
  TOOM3_SQR_REC (c, a, k, trec);

 /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 		v1

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1        v2
  */

#define vinf (c+4*k)

  /* compute vinf := a2*b2 in {c4, r + r2},
  */
  saved = c4[0];

  TOOM3_SQR_REC (c4, a + twok, r, trec);
  
  vinf0 = c4[0];
  c4[0] = saved;

 /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 		v1        {-}vinf

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1        v2

	  vinf0 = {-}
  */

  mpn_toom3_interpolate (c, c2, v2, t, vinf, k, rr2, 1, vinf0, t4+2);

#undef v2
#undef vinf
}
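
A numeric sanity sketch, on ordinary integers, of the five Toom-3
evaluation points used above and the interpolation they feed: for
f(x) = a0 + a1*x + a2*x^2, the values at x = 0, 1, -1, 2 and infinity
determine the five coefficients of f^2:

#include <stdio.h>
#include <assert.h>

int
main (void)
{
  long a0 = 3, a1 = 5, a2 = 7;
  long v0   = a0 * a0;                                 /* c0 */
  long v1   = (a0 + a1 + a2) * (a0 + a1 + a2);
  long vm1  = (a0 - a1 + a2) * (a0 - a1 + a2);
  long v2   = (a0 + 2*a1 + 4*a2) * (a0 + 2*a1 + 4*a2);
  long vinf = a2 * a2;                                 /* c4 */
  long t1 = (v1 + vm1) / 2;                            /* c0 + c2 + c4 */
  long t2 = (v1 - vm1) / 2;                            /* c1 + c3      */
  long c2 = t1 - v0 - vinf;
  long c3 = ((v2 - v0 - 16 * vinf - 4 * c2) / 2 - t2) / 3;
  long c1 = t2 - c3;
  /* f^2 = 9 + 30x + 67x^2 + 70x^3 + 49x^4 */
  assert (v0 == 9 && c1 == 30 && c2 == 67 && c3 == 70 && vinf == 49);
  printf ("%ld %ld %ld %ld %ld\n", v0, c1, c2, c3, vinf);
  return 0;
}
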
Example #9
/* The necessary temporary space T(n) satisfies T(n)=0 for n < THRESHOLD,
   and T(n) <= max(2n+2, 6k+3, 4k+3+T(k+1)) otherwise, where k = ceil(n/3).

   Assuming T(n) >= 2n, 6k+3 <= 4k+3+T(k+1).
   Similarly, 2n+2 <= 6k+2 <= 4k+3+T(k+1).

   With T(n) = 2n+S(n), this simplifies to S(n) <= 9 + S(k+1).
   Since THRESHOLD >= 17, we have n/(k+1) >= 19/8
   thus S(n) <= S(n/(19/8)) + 9 thus S(n) <= 9*log(n)/log(19/8) <= 8*log2(n).

   We need in addition 2*r for mpn_sublsh1_n, so the total is at most
   8/3*n+8*log2(n).
*/
void
mpn_toom3_mul_n (mp_ptr c, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr t)
{
  mp_size_t k, k1, kk1, r, twok, rr2;
  mp_limb_t cy, cc, saved, vinf0;
  mp_ptr trec;
  int sa, sb;
  mp_ptr c1, c2, c3, c4, c5, t1, t2, t3, t4;

  ASSERT(GMP_NUMB_BITS >= 6);

  k = (n + 2) / 3; /* ceil(n/3) */
  ASSERT(GMP_NUMB_BITS >= 6);
  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */

  twok = 2 * k;
  k1 = k + 1;
  kk1 = k + k1;
  r = n - twok;   /* last chunk */
  rr2 = 2*r;

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;
  
  t1 = t + k;
  t2 = t1 + k;
  t3 = t2 + k;
  t4 = t3 + k;

  trec = t + 4 * k + 4; 

  /* put a0+a2 in {c, k+1}, and b0+b2 in {c4 + 2, k+1};
     put a0+a1+a2 in {t2 + 1, k+1} and b0+b1+b2 in {t3 + 2,k+1}
  */
  c1[0] = mpn_add_n (c, a, a + twok, r);
  c5[2] = mpn_add_n (c4 + 2, b, b + twok, r);
  if (r < k)
    {
      c1[0] = mpn_add_1 (c + r, a + r, k - r, c1[0]);
      c5[2] = mpn_add_1 (c4 + 2 + r, b + r, k - r, c5[2]);
    }
  t3[1] = c1[0] + mpn_add_n (t2 + 1, c, a + k, k);
  t4[2] = c5[2] + mpn_add_n (t3 + 2, c4 + 2, b + k, k);

  ASSERT(c1[0] < 2);
  ASSERT(c5[2] < 2);
  ASSERT(t3[1] < 3);
  ASSERT(t4[2] < 3);

  /* compute v1 := (a0+a1+a2)*(b0+b1+b2) in {c2, 2k+1};
     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */
  TOOM3_MUL_REC (c2, t2 + 1, t3 + 2, k1, trec);

  ASSERT(c2[k+k] < 9);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
					v1
  */

  /* put |a0-a1+a2| in {c,k+1} and |b0-b1+b2| in {c4 + 2,k+1} */
  /* sa = sign(a0-a1+a2) */
  /* sb = sign(b0-b1+b2) */
  sa = (c[k] != 0) ? 1 : mpn_cmp (c, a + k, k);
  c[k] = (sa >= 0) ? c[k] - mpn_sub_n (c, c, a + k, k)
		   : mpn_sub_n (c, a + k, c, k);
  /* b0+b2 is in {c4+2, k+1} now */
  sb = (c5[2] != 0) ? 1 : mpn_cmp (c4 + 2, b + k, k);
  c5[2] = (sb >= 0) ? c5[2] - mpn_sub_n (c4 + 2, c4 + 2, b + k, k)
		    : mpn_sub_n (c4 + 2, b + k, c4 + 2, k);
  
  ASSERT(c[k] < 2);
  ASSERT(c5[2] < 2);

  sa *= sb; /* sign of vm1 */

  /* compute vm1 := (a0-a1+a2)*(b0-b1+b2) in {t, 2k+1};
     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */
  TOOM3_MUL_REC (t, c, c4 + 2, k1, trec);

  ASSERT(t[k+k] < 4);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
					v1

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1
  */

  /* 
     compute a0+2a1+4a2 in {c, k+1} and b0+2b1+4b2 in {c4 + 2, k+1}
  */
#if HAVE_NATIVE_mpn_addlsh1_n
  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
  c5[2] = mpn_addlsh1_n (c4 + 2, b + k, b + twok, r);
  if (r < k)
    {
      c1[0] = mpn_add_1(c + r, a + k + r, k - r, c1[0]);
      c5[2] = mpn_add_1(c4 + 2 + r, b + k + r, k - r, c5[2]);
    }
  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
  c5[2] = 2 * c5[2] + mpn_addlsh1_n (c4 + 2, b, c4 + 2, k);
#else
  c[r] = mpn_lshift1 (c, a + twok, r);
  c4[r + 2] = mpn_lshift1 (c4 + 2, b + twok, r);
  if (r < k)
    {
      MPN_ZERO(c + r + 1, k - r);
      MPN_ZERO(c4 + r + 3, k - r);
    }
  c1[0] += mpn_add_n (c, c, a + k, k);
  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b + k, k);
  mpn_double (c, k1);
  mpn_double (c4 + 2, k1);
  c1[0] += mpn_add_n (c, c, a, k);
  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b, k);
#endif

  ASSERT(c[k] < 7);
  ASSERT(c5[2] < 7);

#define v2 (t+2*k+1)

  /* compute v2 := (a0+2a1+4a2)*(b0+2b1+4b2) in {t+2k+1, 2k+1}
     v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */
  TOOM3_MUL_REC (v2, c, c4 + 2, k1, trec);

  ASSERT(v2[k+k] < 49);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
					v1

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1        v2
  */

  /* compute v0 := a0*b0 in {c, 2k} */
  TOOM3_MUL_REC (c, a, b, k, trec);

 /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 		v1

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1        v2
  */

#define vinf (c+4*k)

  /* compute vinf := a2*b2 in {c4, r + r2},
  */
  saved = c4[0];

  TOOM3_MUL_REC (c4, a + twok, b + twok, r, trec);
  
  vinf0 = c4[0];
  c4[0] = saved;
  
 /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 		v1        {-}vinf

	  {t, 2k+1} {t+2k+1, 2k + 1}
	     vm1        v2

	  vinf0 = {-}
  */

  mpn_toom3_interpolate (c, c2, v2, t, vinf, k, rr2, sa, vinf0, t4+2);

#undef v2
#undef vinf
}
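
A small sketch of the scratch bound derived in the comment above the
function, T(n) <= 8/3*n + 8*log2(n) limbs (hypothetical helper; the real
sizing lives in the library's threshold machinery):

#include <stdio.h>
#include <math.h>

static size_t
toom3_scratch_bound (size_t n)
{
  return (8 * n + 2) / 3 + 8 * (size_t) ceil (log2 ((double) n));
}

int
main (void)
{
  size_t n;
  for (n = 32; n <= 4096; n *= 4)
    printf ("n = %4zu -> scratch <= %zu limbs\n", n, toom3_scratch_bound (n));
  return 0;
}
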
Example #10
void
mpf_ui_sub (mpf_ptr r, mpir_ui u, mpf_srcptr v)
{
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t uexp;
  mp_size_t ediff;
  int negate;
  mp_limb_t ulimb;
  TMP_DECL;

  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }
  if (vsize == 0)
    {
      mpf_set_ui (r, u);
      return;
    }

  /* If signs of U and V are different, perform addition.  */
  if (vsize < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_add_ui (r, &v_negated, u);
      return;
    }

  TMP_MARK;

  /* Signs are now known to be the same.  */

  ulimb = u;
  /* Make U be the operand with the largest exponent.  */
  if (1 < v->_mp_exp)
    {
      negate = 1;
      usize = ABS (vsize);
      vsize = 1;
      up = v->_mp_d;
      vp = &ulimb;
      rp = r->_mp_d;
      prec = r->_mp_prec + 1;
      uexp = v->_mp_exp;
      ediff = uexp - 1;
    }
  else
    {
      negate = 0;
      usize = 1;
      vsize = ABS (vsize);
      up = &ulimb;
      vp = v->_mp_d;
      rp = r->_mp_d;
      prec = r->_mp_prec;
      uexp = 1;
      ediff = 1 - v->_mp_exp;
    }

  /* Ignore leading limbs in U and V that are equal.  Doing
     this helps increase the precision of the result.  */
  if (ediff == 0)
    {
      /* This loop normally exits immediately.  Optimize for that.  */
      for (;;)
	{
	  usize--;
	  vsize--;
	  if (up[usize] != vp[vsize])
	    break;
	  uexp--;
	  if (usize == 0)
	    goto Lu0;
	  if (vsize == 0)
	    goto Lv0;
	}
      usize++;
      vsize++;
      /* Note that either operand (but not both operands) might now have
	 leading zero limbs.  It matters only that U is unnormalized if
	 vsize is now zero, and vice versa.  And it is only in that case
	 that we have to adjust uexp.  */
      if (vsize == 0)
      Lv0:
	while (usize != 0 && up[usize - 1] == 0)
	  usize--, uexp--;
      if (usize == 0)
      Lu0:
	while (vsize != 0 && vp[vsize - 1] == 0)
	  vsize--, uexp--;
    }

  /* If U extends beyond PREC, ignore the part that does.  */
  if (usize > prec)
    {
      up += usize - prec;
      usize = prec;
    }

  /* If V extends beyond PREC, ignore the part that does.
     Note that this may make vsize negative.  */
  if (vsize + ediff > prec)
    {
      vp += vsize + ediff - prec;
      vsize = prec - ediff;
    }

  /* Allocate temp space for the result.  Allocate
     just vsize + ediff later???  */
  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
	MPN_COPY (rp, up, usize);
      rsize = usize;
    }
  else
    {
      /* Locate the least significant non-zero limb in (the needed
	 parts of) U and V, to simplify the code below.  */
      for (;;)
	{
	  if (vsize == 0)
	    {
	      MPN_COPY (rp, up, usize);
	      rsize = usize;
	      goto done;
	    }
	  if (vp[0] != 0)
	    break;
	  vp++, vsize--;
	}
      for (;;)
	{
	  if (usize == 0)
	    {
	      MPN_COPY (rp, vp, vsize);
	      rsize = vsize;
	      negate ^= 1;
	      goto done;
	    }
	  if (up[0] != 0)
	    break;
	  up++, usize--;
	}

      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff)
	{
	  /* U and V partially overlaps.  */
	  if (ediff == 0)
	    {
	      /* Have to compare the leading limbs of u and v
		 to determine whether to compute u - v or v - u.  */
	      if (usize > vsize)
		{
		  /* uuuu     */
		  /* vv       */
		  int cmp;
		  cmp = mpn_cmp (up + usize - vsize, vp, vsize);
		  if (cmp >= 0)
		    {
		      mp_size_t size;
		      size = usize - vsize;
		      MPN_COPY (tp, up, size);
		      mpn_sub_n (tp + size, up + size, vp, vsize);
		      rsize = usize;
		    }
		  else
		    {
		      /* vv       */  /* Swap U and V. */
		      /* uuuu     */
		      mp_size_t size, i;
		      size = usize - vsize;
		      tp[0] = -up[0] & GMP_NUMB_MASK;
		      for (i = 1; i < size; i++)
			tp[i] = ~up[i] & GMP_NUMB_MASK;
		      mpn_sub_n (tp + size, vp, up + size, vsize);
		      mpn_sub_1 (tp + size, tp + size, vsize, (mp_limb_t) 1);
		      negate ^= 1;
		      rsize = usize;
		    }
		}
	      else if (usize < vsize)
		{
		  /* uuuu     */
		  /* vvvvvvv  */
		  int cmp;
		  cmp = mpn_cmp (up, vp + vsize - usize, usize);
		  if (cmp > 0)
		    {
		      mp_size_t size, i;
		      size = vsize - usize;
		      tp[0] = -vp[0] & GMP_NUMB_MASK;
		      for (i = 1; i < size; i++)
			tp[i] = ~vp[i] & GMP_NUMB_MASK;
		      mpn_sub_n (tp + size, up, vp + size, usize);
		      mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
		      rsize = vsize;
		    }
		  else
		    {
		      /* vvvvvvv  */  /* Swap U and V. */
		      /* uuuu     */
		      /* This is the only place we can get 0.0.  */
		      mp_size_t size;
		      size = vsize - usize;
		      MPN_COPY (tp, vp, size);
		      mpn_sub_n (tp + size, vp + size, up, usize);
		      negate ^= 1;
		      rsize = vsize;
		    }
		}
	      else
		{
		  /* uuuu     */
		  /* vvvv     */
		  int cmp;
		  cmp = mpn_cmp (up, vp + vsize - usize, usize);
		  if (cmp > 0)
		    {
		      mpn_sub_n (tp, up, vp, usize);
		      rsize = usize;
		    }
		  else
		    {
		      mpn_sub_n (tp, vp, up, usize);
		      negate ^= 1;
		      rsize = usize;
		      /* can give zero */
		    }
		}
	    }
	  else
	    {
	      if (vsize + ediff <= usize)
		{
		  /* uuuu     */
		  /*   v      */
		  mp_size_t size;
		  size = usize - ediff - vsize;
		  MPN_COPY (tp, up, size);
		  mpn_sub (tp + size, up + size, usize - size, vp, vsize);
		  rsize = usize;
		}
	      else
		{
		  /* uuuu     */
		  /*   vvvvv  */
		  mp_size_t size, i;
		  size = vsize + ediff - usize;
		  tp[0] = -vp[0] & GMP_NUMB_MASK;
		  for (i = 1; i < size; i++)
		    tp[i] = ~vp[i] & GMP_NUMB_MASK;
		  mpn_sub (tp + size, up, usize, vp + size, usize - ediff);
		  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
		  rsize = vsize + ediff;
		}
	    }
	}
      else
	{
	  /* uuuu     */
	  /*      vv  */
	  mp_size_t size, i;
	  size = vsize + ediff - usize;
	  tp[0] = -vp[0] & GMP_NUMB_MASK;
	  for (i = 1; i < vsize; i++)
	    tp[i] = ~vp[i] & GMP_NUMB_MASK;
	  for (i = vsize; i < size; i++)
	    tp[i] = GMP_NUMB_MAX;
	  mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);
	  rsize = size + usize;
	}

      /* Full normalize.  Optimize later.  */
      while (rsize != 0 && tp[rsize - 1] == 0)
	{
	  rsize--;
	  uexp--;
	}
      MPN_COPY (rp, tp, rsize);
    }

 done:
  r->_mp_size = negate ? -rsize : rsize;
  r->_mp_exp = uexp;
  TMP_FREE;
}
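
A minimal usage sketch at the public level (assumes gmp.h): mpf_ui_sub
computes r = u - v for an unsigned integer u and an mpf_t v:

#include <gmp.h>

int
main (void)
{
  mpf_t v, r;
  mpf_init_set_d (v, 2.75);
  mpf_init (r);
  mpf_ui_sub (r, 10, v);     /* r = 10 - 2.75 */
  gmp_printf ("%.2Ff\n", r); /* prints 7.25 */
  mpf_clear (v);
  mpf_clear (r);
  return 0;
}
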
Example #11
mp_limb_t
mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
  mp_size_t q_orig, qn, sh, sl, i;
  mp_limb_t qh, cy, cy2;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn >= 6);
  ASSERT (nn >= dn + 3);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }
  q_orig = qn;

  qh = mpn_cmp(np + nn - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);

  np += nn - dn - qn;
  nn = dn + qn;

  /* Reduce until dn - 1 >= qn */
  while (dn - 1 < qn)
  {
     sh = MIN(dn, qn - dn + 1);
     if (sh <= DC_DIV_QR_THRESHOLD)
        cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
     else
        cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);
     qn -= sh; nn -= sh; 
  }

  cy = np[nn - 1];

  /* split into two parts */
  sh = qn/2; sl = qn - sh;

  /* Rare case where truncation ruins normalisation */
  if (cy > dp[dn - 1] || (cy == dp[dn - 1] 
     && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0))
     {
        __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn);
        return qh;
     }

  if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0)
     __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh);
  else
  {
     if (sh < SB_DIVAPPR_Q_CUTOFF)
        mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
     else
        mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);
  }

  cy = np[nn - sh];

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(sl + 2);

  mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh);
  cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2);

  TMP_FREE;

  while ((mp_limb_signed_t) cy < 0)
  {
     qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */
     
     /*
        correct the remainder, noting that the "digits" of the quotient
        aren't base B but in a base varying with the truncation, thus the
        correction needs a fixup
     */
     cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2); 

     for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++)
        cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 2, sl + 2, dp[dn - sl - 3 - i]);
  }
   
  if (cy != 0) /* special case: unable to canonicalise */
     __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl);
  else
  {
     if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0)
        __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl);
     else
     {
        if (sl < SB_DIVAPPR_Q_CUTOFF)
           mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv);
        else
           mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv);
     }

  }

  return qh;
}
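
A toy single-precision sketch of the add-back correction pattern in the
while loop above: when the remainder goes negative after subtracting an
over-large quotient estimate times the divisor, decrement the quotient
and add the divisor back until the remainder is non-negative:

#include <stdio.h>

int
main (void)
{
  long n = 1000, d = 97;
  long q = n / d + 2;  /* deliberately over-shot estimate */
  long r = n - q * d;  /* negative remainder */
  while (r < 0)
    {
      q -= 1;          /* the mpn_sub_1 on the quotient */
      r += d;          /* the mpn_add_n of the divisor  */
    }
  printf ("q = %ld, r = %ld\n", q, r);  /* 10, 30 */
  return 0;
}
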
Example #12
/* 
   Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is
   either correct or one too large. We require dp to be normalised and inv
   to be a precomputed inverse given by mpn_invert.
*/
mp_limb_t 
mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, 
                              mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);
   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }
   
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy + 1);

   /*
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }; then
      D*X < B^{2*dn} <= D*(X+1).  Let N' = { np + dn - 1, dn + 1 }; thus
      N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      and N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B.
      There is either one integer in this range, or two.  However, in the
      latter case the left hand bound is either an integer or < 2/B below one.
   */
    
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }
  
   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) 
   {
	   /* Special case, multiply out to get accurate quotient */
	   ret -= mpn_sub_1(qp, qp, dn, 1);
      if (UNLIKELY(ret == ~CNST_LIMB(0)))
         ret += mpn_add_1(qp, qp, dn, 1);
      
      /* ret is now guaranteed to be 0 */
      ASSERT(ret == 0);
       
      mpn_mul_n(tp, qp, dp, dn);
      mpn_sub_n(tp, np, tp, dn+1);
      while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)
	   {
		   ret += mpn_add_1(qp, qp, dn, 1);
		   tp[dn] -= mpn_sub_n(tp, tp, dp, dn);
	   }
       
      /* Not possible for ret == 2 as we have qp*dp <= np */
      ASSERT(ret + ret2 < 2);
   }

   TMP_FREE;

   return ret + ret2;
}
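
A toy model (base B = 2^32, dn = 1; assumes a compiler with unsigned
__int128) of the precomputed-inverse estimate above: with
X = B + inv = floor((B^2 - 1)/d), the estimate floor(N*X/B^2) is the true
quotient or one too small, matching the one-step correction in the code:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

int
main (void)
{
  uint64_t B = UINT64_C(1) << 32;
  uint64_t d = UINT64_C(0x90000001);    /* normalised: top bit set */
  uint64_t inv = ~UINT64_C(0) / d - B;  /* floor((B^2-1)/d) - B */
  unsigned __int128 N =
    ((unsigned __int128) UINT64_C(0x12345678) << 32) | UINT64_C(0x9abcdef0);
  unsigned __int128 X = (unsigned __int128) B + inv;
  uint64_t q_est = (uint64_t) ((N * X) >> 64);
  uint64_t q = (uint64_t) (N / d);
  assert (q_est == q || q_est + 1 == q);
  printf ("q = %llx, estimate = %llx\n",
          (unsigned long long) q, (unsigned long long) q_est);
  return 0;
}
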
Example #13
File: log.c Project: isuruf/arb
void
arb_log_arf(arb_t z, const arf_t x, slong prec)
{
    if (arf_is_special(x))
    {
        if (arf_is_pos_inf(x))
            arb_pos_inf(z);
        else
            arb_indeterminate(z);
    }
    else if (ARF_SGNBIT(x))
    {
        arb_indeterminate(z);
    }
    else if (ARF_IS_POW2(x))
    {
        if (fmpz_is_one(ARF_EXPREF(x)))
        {
            arb_zero(z);
        }
        else
        {
            fmpz_t exp;
            fmpz_init(exp);
            _fmpz_add_fast(exp, ARF_EXPREF(x), -1);
            arb_const_log2(z, prec + 2);
            arb_mul_fmpz(z, z, exp, prec);
            fmpz_clear(exp);
        }
    }
    else if (COEFF_IS_MPZ(*ARF_EXPREF(x)))
    {
        arb_log_arf_huge(z, x, prec);
    }
    else
    {
        slong exp, wp, wn, N, r, closeness_to_one;
        mp_srcptr xp;
        mp_size_t xn, tn;
        mp_ptr tmp, w, t, u;
        mp_limb_t p1, q1bits, p2, q2bits, error, error2, cy;
        int negative, inexact, used_taylor_series;
        TMP_INIT;

        exp = ARF_EXP(x);
        negative = 0;

        ARF_GET_MPN_READONLY(xp, xn, x);

        /* compute a c >= 0 such that |x-1| <= 2^(-c) if c > 0 */
        closeness_to_one = 0;

        if (exp == 0)
        {
            slong i;

            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(~xp[xn - 1]);

            if (closeness_to_one == FLINT_BITS)
            {
                for (i = xn - 2; i > 0 && xp[i] == LIMB_ONES; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(~xp[i]));
            }
        }
        else if (exp == 1)
        {
            closeness_to_one = FLINT_BITS - FLINT_BIT_COUNT(xp[xn - 1] & (~LIMB_TOP));

            if (closeness_to_one == FLINT_BITS)
            {
                slong i;

                for (i = xn - 2; xp[i] == 0; i--)
                    closeness_to_one += FLINT_BITS;

                closeness_to_one += (FLINT_BITS - FLINT_BIT_COUNT(xp[i]));
            }

            closeness_to_one--;
        }

        /* if |t-1| <= 0.5               */
        /* |log(1+t) - t| <= t^2         */
        /* |log(1+t) - (t-t^2/2)| <= t^3 */
        if (closeness_to_one > prec + 1)
        {
            inexact = arf_sub_ui(arb_midref(z), x, 1, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -2 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
            return;
        }
        else if (2 * closeness_to_one > prec + 1)
        {
            arf_t t, u;
            arf_init(t);
            arf_init(u);
            arf_sub_ui(t, x, 1, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul(u, t, t, ARF_PREC_EXACT, ARF_RND_DOWN);
            arf_mul_2exp_si(u, u, -1);
            inexact = arf_sub(arb_midref(z), t, u, prec, ARB_RND);
            mag_set_ui_2exp_si(arb_radref(z), 1, -3 * closeness_to_one);
            if (inexact)
                arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);
            arf_clear(t);
            arf_clear(u);
            return;
        }

        /* Absolute working precision (NOT rounded to a limb multiple) */
        wp = prec + closeness_to_one + 5;

        /* Too high precision to use table */
        if (wp > ARB_LOG_TAB2_PREC)
        {
            arf_log_via_mpfr(arb_midref(z), x, prec, ARB_RND);
            arf_mag_set_ulp(arb_radref(z), arb_midref(z), prec);
            return;
        }

        /* Working precision in limbs */
        wn = (wp + FLINT_BITS - 1) / FLINT_BITS;

        TMP_START;

        tmp = TMP_ALLOC_LIMBS(4 * wn + 3);
        w = tmp;        /* requires wn+1 limbs */
        t = w + wn + 1; /* requires wn+1 limbs */
        u = t + wn + 1; /* requires 2wn+1 limbs */

        /* read x-1 */
        if (xn <= wn)
        {
            flint_mpn_zero(w, wn - xn);
            mpn_lshift(w + wn - xn, xp, xn, 1);
            error = 0;
        }
        else
        {
            mpn_lshift(w, xp + xn - wn, wn, 1);
            error = 1;
        }

        /* First table-based argument reduction */
        if (wp <= ARB_LOG_TAB1_PREC)
            q1bits = ARB_LOG_TAB11_BITS;
        else
            q1bits = ARB_LOG_TAB21_BITS;

        p1 = w[wn-1] >> (FLINT_BITS - q1bits);

        /* Special case: covers logarithms of small integers */
        if (xn == 1 && (w[wn-1] == (p1 << (FLINT_BITS - q1bits))))
        {
            p2 = 0;
            flint_mpn_zero(t, wn);
            used_taylor_series = 0;
            N = r = 0; /* silence compiler warning */
        }
        else
        {
            /* log(1+w) = log(1+p/q) + log(1 + (qw-p)/(p+q)) */
            w[wn] = mpn_mul_1(w, w, wn, UWORD(1) << q1bits) - p1;
            mpn_divrem_1(w, 0, w, wn + 1, p1 + (UWORD(1) << q1bits));
            error += 1;

            /* Second table-based argument reduction (fused with log->atanh
               conversion) */
            if (wp <= ARB_LOG_TAB1_PREC)
                q2bits = ARB_LOG_TAB11_BITS + ARB_LOG_TAB12_BITS;
            else
                q2bits = ARB_LOG_TAB21_BITS + ARB_LOG_TAB22_BITS;

            p2 = w[wn-1] >> (FLINT_BITS - q2bits);

            u[2 * wn] = mpn_lshift(u + wn, w, wn, q2bits);
            flint_mpn_zero(u, wn);
            flint_mpn_copyi(t, u + wn, wn + 1);
            t[wn] += p2 + (UWORD(1) << (q2bits + 1));
            u[2 * wn] -= p2;
            mpn_tdiv_q(w, u, 2 * wn + 1, t, wn + 1);

            /* propagated error from 1 ulp error: 2 atanh'(1/3) = 2.25 */
            error += 3;

            /* |w| <= 2^-r */
            r = _arb_mpn_leading_zeros(w, wn);

            /* N >= (wp-r)/(2r) */
            N = (wp - r + (2*r-1)) / (2*r);
            N = FLINT_MAX(N, 0);

            /* Evaluate Taylor series */
            _arb_atan_taylor_rs(t, &error2, w, wn, N, 0);
            /* Multiply by 2 */
            mpn_lshift(t, t, wn, 1);
            /* Taylor series evaluation error (multiply by 2) */
            error += error2 * 2;

            used_taylor_series = 1;
        }

        /* Size of output number */
        tn = wn;

        /* First table lookup */
        if (p1 != 0)
        {
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab11[p1] + ARB_LOG_TAB1_LIMBS - tn, tn);
            else
                mpn_add_n(t, t, arb_log_tab21[p1] + ARB_LOG_TAB2_LIMBS - tn, tn);
            error++;
        }

        /* Second table lookup */
        if (p2 != 0)
        {
            if (wp <= ARB_LOG_TAB1_PREC)
                mpn_add_n(t, t, arb_log_tab12[p2] + ARB_LOG_TAB1_LIMBS - tn, tn);
            else
                mpn_add_n(t, t, arb_log_tab22[p2] + ARB_LOG_TAB2_LIMBS - tn, tn);
            error++;
        }

        /* add exp * log(2) */
        exp--;

        if (exp > 0)
        {
            cy = mpn_addmul_1(t, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, exp);
            t[tn] = cy;
            tn += (cy != 0);
            error += exp;
        }
        else if (exp < 0)
        {
            t[tn] = 0;
            u[tn] = mpn_mul_1(u, arb_log_log2_tab + ARB_LOG_TAB2_LIMBS - tn, tn, -exp);

            if (mpn_cmp(t, u, tn + 1) >= 0)
            {
                mpn_sub_n(t, t, u, tn + 1);
            }
            else
            {
                mpn_sub_n(t, u, t, tn + 1);
                negative = 1;
            }

            error += (-exp);

            tn += (t[tn] != 0);
        }

        /* The accumulated arithmetic error */
        mag_set_ui_2exp_si(arb_radref(z), error, -wn * FLINT_BITS);

        /* Truncation error from the Taylor series */
        if (used_taylor_series)
            mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, -r*(2*N+1) + 1);

        /* Set the midpoint */
        inexact = _arf_set_mpn_fixed(arb_midref(z), t, tn, wn, negative, prec);
        if (inexact)
            arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec);

        TMP_END;
    }
}
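
A plain-double sketch (illustrative only) of the table-based reduction
used above: log(1+w) = log(1+p/q) + log(1 + (q*w-p)/(p+q)), which leaves
a much smaller argument for the remaining series:

#include <stdio.h>
#include <math.h>

int
main (void)
{
  double w = 0.37;          /* argument in [0, 1)         */
  double q = 256.0;         /* q = 2^q1bits               */
  double p = floor (w * q); /* table index, like p1 above */
  double reduced = (q * w - p) / (p + q);
  double lhs = log1p (w);
  double rhs = log1p (p / q) + log1p (reduced);
  printf ("reduced argument = %.4g, identity error = %.3g\n",
          reduced, lhs - rhs);
  return 0;
}
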
Example #14
void
_gst_mpz_add (gst_mpz *sum, const gst_mpz *u, const gst_mpz *v)
{
  mp_srcptr up, vp;
  mp_ptr sump;
  mp_size_t usize, vsize, sumsize;
  mp_size_t abs_usize;
  mp_size_t abs_vsize;

  usize = u->size;
  vsize = v->size;
  abs_usize = ABS (usize);
  abs_vsize = ABS (vsize);

  if (abs_usize < abs_vsize)
    {
      /* Swap U and V.  */
      {const gst_mpz *t = u; u = v; v = t;}
      {mp_size_t t = usize; usize = vsize; vsize = t;}
      {mp_size_t t = abs_usize; abs_usize = abs_vsize; abs_vsize = t;}
    }

  /* True: abs(USIZE) >= abs(VSIZE) */

  /* If not space for sum (and possible carry), increase space.  */
  sumsize = abs_usize + 1;
  if (sum->alloc < sumsize)
    gst_mpz_realloc (sum, sumsize);

  /* These must be after realloc (u or v may be the same as sum).  */
  up = u->d;
  vp = v->d;
  sump = sum->d;

  if (usize >= 0)
    {
      if (vsize >= 0)
	{
	  sumsize = mpn_add (sump, up, abs_usize, vp, abs_vsize);
	  if (sumsize != 0)
	    sump[abs_usize] = 1;
	  sumsize = sumsize + abs_usize;
	}
      else
	{
	  /* The signs are different.  Need exact comparison to determine
	     which operand to subtract from which.  */
	  if (abs_usize == abs_vsize && mpn_cmp (up, vp, abs_usize) < 0)
	    sumsize = -(abs_usize
			+ mpn_sub (sump, vp, abs_usize, up, abs_usize));
	  else
	    sumsize = (abs_usize
		       + mpn_sub (sump, up, abs_usize, vp, abs_vsize));
	}
    }
  else
    {
      if (vsize >= 0)
	{
	  /* The signs are different.  Need exact comparison to determine
	     which operand to subtract from which.  */
	  if (abs_usize == abs_vsize && mpn_cmp (up, vp, abs_usize) < 0)
	    sumsize = (abs_usize
		       + mpn_sub (sump, vp, abs_usize, up, abs_usize));
	  else
	    sumsize = -(abs_usize
			+ mpn_sub (sump, up, abs_usize, vp, abs_vsize));
	}
      else
	{
	  sumsize = mpn_add (sump, up, abs_usize, vp, abs_vsize);
	  if (sumsize != 0)
	    sump[abs_usize] = 1;
	  sumsize = -(sumsize + abs_usize);
	}
    }

  sum->size = sumsize;
}
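
A single-limb sketch of the sign-magnitude case analysis above: equal
signs add magnitudes; different signs subtract the smaller magnitude from
the larger and take the sign of the larger:

#include <stdio.h>
#include <stdlib.h>

static long
sm_add (long u, long v)  /* toy stand-in for the mpn branches */
{
  long au = labs (u), av = labs (v);
  if ((u >= 0) == (v >= 0))
    return u >= 0 ? au + av : -(au + av);  /* mpn_add branch */
  if (au >= av)
    return u >= 0 ? au - av : -(au - av);  /* mpn_sub, u larger */
  return v >= 0 ? av - au : -(av - au);    /* mpn_sub, operands swapped */
}

int
main (void)
{
  printf ("%ld %ld %ld\n", sm_add (5, 7), sm_add (-5, 7), sm_add (5, -7));
  return 0;
}
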
Example #15
int
main(void)
{
    mp_bitcnt_t depth, w;
    
    gmp_randstate_t state;

    tests_start();
    fflush(stdout);

    gmp_randinit_default(state);

    for (depth = 6; depth <= 12; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (((mp_limb_t)1)<<depth);
            mp_limb_t trunc;
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;
        
            mpn_rrandom(&trunc, state, 1);
            trunc = 2*n + trunc % (2 * n) + 1;
            trunc = 2*((trunc + 1)/2);

            ii = malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) 
            {
                ii[i] = ptr;
                mpir_random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
            s1 = t2 + size;
   
            for (i = 0; i < 4*n; i++)
               mpn_normmod_2expp1(ii[i], limbs);
    
            jj = malloc(4*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) 
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }
   
            mpir_fft_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);
            mpir_ifft_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);
            for (i = 0; i < trunc; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < trunc; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("n = %ld, trunc = %ld\n", n, trunc);
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            free(ii);
            free(jj);
        }
    }

    gmp_randclear(state);
    
    tests_end();
    return 0;
}
Example #16
/* Multiply {up, un} by {vp, vn} and write the result to
   {prodp, un + vn} assuming vn > 3*ceil(un/4).

   Note that prodp gets un + vn limbs stored, even if the actual 
   result only needs un + vn - 1.
*/
void
mpn_toom4_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,
		          mp_srcptr vp, mp_size_t vn)
{
  mp_size_t ind;
  mp_limb_t cy, cy2, r30, r31;
  mp_ptr tp;
  mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1, h2;
  TMP_DECL;

  sn = (un + 3) / 4;

  h1 = un - 3*sn;
  h2 = vn - 3*sn;

  ASSERT (vn > 3*sn);
  
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
#define b3 (vp + 3*sn)

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   TMP_MARK;

   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
#define u6 (tp + 4*t4 + 4*(sn+1))
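/* r1..r7 used below are pointer macros into the product area rp and the
   scratch area tp; their #defines appear earlier in the original source
   file and are omitted from this excerpt (see the layout diagram below). */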

   u6[sn] = mpn_add(u6, a1, sn, a3, h1);
   u5[sn] = mpn_add_n(u5, a2, a0, sn);
   mpn_add_n(u3, u5, u6, sn + 1);
   n4 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u4, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u4, u6, u5, sn + 1);
      n4 = -n4;
   }

   u6[sn] = mpn_add(u6, b1, sn, b3, h2);
   u5[sn] = mpn_add_n(u5, b2, b0, sn);
   mpn_add_n(r2, u5, u6, sn + 1);
   n5 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u5, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u5, u6, u5, sn + 1);
      n5 = -n5;
   }
 
   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, a2, sn, 1);
   MPN_COPY(r2, a3, h1);
   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);
   cy = mpn_addmul_1(r2, a1, h1, 4);
#endif
   if (sn > h1) 
   {
      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u5, r1, r2, sn + 1);
   n6 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(u6, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(u6, r2, r1, sn + 1);
      n6 = -n6;
   }
 
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, b3, b1, h2, 2);
#else
   r1[sn] = mpn_lshift(r1, b2, sn, 1);
   MPN_COPY(r2, b3, h2);
   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);
   cy = mpn_addmul_1(r2, b1, h2, 4);
#endif
   if (sn > h2) 
   {
      cy2 = mpn_lshift(r2 + h2, b1 + h2, sn - h2, 2);
      cy = cy2 + mpn_add_1(r2 + h2, r2 + h2, sn - h2, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u2, r1, r2, sn + 1);
   n8 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(r2, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(r2, r2, r1, sn + 1);
      n8 = -n8;
   }
    
   r30 = r3[0];
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */
   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */
   r3[1] = r31;

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(u2, a2, a3, h1);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy); 
   u2[sn] = cy;
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);     
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);     
#else
   MPN_COPY(u2, a0, sn);
   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);
   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);
   cy = mpn_addmul_1(u2, a3, h1, 8);
   if (sn > h1) cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);
   u2[sn] += cy;
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(r1, b2, b3, h2);
   if (sn > h2)
      cy = mpn_add_1(r1 + h2, b2 + h2, sn - h2, cy); 
   r1[sn] = cy;
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);     
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);     
#else
   MPN_COPY(r1, b0, sn);
   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);
   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);
   cy = mpn_addmul_1(r1, b3, h2, 8);
   if (sn > h2) cy = mpn_add_1(r1 + h2, r1 + h2, sn - h2, cy);
   r1[sn] += cy;
#endif
   
   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */
   
   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h2); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */

   TC4_DENORM(r1, n1, t4 - 1);

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != un + vn) 
   {
	  MPN_ZERO((rp + rpn), un + vn - rpn);
   }

   TMP_FREE;
}
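The (u5, u6) and (r1, r2) blocks above repeat one idiom: evaluate the two
half-polynomials at +1 and -1 by forming a sum and an absolute difference
plus a sign flag.  A standalone sketch of just that idiom, using only public
mpn calls (names and sizes are illustrative, not from the source):

#include <stdio.h>
#include <gmp.h>

/* Given the even-part sum {ep,n} and odd-part sum {op,n}, compute
   s1 = e + o = A(1) and sm1 = |e - o| = |A(-1)|; return the sign of A(-1).
   The carry out of the addition is ignored here; the real code keeps it
   in an extra limb.  */
static int
eval_pm1 (mp_ptr s1, mp_ptr sm1, mp_srcptr ep, mp_srcptr op, mp_size_t n)
{
  mpn_add_n (s1, ep, op, n);
  if (mpn_cmp (ep, op, n) >= 0)
    {
      mpn_sub_n (sm1, ep, op, n);
      return 1;
    }
  mpn_sub_n (sm1, op, ep, n);
  return -1;
}

int
main (void)
{
  mp_limb_t e[1] = { 3 }, o[1] = { 8 }, s1[1], sm1[1];
  int sign = eval_pm1 (s1, sm1, e, o, 1);
  printf ("A(1) = %lu, A(-1) = %c%lu\n", (unsigned long) s1[0],
	  sign < 0 ? '-' : '+', (unsigned long) sm1[0]);
  return 0;
}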
Example #17
/* if approx is non-zero, does not compute the final remainder */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy;
  unsigned long int unb; /* number of significant bits of {up,un} */
  unsigned long int xnb; /* number of significant bits of the result */
  unsigned long b, kk;
  unsigned long sizes[GMP_NUMB_BITS + 1];
  int ni, i;
  int c;
  int logk;
  TMP_DECL;

  TMP_MARK;

  if (remp == NULL)
    {
      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
      scratch = rp;			 /* used by mpn_div_q */
    }
  else
    {
      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
      rp = remp;
    }
  sp = rootp;

  MPN_SIZEINBASE_2EXP(unb, up, un, 1);
  /* unb is the number of bits of the input U */

  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
  /* xnb is the number of bits of the root R */

  if (xnb == 1) /* root is 1 */
    {
      if (remp == NULL)
	remp = rp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
      MPN_NORMALIZE (remp, un);	/* There should be at most one zero limb,
				   if we demand u to be normalized  */
      rootp[0] = 1;
      TMP_FREE;
      return un;
    }

  /* We initialize the algorithm with a 1-bit approximation to zero: since we
     know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
     r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
  kk = k * (xnb - 1);		/* number of truncated bits in the input */
  rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
  mpn_sub_1 (rp, rp, rn, 1);	/* subtract the initial approximation: since
				   the non-truncated part is less than 2^k, it
				   is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
  sp[0] = 1;			/* initial approximation */
  sn = 1;			/* it has one limb */

  for (logk = 1; ((k - 1) >> logk) != 0; logk++)
    ;
  /* logk = ceil(log(k)/log(2)) */

  b = xnb - 1; /* number of remaining bits to determine in the kth root */
  ni = 0;
  while (b != 0)
    {
      /* invariant: here we want b+1 total bits for the kth root */
      sizes[ni] = b;
      /* if c is the new value of b, this means that we'll go from a root
	 of c+1 bits (say s') to a root of b+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" from Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((b + log2(k))/2). */
      b = (b + logk + 1) / 2;
      if (b >= sizes[ni])
	b = sizes[ni] - 1;	/* add just one bit at a time */
      ni++;
    }
  sizes[ni] = 0;
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] extra bits,
     then sizes[ni-2], ..., then sizes[0] = b. */

  /* qp and wp need enough space to store S'^k where S' is an approximate
     root. Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4. But then since we know the number of bits of S in advance, S'
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
					of R/(k*S^(k-1)), and S^k */
  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					and temporary for mpn_pow_1 */

  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
  wn = 1;
  for (i = ni; i != 0; i--)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */
      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
				      iteration */

      /* Reinsert a low zero limb if we normalized away the entire remainder */
      if (rn == 0)
	{
	  rp[0] = 0;
	  rn = 1;
	}

      /* first multiply the remainder by 2^b */
      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
      rn = rn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  rp[rn] = cy;
	  rn++;
	}

      kk = kk - b;

      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
      save = rp[bn];
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
		 - floor(kk / GMP_NUMB_BITS)
	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
      /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
      if (nl - 1 > bn)
	save2 = rp[bn + 1];
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (rn < wn)
	{
	  qn = 0;
	}
      else
	{
	  qn = rn - wn; /* expected quotient size */
	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
	  qn += qp[qn] != 0;
	}

      /* 5: current buffers: {sp,sn}, {qp,qn}.
	 Note: {rp,rn} is not needed any more since we'll compute it from
	 scratch at the end of the loop.
       */

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* the quotient should be smaller than 2^b, since the previous
	 approximation was correctly rounded toward zero */
      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
		      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
	{
	  qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
	  MPN_ZERO (qp, qn);
	  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
	  MPN_DECR_U (qp, qn, 1);
	  qn -= qp[qn - 1] == 0;
	}

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      /* 7: current buffers: {sp,sn}, {qp,qn} */

      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
				   above, q is set to 2^b-1, which has
				   exactly bn limbs */

      /* Combine sB and q to form sB + q.  */
      save = sp[b / GMP_NUMB_BITS];
      MPN_COPY (sp, qp, qn);
      MPN_ZERO (sp + qn, bn - qn);
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
      rn = un - kk / GMP_NUMB_BITS;
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  if (i == 1)
	    {
	      /* Last iteration: we don't need W anymore. */
	      /* mpn_pow_1 requires that both qp and wp have enough space to
		 store the result {sp,sn}^k + 1 limb */
	      approx = approx && (sp[0] > 1);
	      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
	    }
	  else
	    {
	      /* W <- S^(k-1) for the next iteration,
		 and S^k = W * S. */
	      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	      mpn_mul (qp, wp, wn, sp, sn);
	      qn = wn + sn;
	      qn -= qp[qn - 1] == 0;
	    }

	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      ASSERT_ALWAYS (c <= 1);
      ASSERT_ALWAYS (rn >= qn);

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (i > 1 || approx == 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE (rp, rn);
	}
      /* otherwise we have rn > 0, thus the return value is ok */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
    }

  TMP_FREE;
  return rn;
}
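A usage sketch through the public interface (in GMP this internal routine
sits behind mpn_rootrem, which mpz_rootrem ends up calling): truncated kth
root plus remainder, e.g. 100 = 4^3 + 36.

#include <gmp.h>

int
main (void)
{
  mpz_t u, root, rem;

  mpz_init_set_ui (u, 100);
  mpz_init (root);
  mpz_init (rem);

  mpz_rootrem (root, rem, u, 3);	/* root = 4, rem = 100 - 4^3 = 36 */
  gmp_printf ("cbrt(%Zd): root %Zd, remainder %Zd\n", u, root, rem);

  mpz_clears (u, root, rem, NULL);
  return 0;
}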
Example #18
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.

   Iterates

     r' <-- r - r * (a^{k-1} r^k - 1) / k

   If

     a^{k-1} r^k = 1 (mod 2^m),

   then

     a^{k-1} r'^k = 1 (mod 2^{2m}).

   Compute the update term as

     r' = r - (a^{k-1} r^{k+1} - r) / k

   where we still have cancellation of low limbs.

 */
void
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_size_t sizes[GMP_LIMB_BITS * 2];
  mp_ptr akm1, tp, rnp, ep, scratch;
  mp_limb_t a0, r0, km1, kp1h, kinv;
  mp_size_t rn;
  unsigned i;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);
  ASSERT (k >= 3);

  TMP_MARK;

  akm1 = TMP_ALLOC_LIMBS (4*n);
  tp = akm1 + n;

  km1 = k-1;
  /* FIXME: Could arrange the iteration so we don't need to compute
     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     that we can use wraparound also for a*r, since the low half is
     unchanged from the previous iteration. Or possibly mulmid. Also,
     a r = a^{1/k}, so we get that value too, for free? */
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */

  a0 = ap[0];
  binvert_limb (kinv, k);

  /* 4 bits: a^{1/k - 1} (mod 16):

	a % 8
	1 3 5 7
   k%4 +-------
     1 |1 1 1 1
     3 |1 9 9 1
  */
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
#if GMP_NUMB_BITS > 32
  {
    unsigned prec = 32;
    do
      {
	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
	prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (n == 1)
    {
      TMP_FREE;
      return;
    }

  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
  kp1h = k/2 + 1;

  /* FIXME: Special case for two limb iteration. */
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
  ep = rnp + n;

  /* FIXME: Possible to do this on the fly with some bit fiddling. */
  for (i = 0; n > 1; n = (n + 1)/2)
    sizes[i++] = n;

  rn = 1;

  while (i-- > 0)
    {
      /* Compute x^{k+1}. */
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
			       final iteration.*/
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);

      /* Multiply by a^{k-1}. Can use wraparound; low part equals
	 r. */

      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
      ASSERT (mpn_cmp (ep, rp, rn) == 0);

      ASSERT (sizes[i] <= 2*rn);
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
      rn = sizes[i];
    }
  TMP_FREE;
}
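The word-size core of this iteration can be isolated without any GMP
internals.  A self-contained sketch (plain C; the constants are arbitrary
and powmod64 is a local helper, not a GMP function) that computes r with
a^{k-1} r^k = 1 (mod 2^64) by the same r' = r*(k+1 - a^{k-1} r^k)/k update,
doubling the number of correct low bits on every pass:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
powmod64 (uint64_t x, uint64_t e)	/* x^e mod 2^64 */
{
  uint64_t r = 1;
  for (; e != 0; e >>= 1, x *= x)
    if (e & 1)
      r *= x;
  return r;
}

int
main (void)
{
  uint64_t a = 0x123456789abcdef1ULL;	/* must be odd */
  uint64_t k = 5;			/* must be odd and >= 3 */
  uint64_t kinv, akm1, r;
  int i;

  /* kinv = 1/k mod 2^64 by Newton; k*k = 1 mod 8 gives 3 correct bits,
     and each step doubles them: 3 -> 6 -> 12 -> 24 -> 48 -> 96.  */
  kinv = k;
  for (i = 0; i < 5; i++)
    kinv *= 2 - k * kinv;
  assert (k * kinv == 1);

  akm1 = powmod64 (a, k - 1);		/* a^{k-1} */

  /* r = 1 gives a^{k-1} r^k = 1 mod 8 (a^{k-1} is an odd square), so we
     start with 3 correct bits; five doublings exceed 64.  */
  r = 1;
  for (i = 0; i < 5; i++)
    r = kinv * r * (k + 1 - akm1 * powmod64 (r, k));

  assert (akm1 * powmod64 (r, k) == 1);	/* a^{k-1} r^k = 1 mod 2^64 */
  printf ("r = 0x%016llx\n", (unsigned long long) r);
  return 0;
}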
Example #19
/* Check divide and conquer division routine. */
void
check_dc_divappr_q_n (void)
{
   mp_limb_t tp[DC_DIVAPPR_Q_N_ITCH(MAX_LIMBS)];
   mp_limb_t np[2*MAX_LIMBS];
   mp_limb_t np2[2*MAX_LIMBS];
   mp_limb_t rp[2*MAX_LIMBS];
   mp_limb_t dp[MAX_LIMBS];
   mp_limb_t qp[MAX_LIMBS];
   mp_limb_t dip, d1ip;

   mp_size_t nn, rn, dn, qn;

   gmp_randstate_t rands;

   int i, j, s;
   gmp_randinit_default(rands);
  
   for (i = 0; i < ITERS; i++)
   {
      dn = (random() % (MAX_LIMBS - 6)) + 6;
      nn = 2*dn;
         
      mpn_rrandom (np, rands, nn);
      mpn_rrandom (dp, rands, dn);
      dp[dn-1] |= GMP_LIMB_HIGHBIT;

      MPN_COPY(np2, np, nn);
      
      mpir_invert_pi2(dip, d1ip, dp[dn - 1], dp[dn - 2]);
      
      qn = nn - dn + 1;
         
      qp[qn - 1] = mpn_dc_divappr_q_n(qp, np, dp, dn, dip, d1ip, tp);
      
      MPN_NORMALIZE(qp, qn);

      if (qn)
      {
         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);
         else mpn_mul(rp, dp, dn, qp, qn);

         rn = dn + qn;
         MPN_NORMALIZE(rp, rn);

         s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);
         if (s <= 0) 
         {
            mpn_sub(rp, np2, nn, rp, rn);
            rn = nn;
            MPN_NORMALIZE(rp, rn);
         } else 
         {
            mpn_sub(rp, rp, rn, np2, nn);
            MPN_NORMALIZE(rp, rn);
         }
      } else
      {
         rn = nn;
         MPN_COPY(rp, np, nn);
      }
      
      s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);
      if (s >= 0)
      {
         printf ("failed:\n");
         printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn);
         gmp_printf (" np: %Nx\n\n", np2, nn);
         gmp_printf (" dp: %Nx\n\n", dp, dn);
         gmp_printf (" qp: %Nx\n\n", qp, qn);
         gmp_printf (" rp: %Nx\n\n", rp, rn);
         abort ();
      }
   }

   gmp_randclear(rands);
}
Example #20
int
main (void)
{
    gmp_randstate_t rands;
    int j, n;
    mp_limb_t cp1[1000], cp2[1000], mp[1000], tp1[1000], tp2[1000], inv;

    tests_start ();
    gmp_randinit_default (rands);

    for (n = 1; n < 100; n++)
    {
        for (j = 1; j < 100; j++)
        {
            mpn_randomb (mp, rands, n);
            mp[0] |= 1;
            modlimb_invert (inv, mp[0]);
            inv = -inv;
            mpn_randomb (tp1, rands, 2 * n);
            MPN_COPY (tp2, tp1, 2 * n);
            ref_redc_1 (cp1, tp1, mp, n, inv);
            mpn_redc_1 (cp2, tp2, mp, n, inv);
            if (mpn_cmp (cp1, cp2, n) != 0)
            {
                printf ("mpn_redc_1 error %d\n", n);
                abort ();
            }
            if (n != 1 && mpn_cmp (tp1, tp2, 2 * n) != 0)
            {
                printf ("mpn_redc_1 possible error\n");
                abort ();
            }
            /* we don't require the above to be the same, but it can be a useful test */
        }
    }
    for (n = 1; n < 100; n++)
    {
        for (j = 1; j < 100; j++)
        {
            mpn_rrandom (mp, rands, n);
            mp[0] |= 1;
            modlimb_invert (inv, mp[0]);
            inv = -inv;
            mpn_rrandom (tp1, rands, 2 * n);
            MPN_COPY (tp2, tp1, 2 * n);
            ref_redc_1 (cp1, tp1, mp, n, inv);
            mpn_redc_1 (cp2, tp2, mp, n, inv);
            if (mpn_cmp (cp1, cp2, n) != 0)
            {
                printf ("mpn_redc_1 error %d\n", n);
                abort ();
            }
            if (n != 1 && mpn_cmp (tp1, tp2, 2 * n) != 0)
            {
                printf ("mpn_redc_1 possible error\n");
                abort ();
            }
            /* we don't require the above to be the same, but it can be a useful test */
        }
    }

    gmp_randclear (rands);
    tests_end ();
    exit (0);
}
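What mpn_redc_1 computes, reduced to a single limb: with inv = -1/m mod B
(note how the test negates modlimb_invert's result), REDC maps t < m*B to
t*B^{-1} mod m.  A sketch assuming a 64-bit limb, a compiler providing
__uint128_t, and m < 2^63 so the 128-bit sum cannot overflow (the real code
handles full-range m with limb-by-limb carry propagation):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Given t = t1*2^64 + t0 with t1 < m and minv = -1/m mod 2^64,
   return t * 2^{-64} mod m.  */
static uint64_t
redc_1 (uint64_t t1, uint64_t t0, uint64_t m, uint64_t minv)
{
  uint64_t q = t0 * minv;		/* q = -t0/m mod 2^64 */
  __uint128_t s = (__uint128_t) q * m	/* t + q*m = 0 mod 2^64 */
    + (((__uint128_t) t1 << 64) | t0);
  uint64_t r = (uint64_t) (s >> 64);	/* exact shift: low limb is zero */
  return r >= m ? r - m : r;		/* r < 2m, one conditional subtract */
}

int
main (void)
{
  uint64_t m = 0x10001;			/* odd modulus */
  uint64_t minv, t1 = 3, t0 = 0x9999, r;
  __uint128_t t;
  int i;

  minv = m;				/* 1/m mod 2^64 by Newton ... */
  for (i = 0; i < 5; i++)
    minv *= 2 - m * minv;
  minv = -minv;				/* ... then negate, as in the test */

  r = redc_1 (t1, t0, m, minv);
  t = ((__uint128_t) t1 << 64) | t0;
  assert ((((__uint128_t) r << 64) % m) == t % m);	/* r*B = t mod m */
  printf ("ok\n");
  return 0;
}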
Example #21
mp_limb_t
mpn_sbpi1_div_q (mp_ptr qp,
		 mp_ptr np, mp_size_t nn,
		 mp_srcptr dp, mp_size_t dn,
		 mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t qn, i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;
  mp_limb_t flag;

  mp_size_t dn_orig = dn;
  mp_srcptr dp_orig = dp;
  mp_ptr np_orig = np;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  np += nn;

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  qp += qn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  np -= 2;

  n1 = np[1];

  for (i = qn - (dn + 2); i >= 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
	{
	  q = GMP_NUMB_MASK;
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

	  cy = mpn_submul_1 (np - dn, dp, dn, q);

	  cy1 = n0 < cy;
	  n0 = (n0 - cy) & GMP_NUMB_MASK;
	  cy = n1 < cy1;
	  n1 -= cy1;
	  np[0] = n0;

	  if (UNLIKELY (cy != 0))
	    {
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	      q--;
	    }
	}

      *--qp = q;
    }

  flag = ~CNST_LIMB(0);

  if (dn >= 0)
    {
      for (i = dn; i > 0; i--)
	{
	  np--;
	  if (UNLIKELY (n1 >= (d1 & flag)))
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);

	      if (UNLIKELY (n1 != cy))
		{
		  if (n1 < (cy & flag))
		    {
		      q--;
		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
		    }
		  else
		    flag = 0;
		}
	      n1 = np[1];
	    }
	  else
	    {
	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

	      cy = mpn_submul_1 (np - dn, dp, dn, q);

	      cy1 = n0 < cy;
	      n0 = (n0 - cy) & GMP_NUMB_MASK;
	      cy = n1 < cy1;
	      n1 -= cy1;
	      np[0] = n0;

	      if (UNLIKELY (cy != 0))
		{
		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
		  q--;
		}
	    }

	  *--qp = q;

	  /* Truncate operands.  */
	  dn--;
	  dp++;
	}

      np--;
      if (UNLIKELY (n1 >= (d1 & flag)))
	{
	  q = GMP_NUMB_MASK;
	  cy = mpn_submul_1 (np, dp, 2, q);

	  if (UNLIKELY (n1 != cy))
	    {
	      if (n1 < (cy & flag))
		{
		  q--;
		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
		}
	      else
		flag = 0;
	    }
	  n1 = np[1];
	}
      else
	{
	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

	  np[0] = n0;
	  np[1] = n1;
	}

      *--qp = q;
    }
  ASSERT_ALWAYS (np[1] == n1);
  np += 2;


  dn = dn_orig;
  if (UNLIKELY (n1 < (dn & flag)))
    {
      mp_limb_t q, x;

      /* The quotient may be too large if the remainder is small.  Recompute
	 for above ignored operand parts, until the remainder spills.

	 FIXME: The quality of this code isn't the same as the code above.
	 1. We don't compute things in an optimal order, high-to-low, in order
	    to terminate as quickly as possible.
	 2. We mess with pointers and sizes, adding and subtracting and
	    adjusting to get things right.  It surely could be streamlined.
	 3. The only termination criteria are that we determine that the
	    quotient needs to be adjusted, or that we have recomputed
	    everything.  We should stop when the remainder is so large
	    that no additional subtracting could make it spill.
	 4. If nothing else, we should not do two loops of submul_1 over the
	    data, instead handle both the triangularization and chopping at
	    once.  */

      x = n1;

      if (dn > 2)
	{
	  /* Compensate for triangularization.  */
	  mp_limb_t y;

	  dp = dp_orig;
	  if (qn + 1 < dn)
	    {
	      dp += dn - (qn + 1);
	      dn = qn + 1;
	    }

	  y = np[-2];

	  for (i = dn - 3; i >= 0; i--)
	    {
	      q = qp[i];
	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);

	      if (y < cy)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      ASSERT_ALWAYS (cy == 0);
		      return qh - cy;
		    }
		  x--;
		}
	      y -= cy;
	    }
	  np[-2] = y;
	}

      dn = dn_orig;
      if (qn + 1 < dn)
	{
	  /* Compensate for ignored dividend and divisor tails.  */

	  dp = dp_orig;
	  np = np_orig;

	  if (qh != 0)
	    {
	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      if (qn != 0)
			cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh - cy;
		    }
		  x--;
		}
	    }

	  if (qn == 0)
	    return qh;

	  for (i = dn - qn - 2; i >= 0; i--)
	    {
	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh;
		    }
		  x--;
		}
	    }
	}
    }

  return qh;
}
Example #22
int
main (int argc, char **argv)
{
  mp_ptr ap, bp, refp, pp, scratch;
  int count = COUNT;
  int test;
  gmp_randstate_ptr rands;
  TMP_DECL;
  TMP_MARK;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }

  tests_start ();
  rands = RANDS;

#define mpn_mullo_itch(n) (0)

  ap = TMP_ALLOC_LIMBS (MAX_N);
  bp = TMP_ALLOC_LIMBS (MAX_N);
  refp = TMP_ALLOC_LIMBS (MAX_N * 2);
  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
  scratch
    = 1+TMP_ALLOC_LIMBS (mpn_mullo_itch (MAX_N) + 2);

  for (test = 0; test < count; test++)
    {
      unsigned size_min;
      unsigned size_range;
      mp_size_t n;
      mp_size_t itch;
      mp_limb_t p_before, p_after, s_before, s_after;

      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
	;

      /* We generate n in the range MIN_N <= n <= (1 << size_range). */
      size_range = size_min
	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);

      n = MIN_N
	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);

      mpn_random2 (ap, n);
      mpn_random2 (bp, n);
      mpn_random2 (pp-1, n + 2);
      p_before = pp[-1];
      p_after = pp[n];

      itch = mpn_mullo_itch (n);
      ASSERT_ALWAYS (itch <= mpn_mullo_itch (MAX_N));
      mpn_random2 (scratch-1, itch+2);
      s_before = scratch[-1];
      s_after = scratch[itch];

      mpn_mullo_n (pp, ap, bp, n);
      mpn_mul_n (refp, ap, bp, n);
      if (pp[-1] != p_before || pp[n] != p_after
	  || scratch[-1] != s_before || scratch[itch] != s_after
	  || mpn_cmp (refp, pp, n) != 0)
	{
	  printf ("ERROR in test %d, n = %d",
		  test, (int) n);
	  if (pp[-1] != p_before)
	    {
	      printf ("before pp:"); mpn_dump (pp -1, 1);
	      printf ("keep:   "); mpn_dump (&p_before, 1);
	    }
	  if (pp[n] != p_after)
	    {
	      printf ("after pp:"); mpn_dump (pp + n, 1);
	      printf ("keep:   "); mpn_dump (&p_after, 1);
	    }
	  if (scratch[-1] != s_before)
	    {
	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
	      printf ("keep:   "); mpn_dump (&s_before, 1);
	    }
	  if (scratch[itch] != s_after)
	    {
	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
	      printf ("keep:   "); mpn_dump (&s_after, 1);
	    }
	  mpn_dump (ap, n);
	  mpn_dump (bp, n);
	  mpn_dump (pp, n);
	  mpn_dump (refp, n);

	  abort();
	}
    }
  TMP_FREE;
  tests_end ();
  return 0;
}
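The identity being exercised: mpn_mullo_n must produce exactly the low n
limbs of the full 2n-limb product computed by mpn_mul_n.  Stripped of the
guard limbs and random sizing, the check reduces to the sketch below; note
that, like the test itself, it uses mpn_mullo_n, which is internal to GMP
and so this only builds inside the source tree.

#include <assert.h>
#include <gmp.h>
#include "gmp-impl.h"	/* for the internal mpn_mullo_n declaration */

int
main (void)
{
  mp_limb_t a[2] = { 123, 456 }, b[2] = { 789, 101112 };
  mp_limb_t lo[2], full[4];

  mpn_mul_n (full, a, b, 2);	/* full 4-limb product */
  mpn_mullo_n (lo, a, b, 2);	/* low 2 limbs only */

  assert (mpn_cmp (lo, full, 2) == 0);
  return 0;
}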
Example #23
mp_limb_t
mpn_preinv_dc_divappr_q (mp_ptr qp,
			 mp_ptr np, mp_size_t nn,
			 mp_srcptr dp, mp_size_t dn,
			 mp_srcptr dip)
{
  mp_size_t qn;
  mp_limb_t qh, cy, qsave;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  tp = TMP_SALLOC_LIMBS (dn+1);

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn > dn)
    {
      qn++;			/* pretend we'll need an extra limb */
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      /* Perform the typically smaller block first.  */
      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
      else
	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);

      if (qn != dn)
	{
	  if (qn > dn - qn)
	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	  else
	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	  if (qh != 0)
	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	  while (cy != 0)
	    {
	      qh -= mpn_sub_1 (qp, qp, qn, 1);
	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
	    }
	}

      qn = nn - dn - qn + 1;
      while (qn > dn)
	{
	  qp -= dn;
	  np -= dn;
	  mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp);
	  qn -= dn;
	}

      /* Since we pretended we'd need an extra quotient limb before, we now
	 have made sure the code above left just dn-1=qn quotient limbs to
	 develop.  Develop that plus a guard limb. */
      qn--;
      qp -= qn;
      np -= dn;
      qsave = qp[qn];
      mpn_dc_divappr_q_n (qp, np - dn, dp - dn, dn, dip, tp);
      MPN_COPY_INCR (qp, qp + 1, qn);
      qp[qn] = qsave;
    }
  else
    {
      if (qn == 0)
	{
	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
	  if (qh)
	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
	  TMP_FREE;
	  return qh;
	}

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
	 /* Full precision.  Optimal?  */
	qh = mpn_sb_divappr_q (qp, np - dn, nn, dp - dn, dn, dip);
      else
	{
	  /* Put quotient in tp, use qp as temporary, since qp lacks a limb.  */
	  qh = mpn_dc_divappr_q_n (tp, np - qn - 2, dp - (qn + 1), qn + 1, dip, qp);
	  MPN_COPY (qp, tp + 1, qn);
	}
    }

  TMP_FREE;
  return qh;
}
Example #24
File: t-str.c Project: caomw/gmp
void
testmain (int argc, char **argv)
{
  unsigned i;
  char *ap;
  char *bp;
  char *rp;
  size_t bn, rn, arn;

  mpz_t a, b;

  FILE *tmp;

  test_small ();

  mpz_init (a);
  mpz_init (b);

  tmp = tmpfile ();
  if (!tmp)
    fprintf (stderr,
	     "Failed to create temporary file. Skipping mpz_out_str tests.\n");

  for (i = 0; i < COUNT; i++)
    {
      int base;
      for (base = 0; base <= 36; base += 1 + (base == 0))
	{
	  hex_random_str_op (MAXBITS, i&1 ? base: -base, &ap, &rp);
	  if (mpz_set_str (a, ap, 16) != 0)
	    {
	      fprintf (stderr, "mpz_set_str failed on input %s\n", ap);
	      abort ();
	    }

	  rn = strlen (rp);
	  arn = rn - (rp[0] == '-');

	  bn = mpz_sizeinbase (a, base ? base : 10);
	  if (bn < arn || bn > (arn + 1))
	    {
	      fprintf (stderr, "mpz_sizeinbase failed:\n");
	      dump ("a", a);
	      fprintf (stderr, "r = %s\n", rp);
	      fprintf (stderr, "  base %d, correct size %u, got %u\n",
		       base, (unsigned) arn, (unsigned)bn);
	      abort ();
	    }
	  bp = mpz_get_str (NULL, i&1 ? base: -base, a);
	  if (strcmp (bp, rp))
	    {
	      fprintf (stderr, "mpz_get_str failed:\n");
	      dump ("a", a);
	      fprintf (stderr, "b = %s\n", bp);
	      fprintf (stderr, "  base = %d\n", base);
	      fprintf (stderr, "r = %s\n", rp);
	      abort ();
	    }

	  /* Just a few tests with file i/o. */
	  if (tmp && i < 20)
	    {
	      size_t tn;
	      rewind (tmp);
	      tn = mpz_out_str (tmp, i&1 ? base: -base, a);
	      if (tn != rn)
		{
		  fprintf (stderr, "mpz_out_str, bad return value:\n");
		  dump ("a", a);
		  fprintf (stderr, "r = %s\n", rp);
		  fprintf (stderr, "  base %d, correct size %u, got %u\n",
			   base, (unsigned) rn, (unsigned)tn);
		  abort ();
		}
	      rewind (tmp);
	      memset (bp, 0, rn);
	      tn = fread (bp, 1, rn, tmp);
	      if (tn != rn)
		{
		  fprintf (stderr,
			   "fread failed, expected %lu bytes, got only %lu.\n",
			   (unsigned long) rn, (unsigned long) tn);
		  abort ();
		}

	      if (memcmp (bp, rp, rn) != 0)
		{
		  fprintf (stderr, "mpz_out_str failed:\n");
		  dump ("a", a);
		  fprintf (stderr, "b = %s\n", bp);
		  fprintf (stderr, "  base = %d\n", base);
		  fprintf (stderr, "r = %s\n", rp);
		  abort ();
		}
	    }

	  mpz_set_str (b, rp, base);

	  if (mpz_cmp (a, b))
	    {
	      fprintf (stderr, "mpz_set_str failed:\n");
	      fprintf (stderr, "r = %s\n", rp);
	      fprintf (stderr, "  base = %d\n", base);
	      fprintf (stderr, "r = %s\n", ap);
	      fprintf (stderr, "  base = 16\n");
	      dump ("b", b);
	      dump ("r", a);
	      abort ();
	    }

	  /* Test mpn interface */
	  if (base && mpz_sgn (a))
	    {
	      size_t i;
	      const char *absr;
	      mp_limb_t t[MAXLIMBS];
	      mp_size_t tn = mpz_size (a);

	      assert (tn <= MAXLIMBS);
	      mpn_copyi (t, a->_mp_d, tn);

	      bn = mpn_get_str ((unsigned char *) bp, base, t, tn);
	      if (bn != arn)
		{
		  fprintf (stderr, "mpn_get_str failed:\n");
		  fprintf (stderr, "returned length: %lu (bad)\n", (unsigned long) bn);
		  fprintf (stderr, "expected: %lu\n", (unsigned long) arn);
		  fprintf (stderr, "  base = %d\n", base);
		  fprintf (stderr, "r = %s\n", ap);
		  fprintf (stderr, "  base = 16\n");
		  dump ("b", b);
		  dump ("r", a);
		  abort ();
		}
	      absr = rp + (rp[0] == '-');

	      for (i = 0; i < bn; i++)
		{
		  unsigned char digit = absr[i];
		  unsigned value;
		  if (digit >= '0' && digit <= '9')
		    value = digit - '0';
		  else if (digit >= 'a' && digit <= 'z')
		    value = digit - 'a' + 10;
		  else if (digit >= 'A' && digit <= 'Z')
		    value = digit - 'A' + 10;
		  else
		    {
		      fprintf (stderr, "Internal error in test.\n");
		      abort();
		    }
		  if (bp[i] != value)
		    {
		      fprintf (stderr, "mpn_get_str failed:\n");
		      fprintf (stderr, "digit %lu: %d (bad)\n", (unsigned long) i, bp[i]);
		      fprintf (stderr, "expected: %d\n", value);
		      fprintf (stderr, "  base = %d\n", base);
		      fprintf (stderr, "r = %s\n", ap);
		      fprintf (stderr, "  base = 16\n");
		      dump ("b", b);
		      dump ("r", a);
		      abort ();
		    }
		}
	      tn = mpn_set_str (t, (unsigned char *) bp, bn, base);
	      if (tn != mpz_size (a) || mpn_cmp (t, a->_mp_d, tn))
		{
		  fprintf (stderr, "mpn_set_str failed:\n");
		  fprintf (stderr, "r = %s\n", rp);
		  fprintf (stderr, "  base = %d\n", base);
		  fprintf (stderr, "r = %s\n", ap);
		  fprintf (stderr, "  base = 16\n");
		  dump ("r", a);
		  abort ();
		}
	    }
	  free (ap);
	  testfree (bp);
	}
    }
  mpz_clear (a);
  mpz_clear (b);
}
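The round trip at the heart of the test, reduced to the two mpz calls:
mpz_get_str followed by mpz_set_str in the same base is the identity.
(Freeing the string with plain free assumes the default GMP allocator.)

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b;
  char *s;

  mpz_init_set_str (a, "-123456789abcdef0123456789abcdef", 16);
  mpz_init (b);

  s = mpz_get_str (NULL, 36, a);	/* digits 0-9, a-z in base 36 */
  assert (mpz_set_str (b, s, 36) == 0);
  assert (mpz_cmp (a, b) == 0);

  printf ("base-36: %s\n", s);
  free (s);
  mpz_clears (a, b, NULL);
  return 0;
}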
Example #25
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_size_t ualloc = n + 1;

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;

  mp_ptr tp;

  TMP_DECL;

  ASSERT (an >= n);
  ASSERT (n > 0);
  ASSERT (bp[n-1] > 0);

  TMP_MARK;

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);

  /* For initial division */
  scratch = an - n + 1;
  if (scratch > talloc)
    talloc = scratch;

  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      /* For hgcd loop. */
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p1 = CHOOSE_P_1 (n);
      mp_size_t p2 = CHOOSE_P_2 (n);
      mp_size_t min_p = MIN(p1, p2);
      mp_size_t max_p = MAX(p1, p2);
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
      update_scratch = max_p + n - 1;

      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
	talloc = scratch;

      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
	 copies of a and b. */
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
	+ 3*GCDEXT_DC_THRESHOLD;

      if (scratch > talloc)
	talloc = scratch;

      /* Cofactors u0 and u1 */
      talloc += 2*(n+1);
    }

  tp = TMP_ALLOC_LIMBS(talloc);

  if (an > n)
    {
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);

      if (mpn_zero_p (ap, n))
	{
	  MPN_COPY (gp, bp, n);
	  *usizep = 0;
	  TMP_FREE;
	  return n;
	}
    }

  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }

  MPN_ZERO (tp, 2*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usizep;

  {
    /* For the first hgcd call, there are no u updates, and it makes
       some sense to use a different choice for p. */

    /* FIXME: We could trim use of temporary storage, since u0 and u1
       are not used yet. For the hgcd call, we could swap in the u0
       and u1 pointers for the relevant matrix elements. */

    struct hgcd_matrix M;
    mp_size_t p = CHOOSE_P_1 (n);
    mp_size_t nn;

    mpn_hgcd_matrix_init (&M, n - p, tp);
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
    if (nn > 0)
      {
	ASSERT (M.n <= (n - p - 1)/2);
	ASSERT (M.n + p <= (p + n - 1) / 2);

	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);

	MPN_COPY (u0, M.p[1][0], M.n);
	MPN_COPY (u1, M.p[1][1], M.n);
	un = M.n;
	while ( (u0[un-1] | u1[un-1] ) == 0)
	  un--;
      }
    else
      {
	/* mpn_hgcd has failed. Then either one of a or b is very
	   small, or the difference is very small. Perform one
	   subtraction followed by one division. */
	u1[0] = 1;

	ctx.u0 = u0;
	ctx.u1 = u1;
	ctx.tp = tp + n; /* ualloc */
	ctx.un = 1;

	/* Temporary storage n */
	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	if (n == 0)
	  {
	    TMP_FREE;
	    return ctx.gn;
	  }

	un = ctx.un;
	ASSERT (un < ualloc);
      }
  }

  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P_2 (n);
      mp_size_t nn;

      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
	{
	  mp_ptr t0;

	  t0 = tp + matrix_scratch;
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);

	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);

	  /* By the same analysis as for mpn_hgcd_matrix_mul */
	  ASSERT (M.n + un <= ualloc);

	  /* FIXME: This copying could be avoided by some swapping of
	   * pointers. May need more temporary storage, though. */
	  MPN_COPY (t0, u0, un);

	  /* Temporary storage ualloc */
	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);

	  ASSERT (un < ualloc);
	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
	}
      else
	{
	  /* mpn_hgcd has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = tp + n; /* ualloc */
	  ctx.un = un;

	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  if (n == 0)
	    {
	      TMP_FREE;
	      return ctx.gn;
	    }

	  un = ctx.un;
	  ASSERT (un < ualloc);
	}
    }
  /* We have A = ... a + ... b
	     B =  u0 a +  u1 b

	     a = u1  A + ... B
	     b = -u0 A + ... B

     with bounds

       |u0|, |u1| <= B / min(a, b)

     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
     in which case the only reduction done so far is a = A - k B for
     some k.

     Compute g = u a + v b = (u u1 - v u0) A + (...) B
     Here, u, v are bounded by

       |u| <= b,
       |v| <= a
  */

  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
    {
      /* Must return the smallest cofactor, +u1 or -u0 */
      int c;

      MPN_COPY (gp, ap, n);

      MPN_CMP (c, u0, u1, un);
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
	 this case we choose the cofactor + 1, corresponding to G = A
	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usizep = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usizep = un;
	}

      TMP_FREE;
      return n;
    }
  else if (UNLIKELY (u0[0] == 0) && un == 1)
    {
      mp_size_t gn;
      ASSERT (u1[0] == 1);

      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }
  else
    {
      mp_size_t u0n;
      mp_size_t u1n;
      mp_size_t lehmer_un;
      mp_size_t lehmer_vn;
      mp_size_t gn;

      mp_ptr lehmer_up;
      mp_ptr lehmer_vp;
      int negate;

      lehmer_up = tp; tp += n;

      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
      MPN_COPY (tp, ap, n);
      MPN_COPY (tp + n, bp, n);
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);

      u0n = un;
      MPN_NORMALIZE (u0, u0n);
      ASSERT (u0n > 0);

      if (lehmer_un == 0)
	{
	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
	  MPN_COPY (up, u0, u0n);
	  *usizep = -u0n;

	  TMP_FREE;
	  return gn;
	}

      lehmer_vp = tp;
      /* Compute v = (g - u a) / b */
      lehmer_vn = compute_v (lehmer_vp,
			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);

      if (lehmer_un > 0)
	negate = 0;
      else
	{
	  lehmer_un = -lehmer_un;
	  negate = 1;
	}

      u1n = un;
      MPN_NORMALIZE (u1, u1n);
      ASSERT (u1n > 0);

      ASSERT (lehmer_un + u1n <= ualloc);
      ASSERT (lehmer_vn + u0n <= ualloc);

      /* We may still have v == 0 */

      /* Compute u u0 */
      if (lehmer_un <= u1n)
	/* Should be the common case */
	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
      else
	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);

      un = u1n + lehmer_un;
      un -= (up[un - 1] == 0);

      if (lehmer_vn > 0)
	{
	  mp_limb_t cy;

	  /* Overwrites old u1 value */
	  if (lehmer_vn <= u0n)
	    /* Should be the common case */
	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
	  else
	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);

	  u1n = u0n + lehmer_vn;
	  u1n -= (u1[u1n - 1] == 0);

	  if (u1n <= un)
	    {
	      cy = mpn_add (up, up, un, u1, u1n);
	    }
	  else
	    {
	      cy = mpn_add (up, u1, u1n, up, un);
	      un = u1n;
	    }
	  up[un] = cy;
	  un += (cy != 0);

	  ASSERT (un < ualloc);
	}
      *usizep = negate ? -un : un;

      TMP_FREE;
      return gn;
    }
}
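The public face of the routine above is mpz_gcdext, which returns g and
cofactors s, t with a*s + b*t = g.  A quick check of that identity (operand
values are arbitrary):

#include <assert.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b, g, s, t, chk;

  mpz_init_set_str (a, "123456789123456789123456789", 10);
  mpz_init_set_str (b, "987654321987654321", 10);
  mpz_inits (g, s, t, chk, NULL);

  mpz_gcdext (g, s, t, a, b);

  mpz_mul (chk, a, s);
  mpz_addmul (chk, b, t);		/* chk = a*s + b*t */
  assert (mpz_cmp (chk, g) == 0);

  gmp_printf ("g = %Zd\n", g);
  mpz_clears (a, b, g, s, t, chk, NULL);
  return 0;
}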
Example #26
void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  mp_limb_t cy;
  mp_ptr gp;
  mp_ptr as1, asm1, as2, asm2, ash;
  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
  enum toom7_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  as1  = TMP_SALLOC_LIMBS (n + 1);
  asm1 = TMP_SALLOC_LIMBS (n + 1);
  as2  = TMP_SALLOC_LIMBS (n + 1);
  asm2 = TMP_SALLOC_LIMBS (n + 1);
  ash  = TMP_SALLOC_LIMBS (n + 1);

  bs1  = TMP_SALLOC_LIMBS (n + 1);
  bsm1 = TMP_SALLOC_LIMBS (n + 1);
  bs2  = TMP_SALLOC_LIMBS (n + 1);
  bsm2 = TMP_SALLOC_LIMBS (n + 1);
  bsh  = TMP_SALLOC_LIMBS (n + 1);

  gp = pp;

  /* Compute as1 and asm1.  */
  flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));

  /* Compute as2 and asm2. */
  flags = (enum toom7_flags) (flags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp)));

  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (ash, a1, a0, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
  if (s < n)
    {
      mp_limb_t cy2;
      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
      MPN_INCR_U (ash + s, n+1-s, cy2);
    }
  else
    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
#else
  cy = mpn_lshift (ash, a0, n, 1);
  cy += mpn_add_n (ash, ash, a1, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a2, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a3, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
#endif

  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
    }
Example #27
/* Evaluates a polynomial of degree k > 2 at the points +2^shift and -2^shift. */
int
mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
		      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
		      mp_ptr tp)
{
  unsigned i;
  int neg;
#if HAVE_NATIVE_mpn_addlsh_n
  mp_limb_t cy;
#endif

  ASSERT (k >= 3);
  ASSERT (shift*k < GMP_NUMB_BITS);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * that the last coefficient, of size hn, starts at xp + k*n. */

#if HAVE_NATIVE_mpn_addlsh_n
  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
  for (i = 4; i < k; i += 2)
    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);

  tp[n] = mpn_lshift (tp, xp+n, n, shift);
  for (i = 3; i < k; i+= 2)
    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);

  if (k & 1)
    {
      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
      MPN_INCR_U (tp + hn, n+1 - hn, cy);
    }
  else
    {
      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
    }

#else /* !HAVE_NATIVE_mpn_addlsh_n */
  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
  xp2[n] += mpn_add_n (xp2, xp, tp, n);
  for (i = 4; i < k; i += 2)
    {
      xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
    }

  tp[n] = mpn_lshift (tp, xp+n, n, shift);
  for (i = 3; i < k; i+= 2)
    {
      tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);
      tp[n] += mpn_add_n (tp, tp, xm2, n);
    }

  xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);
  if (k & 1)
    mpn_add (tp, tp, n+1, xm2, hn+1);
  else
    mpn_add (xp2, xp2, n+1, xm2, hn+1);
#endif /* !HAVE_NATIVE_mpn_addlsh_n */

  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;

#if HAVE_NATIVE_mpn_sumdiff_n
  if (neg)
    mpn_sumdiff_n (xp2, xm2, tp, xp2, n + 1);
  else
    mpn_sumdiff_n (xp2, xm2, xp2, tp, n + 1);
#else 
  if (neg)
    mpn_sub_n (xm2, tp, xp2, n + 1);
  else
    mpn_sub_n (xm2, xp2, tp, n + 1);

  mpn_add_n (xp2, xp2, tp, n + 1);
#endif

  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
	  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
	  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));

  return neg;
}
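The even/odd split above, restated in word arithmetic: accumulate
E = sum of c_{2i} * 2^{2i*shift} and O = sum of c_{2i+1} * 2^{(2i+1)*shift};
then X(2^shift) = E + O and X(-2^shift) = +-(E - O), with the sign taken
from comparing E and O.  A toy sketch with machine words (coefficients are
arbitrary):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* X(y) = 1 + 2y + 3y^2 + 4y^3 + 5y^4 at y = +-2^shift.  */
  uint64_t c[5] = { 1, 2, 3, 4, 5 };
  uint64_t e = 0, o = 0, xp2, xm2;
  unsigned shift = 3, i;
  int neg;

  for (i = 0; i < 5; i += 2)
    e += c[i] << (i * shift);		/* even-degree terms */
  for (i = 1; i < 5; i += 2)
    o += c[i] << (i * shift);		/* odd-degree terms */

  xp2 = e + o;				/* X(2^shift) */
  neg = e < o;				/* sign of X(-2^shift) */
  xm2 = neg ? o - e : e - o;		/* |X(-2^shift)| */

  assert (xp2 == 1 + 2*8 + 3*64 + 4*512 + 5*4096);
  printf ("X(8) = %llu, X(-8) = %s%llu\n", (unsigned long long) xp2,
	  neg ? "-" : "", (unsigned long long) xm2);
  return 0;
}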
Example #28
int
mpq_cmp (const MP_RAT *op1, const MP_RAT *op2)
{
  mp_size_t num1_size = op1->_mp_num._mp_size;
  mp_size_t den1_size = op1->_mp_den._mp_size;
  mp_size_t num2_size = op2->_mp_num._mp_size;
  mp_size_t den2_size = op2->_mp_den._mp_size;
  mp_size_t tmp1_size, tmp2_size;
  mp_ptr tmp1_ptr, tmp2_ptr;
  mp_size_t num1_sign;
  int cc;
  TMP_DECL;

  /* need canonical signs to get right result */
  ASSERT (den1_size > 0);
  ASSERT (den2_size > 0);

  if (num1_size == 0)
    return -num2_size;
  if (num2_size == 0)
    return num1_size;
  if ((num1_size ^ num2_size) < 0) /* I.e. are the signs different? */
    return num1_size;

  num1_sign = num1_size;
  num1_size = ABS (num1_size);
  num2_size = ABS (num2_size);

  tmp1_size = num1_size + den2_size;
  tmp2_size = num2_size + den1_size;

  /* 1. Check to see if we can tell which operand is larger by just looking at
     the number of limbs.  */

  /* NUM1 x DEN2 is either TMP1_SIZE limbs or TMP1_SIZE-1 limbs.
     Same for NUM2 x DEN1 with respect to TMP2_SIZE.  */
  if (tmp1_size > tmp2_size + 1)
    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */
    return num1_sign;
  if (tmp2_size > tmp1_size + 1)
    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */
    return -num1_sign;

  /* 2. Same, but compare the number of significant bits.  */
  {
    int cnt1, cnt2;
    mp_bitcnt_t bits1, bits2;

    count_leading_zeros (cnt1, op1->_mp_num._mp_d[num1_size - 1]);
    count_leading_zeros (cnt2, op2->_mp_den._mp_d[den2_size - 1]);
    bits1 = tmp1_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;

    count_leading_zeros (cnt1, op2->_mp_num._mp_d[num2_size - 1]);
    count_leading_zeros (cnt2, op1->_mp_den._mp_d[den1_size - 1]);
    bits2 = tmp2_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;

    if (bits1 > bits2 + 1)
      return num1_sign;
    if (bits2 > bits1 + 1)
      return -num1_sign;
  }

  /* 3. Finally, cross multiply and compare.  */

  TMP_MARK;
  TMP_ALLOC_LIMBS_2 (tmp1_ptr,tmp1_size, tmp2_ptr,tmp2_size);

  if (num1_size >= den2_size)
    tmp1_size -= 0 == mpn_mul (tmp1_ptr,
			       op1->_mp_num._mp_d, num1_size,
			       op2->_mp_den._mp_d, den2_size);
  else
    tmp1_size -= 0 == mpn_mul (tmp1_ptr,
			       op2->_mp_den._mp_d, den2_size,
			       op1->_mp_num._mp_d, num1_size);

   if (num2_size >= den1_size)
     tmp2_size -= 0 == mpn_mul (tmp2_ptr,
				op2->_mp_num._mp_d, num2_size,
				op1->_mp_den._mp_d, den1_size);
   else
     tmp2_size -= 0 == mpn_mul (tmp2_ptr,
				op1->_mp_den._mp_d, den1_size,
				op2->_mp_num._mp_d, num2_size);


  cc = tmp1_size - tmp2_size != 0
    ? tmp1_size - tmp2_size : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);
  TMP_FREE;
  return num1_sign < 0 ? -cc : cc;
}
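Step 3 in miniature: for positive denominators,
sign(n1/d1 - n2/d2) = sign(n1*d2 - n2*d1), which is why the cross products
decide the comparison.  A sketch checking that against mpq_cmp itself
(22/7 vs 355/113, both already canonical):

#include <assert.h>
#include <gmp.h>

int
main (void)
{
  mpq_t q1, q2;
  mpz_t x1, x2;
  int by_cross, by_mpq;

  mpq_init (q1);
  mpq_init (q2);
  mpq_set_ui (q1, 22, 7);
  mpq_set_ui (q2, 355, 113);

  mpz_inits (x1, x2, NULL);
  mpz_mul (x1, mpq_numref (q1), mpq_denref (q2));	/* n1*d2 */
  mpz_mul (x2, mpq_numref (q2), mpq_denref (q1));	/* n2*d1 */

  by_cross = mpz_cmp (x1, x2);
  by_mpq = mpq_cmp (q1, q2);
  assert ((by_cross > 0) == (by_mpq > 0)
	  && (by_cross < 0) == (by_mpq < 0));

  mpz_clears (x1, x2, NULL);
  mpq_clears (q1, q2, NULL);
  return 0;
}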
Example #29
void
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
{
  mp_ptr xp, tp, qp, gp, this_gp;
  mp_srcptr bp, ep, mp;
  mp_size_t bn, es, en, mn, xn;
  mp_limb_t invm, c;
  unsigned long int enb;
  mp_size_t i, K, j, l, k;
  int m_zero_cnt, e_zero_cnt;
  int sh;
  int use_redc;
#if HANDLE_NEGATIVE_EXPONENT
  mpz_t new_b;
#endif
#if REDUCE_EXPONENT
  mpz_t new_e;
#endif
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ (m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  TMP_MARK;

  es = SIZ (e);
  if (es <= 0)
    {
      if (es == 0)
	{
	  /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
	     m equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  TMP_FREE;	/* we haven't really allocated anything here */
	  return;
	}
#if HANDLE_NEGATIVE_EXPONENT
      MPZ_TMP_INIT (new_b, mn + 1);

      if (! mpz_invert (new_b, b, m))
	DIVIDE_BY_ZERO;
      b = new_b;
      es = -es;
#else
      DIVIDE_BY_ZERO;
#endif
    }
  en = es;

#if REDUCE_EXPONENT
  /* Reduce exponent by dividing it by phi(m) when m small.  */
  if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
    {
      MPZ_TMP_INIT (new_e, 2);
      mpz_mod_ui (new_e, e, phi (mp[0]));
      e = new_e;
    }
#endif

  use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
  if (use_redc)
    {
      /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
      modlimb_invert (invm, mp[0]);
      invm = -invm;
    }
  else
    {
      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp;
	  new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}
    }

  /* Determine optimal value of k, the number of exponent bits we look at
     at a time.  */
  count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
  e_zero_cnt -= GMP_NAIL_BITS;
  enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
  k = 1;
  K = 2;
  while (2 * enb > K * (2 + k * (3 + k)))
    {
      k++;
      K *= 2;
      if (k == 10)			/* cap allocation */
	break;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn);
  qp = TMP_ALLOC_LIMBS (mn + 1);

  gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);

  /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
  bn = ABSIZ (b);
  bp = PTR(b);
  /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
     for speed or correctness to do this when b and m have the same number of
     limbs, perhaps remove mpn_cmp call.  */
  if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
    {
      /* Reduce possibly huge base while moving it to gp[0].  Use a function
	 call to reduce, since we don't want the quotient allocation to
	 live until function return.  */
      if (use_redc)
	{
	  reduce (tp + mn, bp, bn, mp, mn);	/* b mod m */
	  MPN_ZERO (tp, mn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormalized! */
	}
      else
	{
	  reduce (gp, bp, bn, mp, mn);
	}
    }
  else
    {
      /* |b| < m.  We pad out operands to become mn limbs,  which simplifies
	 the rest of the function, but slows things down when |b| << m.  */
      if (use_redc)
	{
	  MPN_ZERO (tp, mn);
	  MPN_COPY (tp + mn, bp, bn);
	  MPN_ZERO (tp + mn + bn, mn - bn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
	}
      else
	{
	  MPN_COPY (gp, bp, bn);
	  MPN_ZERO (gp + bn, mn - bn);
	}
    }

  /* Compute the table of odd powers g[i] = x^(2i+1) (times R^n under REDC).  */

  xp = TMP_ALLOC_LIMBS (mn);
  mpn_sqr (tp, gp, mn);
  if (use_redc)
    mpn_redc_1 (xp, tp, mp, mn, invm);		/* xx = x^2*R^n */
  else
    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
  this_gp = gp;
  for (i = 1; i < K / 2; i++)
    {
      mpn_mul_n (tp, this_gp, xp, mn);
      this_gp += mn;
      if (use_redc)
	mpn_redc_1 (this_gp,tp, mp, mn, invm);	/* g[i] = x^(2i+1)*R^n */
      else
	mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
    }

  /* Start the real stuff.  */
  ep = PTR (e);
  i = en - 1;				/* current index */
  c = ep[i];				/* current limb */
  sh = GMP_NUMB_BITS - e_zero_cnt;	/* significant bits in ep[i] */
  sh -= k;				/* index of lower bit of ep[i] to take into account */
  if (sh < 0)
    {					/* k-sh extra bits are needed */
      if (i > 0)
	{
	  i--;
	  c <<= (-sh);
	  sh += GMP_NUMB_BITS;
	  c |= ep[i] >> sh;
	}
    }
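The listing above is cut off mid-function, but its contract is easy to check
from the outside: r = b^e mod m.  A minimal use (2^10 mod 1000 = 24):

#include <assert.h>
#include <gmp.h>

int
main (void)
{
  mpz_t r, b, e, m;

  mpz_init (r);
  mpz_init_set_ui (b, 2);
  mpz_init_set_ui (e, 10);
  mpz_init_set_ui (m, 1000);

  mpz_powm (r, b, e, m);
  assert (mpz_cmp_ui (r, 24) == 0);	/* 1024 mod 1000 */

  gmp_printf ("2^10 mod 1000 = %Zd\n", r);
  mpz_clears (r, b, e, m, NULL);
  return 0;
}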
Example #30
/* Check divide and conquer division routine. */
void
check_dc_div_qr (void)
{
    mp_limb_t np[2*MAX_LIMBS];
    mp_limb_t np2[2*MAX_LIMBS];
    mp_limb_t rp[2*MAX_LIMBS+1];
    mp_limb_t dp[MAX_LIMBS];
    mp_limb_t qp[2*MAX_LIMBS];
    mp_limb_t dip, d1ip, cy;

    mp_size_t nn, rn, dn, qn;

    gmp_randstate_t rands;

    int i, j, s;
    gmp_randinit_default(rands);

    for (i = 0; i < ITERS; i++)
    {
        dn = (random() % (MAX_LIMBS - 5)) + 6;
        nn = (random() % (MAX_LIMBS - 3)) + dn + 3;

        mpn_rrandom (np, rands, nn);
        mpn_rrandom (dp, rands, dn);
        dp[dn-1] |= GMP_LIMB_HIGHBIT;

        MPN_COPY(np2, np, nn);

        mpir_invert_pi2(dip, d1ip, dp[dn - 1], dp[dn - 2]);

        qn = nn - dn + 1;

        qp[qn - 1] = mpn_dc_div_qr(qp, np, nn, dp, dn, dip, d1ip);

        MPN_NORMALIZE(qp, qn);

        if (qn)
        {
            if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);
            else mpn_mul(rp, dp, dn, qp, qn);

            rn = dn + qn;
            MPN_NORMALIZE(rp, rn);

            if (rn > nn)
            {
                printf("failed: q*d has too many limbs\n");
                abort();
            }

            if (mpn_cmp(rp, np2, nn) > 0)
            {
                printf("failed: remainder negative\n");
                abort();
            }

            mpn_sub(rp, np2, nn, rp, rn);
            rn = nn;
            MPN_NORMALIZE(rp, rn);
        } else
        {
            rn = nn;
            MPN_COPY(rp, np, nn);
        }

        s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);
        if (s >= 0)
        {
            printf ("failed:\n");
            printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn);
            gmp_printf (" np: %Nx\n\n", np2, nn);
            gmp_printf (" dp: %Nx\n\n", dp, dn);
            gmp_printf (" qp: %Nx\n\n", qp, qn);
            gmp_printf (" rp: %Nx\n\n", rp, rn);
            abort ();
        }

        if (mpn_cmp(rp, np, rn) != 0)
        {
            printf("failed: remainder does not match\n");
            gmp_printf (" np: %Nx\n\n", np2, nn);
            gmp_printf (" dp: %Nx\n\n", dp, dn);
            gmp_printf (" qp: %Nx\n\n", qp, qn);
            gmp_printf (" rp: %Nx\n\n", rp, rn);
            gmp_printf (" rp2: %Nx\n\n", np, rn);
            abort ();
        }
    }

    gmp_randclear(rands);
}
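The invariant both division harnesses enforce, stated through the public
interface: n = q*d + r with 0 <= r < d for truncating division of
non-negative operands.  A sketch (operands arbitrary, any d > 0 works):

#include <assert.h>
#include <gmp.h>

int
main (void)
{
  mpz_t n, d, q, r, chk;

  mpz_init_set_str (n, "123456789123456789123456789", 10);
  mpz_init_set_ui (d, 0);
  mpz_setbit (d, 70);			/* d = 2^70 */
  mpz_inits (q, r, chk, NULL);

  mpz_tdiv_qr (q, r, n, d);

  mpz_mul (chk, q, d);
  mpz_add (chk, chk, r);
  assert (mpz_cmp (chk, n) == 0);	/* n == q*d + r */
  assert (mpz_sgn (r) >= 0 && mpz_cmp (r, d) < 0);	/* 0 <= r < d */

  mpz_clears (n, d, q, r, chk, NULL);
  return 0;
}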