Example #1
static void
check_one (mp_ptr qp, mp_srcptr rp,
	   mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
	   const char *fname, mp_limb_t q_allowed_err)
{
  mp_size_t qn = nn - dn + 1;
  mp_ptr tp;
  const char *msg;
  const char *tvalue;
  mp_limb_t i;
  TMP_DECL;
  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (nn + 1);
  if (dn >= qn)
    refmpn_mul (tp, dp, dn, qp, qn);
  else
    refmpn_mul (tp, qp, qn, dp, dn);

  for (i = 0; i < q_allowed_err && (tp[nn] > 0 || mpn_cmp (tp, np, nn) > 0); i++)
    ASSERT_NOCARRY (refmpn_sub (tp, tp, nn+1, dp, dn));

  if (tp[nn] > 0 || mpn_cmp (tp, np, nn) > 0)
    {
      msg = "q too large";
      tvalue = "Q*D";
    error:
      printf ("\r*******************************************************************************\n");
      printf ("%s failed test %ld: %s\n", fname, test, msg);
      printf ("N=    "); dumpy (np, nn);
      printf ("D=    "); dumpy (dp, dn);
      printf ("Q=    "); dumpy (qp, qn);
      if (rp)
	{ printf ("R=    "); dumpy (rp, dn); }
      printf ("%5s=", tvalue); dumpy (tp, nn+1);
      printf ("nn = %ld, dn = %ld, qn = %ld\n", nn, dn, qn);
      abort ();
    }

  ASSERT_NOCARRY (refmpn_sub_n (tp, np, tp, nn));
  tvalue = "N-Q*D";
  if (!mpn_zero_p (tp + dn, nn - dn) || mpn_cmp (tp, dp, dn) >= 0)
    {
      msg = "q too small";
      goto error;
    }

  if (rp && mpn_cmp (rp, tp, dn) != 0)
    {
      msg = "r incorrect";
      goto error;
    }

  TMP_FREE;
}
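The checker above recomputes Q*D and folds the allowed quotient error back in before comparing against N. The same N == Q*D + R invariant can be stated with only the public mpn interface; the following is a minimal sketch (verify_qr, its fixed buffer and the size bound are illustrative, not part of the test suite):

/* Hedged sketch: verify N == Q*D + R for an exact division.
   Assumes <assert.h> and <gmp.h>, and nn + 1 <= 65 (sketch-only bound). */
static void
verify_qr (mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
	   mp_srcptr qp, mp_srcptr rp)
{
  mp_limb_t tp[65];
  mp_limb_t cy;
  mp_size_t qn = nn - dn + 1;

  if (qn >= dn)			/* mpn_mul wants the longer operand first */
    mpn_mul (tp, qp, qn, dp, dn);
  else
    mpn_mul (tp, dp, dn, qp, qn);

  cy = mpn_add (tp, tp, nn + 1, rp, dn);	/* tp = Q*D + R */
  assert (cy == 0);				/* no carry may escape */
  assert (tp[nn] == 0 && mpn_cmp (tp, np, nn) == 0);	/* equals N exactly */
}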
Example #2
/* Evaluates a polynomial of degree k > 3 at the points +1 and -1. */
int
mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
{
  unsigned i;
  int neg;

  ASSERT (k >= 4);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * the last coefficient, of size hn, starts at xp + k*n. */

  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
  for (i = 4; i < k; i += 2)
    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));

  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
  for (i = 5; i < k; i += 2)
    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));

  if (k & 1)
    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
  else
    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));

  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;

#if HAVE_NATIVE_mpn_add_n_sub_n
  if (neg)
    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
  else
    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
#else
  if (neg)
    mpn_sub_n (xm1, tp, xp1, n + 1);
  else
    mpn_sub_n (xm1, xp1, tp, n + 1);

  mpn_add_n (xp1, xp1, tp, n + 1);
#endif

  ASSERT (xp1[n] <= k);
  ASSERT (xm1[n] <= k/2 + 1);

  return neg;
}
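The routine accumulates the even-index coefficients in xp1 and the odd-index ones in tp, because f(1) = E + O and f(-1) = E - O where E and O are those two sums. A single-limb toy model of that split (eval_pm1_toy is illustrative only):

/* Toy model of the +-1 evaluation: E and O mirror xp1 and tp above. */
static long
eval_pm1_toy (const long *c, unsigned k, int at_minus_one)
{
  long e = 0, o = 0;
  unsigned i;

  for (i = 0; i <= k; i += 2)
    e += c[i];			/* even-index sum, like xp1 */
  for (i = 1; i <= k; i += 2)
    o += c[i];			/* odd-index sum, like tp */
  return at_minus_one ? e - o : e + o;
}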
Example #3
static int
abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
  int result;
  result = abs_sub_n (rm, rp, rs, n);
  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
  return result;
}
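abs_sub_n itself is not part of this example; a plausible definition consistent with the call above computes rm = |rp - rs| and reports whether the difference had to be negated (a sketch under that assumption, not the actual helper):

/* Sketch: rm{,n} = |{rp,n} - {rs,n}|, return nonzero iff rp < rs. */
static int
abs_sub_n (mp_ptr rm, mp_srcptr rp, mp_srcptr rs, mp_size_t n)
{
  if (mpn_cmp (rp, rs, n) < 0)
    {
      mpn_sub_n (rm, rs, rp, n);	/* rp < rs: compute rs - rp */
      return 1;
    }
  mpn_sub_n (rm, rp, rs, n);		/* rp >= rs: compute rp - rs */
  return 0;
}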
Example #4
/* Computes R -= A * B. Result must be non-negative. Normalized down
   to size an, and resulting size is returned. */
static mp_size_t
submul (mp_ptr rp, mp_size_t rn,
	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_ptr tp;
  TMP_DECL;

  ASSERT (bn > 0);
  ASSERT (an >= bn);
  ASSERT (rn >= an);
  ASSERT (an + bn <= rn + 1);

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (an + bn);

  mpn_mul (tp, ap, an, bp, bn);
  if (an + bn > rn)
    {
      ASSERT (tp[rn] == 0);
      bn--;
    }
  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
  TMP_FREE;

  while (rn > an && (rp[rn-1] == 0))
    rn--;

  return rn;
}
Example #5
static int
add_signed_n (mp_ptr rp,
	      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
{
  if (as != bs)
    return as ^ abs_sub_n (rp, ap, bp, n);
  else
    {
      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
      return as;
    }
}
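Here as and bs are sign flags (0 for non-negative, nonzero for negative), so as ^ abs_sub_n (...) flips the result sign exactly when the magnitude subtraction came out negated. A single-limb toy of that sign algebra (add_signed_1_toy is illustrative):

/* Toy: signed-magnitude add of (a, as) and (b, bs); returns result sign. */
static int
add_signed_1_toy (long *r, long a, int as, long b, int bs)
{
  if (as != bs)			/* opposite signs: subtract magnitudes */
    {
      int flip = a < b;		/* what abs_sub_n would report */
      *r = flip ? b - a : a - b;
      return as ^ flip;		/* sign flips iff |b| was larger */
    }
  *r = a + b;			/* same sign: add magnitudes */
  return as;
}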
Example #6
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else {
    TMP_DECL;

    TMP_MARK;
    if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
      {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	for (i = n - 1; i >= 0; i--)
	  xp[i] = GMP_NUMB_MAX;
	mpn_com (xp + n, dp, n);
	if (n == 2) {
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	} else {
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
      }
    else { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
	/* Code to detect and correct the "off by one" approximation. */
	mpn_mul_n (scratch, ip, dp, n);
	ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	if (! mpn_add (scratch, scratch, 2*n, dp, n))
	  MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
      }
    }
    TMP_FREE;
  }
}
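The invariant established here is {ip,n} = floor((B^(2n) - 1) / {dp,n}) - B^n with B = 2^GMP_NUMB_BITS; that is exactly what the xp = B^(2n) - 1 - D*B^n setup in the first branch divides for. A hedged checker in terms of the public mpn_tdiv_qr (check_invert and its size bound are illustrative):

/* Check the inversion invariant via a plain division.
   Assumes <assert.h>, <gmp.h>, and n <= 8 (sketch-only bound). */
static void
check_invert (mp_srcptr ip, mp_srcptr dp, mp_size_t n)
{
  mp_limb_t xp[16], qp[9], rp[8];
  mp_size_t i;

  for (i = 0; i < 2 * n; i++)
    xp[i] = GMP_NUMB_MAX;			/* X = B^(2n) - 1 */

  mpn_tdiv_qr (qp, rp, 0, xp, 2 * n, dp, n);	/* Q = floor(X / D) */

  /* D is normalized, so B^n < Q < 2*B^n: Q's top limb is 1 and the
     stored inverse is Q's n low limbs. */
  assert (qp[n] == 1);
  assert (mpn_cmp (qp, ip, n) == 0);
}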
Example #7
/* (rp, 2n) = (xp, n)*(yp, n) */
static void
mpn_mulshort_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t m;
  mp_limb_t t;
  mp_ptr rpn2;

  ASSERT(n >= 1);
  ASSERT_MPN(xp, n);
  ASSERT_MPN(yp, n);
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));
  
  if (BELOW_THRESHOLD(n, MULHIGH_BASECASE_THRESHOLD))
    {
      mpn_mul_basecase(rp, xp, n, yp, n);
      
      return;
    }

  if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))
    {
      mpn_mulshort_n_basecase(rp, xp, yp, n);
      
      return;
    }

  /* choose optimal m s.t. n + 2 <= 2m,  m < n */
  ASSERT (n >= 4);

  m = 87 * n / 128;
  
  if (2 * m < n + 2)
    m = (n + 1) / 2 + 1;
  
  if (m >= n)
    m = n - 1;
  
  ASSERT (n + 2 <= 2 * m);
  ASSERT (m < n);
  
  rpn2 = rp + n - 2;
  
  mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);
  mpn_mulshort_n (rp, xp, yp + m, n - m);

  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
  
  mpn_mulshort_n (rp, xp + m, yp, n - m);
  
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
  
  umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);
  rp[0] = t >> GMP_NAIL_BITS;
  
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
  
  umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);
  rp[0] = t >> GMP_NAIL_BITS;
  
  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
  
  return;
}
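mpn_mulshort_n approximates the high part of the full 2n-limb product while skipping most of the low-half multiplications. The exact value it is measured against can be produced with the public mpn_mul_n (mulhigh_ref and its buffer bound are illustrative):

/* Reference: the exact high n limbs of {xp,n} * {yp,n}.
   Assumes <gmp.h> and n <= 8 (sketch-only bound). */
static void
mulhigh_ref (mp_ptr hp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_limb_t tp[16];
  mp_size_t i;

  mpn_mul_n (tp, xp, yp, n);	/* full 2n-limb product */
  for (i = 0; i < n; i++)
    hp[i] = tp[n + i];		/* keep only the high half */
}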
Example #8
REGPARM_ATTR (1) static void
mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
{
    mp_size_t  xsize, ysize, tsize, wsize, wsize_signed;
    mp_ptr     wp, tp;
    mp_limb_t  c, high;
    TMP_DECL;

    /* w unaffected if x==0 or y==0 */
    xsize = SIZ(x);
    ysize = SIZ(y);
    if (xsize == 0 || ysize == 0)
        return;

    /* make x the bigger of the two */
    if (ABS(ysize) > ABS(xsize))
    {
        MPZ_SRCPTR_SWAP (x, y);
        MP_SIZE_T_SWAP (xsize, ysize);
    }

    sub ^= ysize;
    ysize = ABS(ysize);

    /* use mpn_addmul_1/mpn_submul_1 if possible */
    if (ysize == 1)
    {
        mpz_aorsmul_1 (w, x, PTR(y)[0], sub);
        return;
    }

    sub ^= xsize;
    xsize = ABS(xsize);

    wsize_signed = SIZ(w);
    sub ^= wsize_signed;
    wsize = ABS(wsize_signed);

    tsize = xsize + ysize;
    wp = MPZ_REALLOC (w, MAX (wsize, tsize) + 1);

    if (wsize_signed == 0)
    {
        /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,
        since we know x,y!=0 but w==0.  */
        high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
        tsize -= (high == 0);
        SIZ(w) = (sub >= 0 ? tsize : -tsize);
        return;
    }

    TMP_MARK;
    tp = TMP_ALLOC_LIMBS (tsize);

    high = mpn_mul (tp, PTR(x),xsize, PTR(y),ysize);
    tsize -= (high == 0);
    ASSERT (tp[tsize-1] != 0);
    if (sub >= 0)
    {
        mp_srcptr up    = wp;
        mp_size_t usize = wsize;

        if (usize < tsize)
        {
            up	= tp;
            usize = tsize;
            tp	= wp;
            tsize = wsize;

            wsize = usize;
        }

        c = mpn_add (wp, up,usize, tp,tsize);
        wp[wsize] = c;
        wsize += (c != 0);
    }
    else
    {
        mp_srcptr up    = wp;
        mp_size_t usize = wsize;

        if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
        {
            up	= tp;
            usize = tsize;
            tp	= wp;
            tsize = wsize;

            wsize = usize;
            wsize_signed = -wsize_signed;
        }

        ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
        wsize = usize;
        MPN_NORMALIZE (wp, wsize);
    }

    SIZ(w) = (wsize_signed >= 0 ? wsize : -wsize);

    TMP_FREE;
}
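mpz_aorsmul is the shared backend of the documented mpz_addmul and mpz_submul entry points, with sub selecting between w += x*y and w -= x*y. Minimal usage at the public level:

/* Usage of the public wrappers over mpz_aorsmul. */
#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  mpz_t w, x, y;

  mpz_init_set_ui (w, 100);
  mpz_init_set_si (x, -7);
  mpz_init_set_ui (y, 6);

  mpz_addmul (w, x, y);		/* w = 100 + (-7)*6 = 58 */
  mpz_submul (w, x, y);		/* w = 58 - (-7)*6 = 100 */
  gmp_printf ("%Zd\n", w);	/* prints 100 */

  mpz_clear (w); mpz_clear (x); mpz_clear (y);
  return 0;
}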
Example #9
mp_limb_t
mpn_dc_div_qr (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6);		/* to adhere to mpn_sb_div_qr's limits */
  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sb_div_qr's limits */
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn > dn)
    {
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      /* Perform the typically smaller block first.  */
      if (qn == 1)
	{
	  mp_limb_t q, n2, n1, n0, d1, d0, d11, d01;

	  /* Handle qh up front, for simplicity. */
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
	  if (qh)
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

	  /* A single iteration of schoolbook: One 3/2 division,
	     followed by the bignum update and adjustment. */
	  n2 = np[0];
	  n1 = np[-1];
	  n0 = np[-2];
	  d1 = dp[-1];
	  d0 = dp[-2];
	  d01 = d0 + 1;
	  d11 = d1 + (d01 < d0);

	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

	  if (UNLIKELY (n2 == d1) && n1 == d0)
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
	      ASSERT (cy == n2);
	    }
	  else
	    {
	      mpir_divrem32_preinv2 (q, n1, n0, n2, n1, n0, d11, d01, d1, d0, dinv);

	      if (dn > 2)
		{
		  mp_limb_t cy, cy1;
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

		  cy1 = n0 < cy;
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
		  cy = n1 < cy1;
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
		  np[-2] = n0;

		  if (UNLIKELY (cy != 0))
		    {
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
		      qh -= (q == 0);
		      q = (q - 1) & GMP_NUMB_MASK;
		    }
		}
	      else
		np[-2] = n0;

	      np[-1] = n1;
	    }
	  qp[0] = q;
	}
      else
	{
      /* Do a 2qn / qn division */
	  if (qn == 2)
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	    qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
	  else
	    qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

	  if (qn != dn)
	    {
	      if (qn > dn - qn)
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	      else
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	      if (qh != 0)
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	      while (cy != 0)
		{
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
		}
	    }
	}

      qn = nn - dn - qn;
      do
	{
	  qp -= dn;
	  np -= dn;
	  ASSERT_NOCARRY(mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp));
	  qn -= dn;
	}
      while (qn > 0);
    }
  else
    {
      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
      else
	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

      if (qn != dn)
	{
	  if (qn > dn - qn)
	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	  else
	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	  if (qh != 0)
	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	  while (cy != 0)
	    {
	      qh -= mpn_sub_1 (qp, qp, qn, 1);
	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
	    }
	}
    }

  TMP_FREE;
  return qh;
}
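Callers normally reach this routine through the library's general division entry point; at the public level the same quotient/remainder split is mpn_tdiv_qr. A minimal sketch (the limb values are arbitrary):

/* Public-level division: {qp,nn-dn+1} and {rp,dn} from {np,nn} / {dp,dn}. */
#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  mp_limb_t np[4] = { 5, 0, 1, 7 };	/* N, least significant limb first */
  mp_limb_t dp[2] = { 3, 9 };		/* D; the high limb must be nonzero */
  mp_limb_t qp[3], rp[2];

  mpn_tdiv_qr (qp, rp, 0, np, 4, dp, 2);
  printf ("q = [%lu, %lu, %lu]\n",
	  (unsigned long) qp[0], (unsigned long) qp[1], (unsigned long) qp[2]);
  return 0;
}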
Example #10
void
mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
			   mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
			   mp_size_t w6n, mp_ptr tp)
{
  mp_size_t m;
  mp_limb_t cy;

  m = 2*n + 1;
#define w0 rp
#define w2 (rp + 2*n)
#define w6 (rp + 6*n)

  ASSERT (w6n > 0);
  ASSERT (w6n <= 2*n);

  /* Using formulas similar to Marco Bodrato's

     W5 = W5 + W4
     W1 =(W4 - W1)/2
     W4 = W4 - W0
     W4 =(W4 - W1)/4 - W6*16
     W3 =(W2 - W3)/2
     W2 = W2 - W3

     W5 = W5 - W2*65      May be negative.
     W2 = W2 - W6 - W0
     W5 =(W5 + W2*45)/2   Now >= 0 again.
     W4 =(W4 - W2)/3
     W2 = W2 - W4

     W1 = W5 - W1         May be negative.
     W5 =(W5 - W3*8)/9
     W3 = W3 - W5
     W1 =(W1/15 + W5)/2   Now >= 0 again.
     W5 = W5 - W1

     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
	   W4 = f(2), W5 = f(1/2), W6 = f(oo),

     Note that most intermediate results are positive; the ones that
     may be negative are represented in two's complement. We must
     never shift right a value that may be negative, since that would
     invalidate the sign bit. On the other hand, divexact by odd
     numbers work fine with two's complement.
  */

  mpn_add_n (w5, w5, w4, m);
  if (flags & toom7_w1_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w1, w1, w4, m);
#else
      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w1, w4, w1, m);
#else
      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  mpn_sub (w4, w4, m, w0, 2*n);
  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));
  mpn_rshift (w4, w4, m, 2); /* w4>=0 */

  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
  mpn_sub (w4, w4, m, tp, w6n+1);

  if (flags & toom7_w3_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w3, w3, w2, m);
#else
      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w3, w2, w3, m);
#else
      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }

  mpn_sub_n (w2, w2, w3, m);

  mpn_submul_1 (w5, w2, m, 65);
  mpn_sub (w2, w2, m, w6, w6n);
  mpn_sub (w2, w2, m, w0, 2*n);

  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));
  mpn_rshift (w5, w5, m, 1);
  mpn_sub_n (w4, w4, w2, m);

  mpn_divexact_by3 (w4, w4, m);
  mpn_sub_n (w2, w2, w4, m);

  mpn_sub_n (w1, w5, w1, m);
  mpn_lshift (tp, w3, m, 3);
  mpn_sub_n (w5, w5, tp, m);
  mpn_divexact_by9 (w5, w5, m);
  mpn_sub_n (w3, w3, w5, m);

  mpn_divexact_by15 (w1, w1, m);
  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
  mpn_sub_n (w5, w5, w1, m);

  /* These bounds are valid for the 4x4 polynomial product of toom44,
   * and they are conservative for toom53 and toom62. */
  ASSERT (w1[2*n] < 2);
  ASSERT (w2[2*n] < 3);
  ASSERT (w3[2*n] < 4);
  ASSERT (w4[2*n] < 3);
  ASSERT (w5[2*n] < 2);

  /* Addition chain. Note carries and the 2n'th limbs that need to be
   * added in.
   *
   * Special care is needed for w2[2n] and the corresponding carry,
   * since the "simple" way of adding it all together would overwrite
   * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
   * the high half of w3 and the low half of w4.
   *
   *         7    6    5    4    3    2    1    0
   *    |    |    |    |    |    |    |    |    |
   *                  ||w3 (2n+1)|
   *             ||w4 (2n+1)|
   *        ||w5 (2n+1)|        ||w1 (2n+1)|
   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
   *  -----------------------------------------------
   *  r |    |    |    |    |    |    |    |    |
   *        c7   c6   c5   c4   c3                 Carries to propagate
   */

  cy = mpn_add_n (rp + n, rp + n, w1, m);
  MPN_INCR_U (w2 + n + 1, n, cy);
  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
  if (w6n > n + 1)
    ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
  else
    {
      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
#if WANT_ASSERT
      {
	mp_size_t i;
	for (i = w6n; i <= n; i++)
	  ASSERT (w5[n + i] == 0);
      }
#endif
    }
}
Example #11
void
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
			   mp_ptr r3, mp_ptr r7,
			   mp_size_t spt, mp_ptr ws)
{
  mp_limb_signed_t cy;
  mp_ptr r5, r1;
  r5 = (pp + 3 * n);			/* 3n+1 */
  r1 = (pp + 7 * n);			/* spt */

  /******************************* interpolation *****************************/

  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);

  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);

  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
  cy = mpn_sub_n (r7, r7, r1, spt);
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));

  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));

  mpn_divexact_by45 (r3, r3, 3 * n + 1);

  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));

  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));

  /* last interpolation steps... */
  /* ... are mixed with recomposition */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp

    summation scheme for remaining operations:
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
	  ||_H r3|_M r3|_L*r3|
				  ||_H_r7|_M_r7|_L_r7|
		      ||-H r3|-M r3|-L*r3|
				  ||-H*r5|-M_r5|-L_r5|
  */

  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
  if (0 > cy)
    MPN_DECR_U (r7 + n, 2*n + 1, 1);
  else
    MPN_INCR_U (r7 + n, 2*n + 1, cy);

  cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
  MPN_DECR_U (r7 + 2*n, n + 1, cy);

  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
  if (UNLIKELY(0 > cy))
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
  else
    MPN_INCR_U (r5 + n + 1, 2*n, cy);

  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */

  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
  if (LIKELY(spt != n))
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
  else
    ASSERT ((r3[3*n] | cy) == 0);
}
Example #12
void
mpf_ui_sub (mpf_ptr r, unsigned long int u, mpf_srcptr v)
{
#if 1
  __mpf_struct uu;
  mp_limb_t ul;

  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }

  ul = u;
  uu._mp_size = 1;
  uu._mp_d = &ul;
  uu._mp_exp = 1;
  mpf_sub (r, &uu, v);

#else
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t uexp;
  mp_size_t ediff;
  int negate;
  mp_limb_t ulimb;
  TMP_DECL;

  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }
  if (vsize == 0)
    {
      mpf_set_ui (r, u);
      return;
    }

  /* If signs of U and V are different, perform addition.  */
  if (vsize < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_add_ui (r, &v_negated, u);
      return;
    }

  /* Signs are now known to be the same.  */
  ASSERT (vsize > 0);
  ulimb = u;
  /* Make U be the operand with the largest exponent.  */
  negate = 1 < v->_mp_exp;
  prec = r->_mp_prec + negate;
  rp = r->_mp_d;
  if (negate)
    {
      usize = vsize;
      vsize = 1;
      up = v->_mp_d;
      vp = &ulimb;
      uexp = v->_mp_exp;
      ediff = uexp - 1;

      /* If U extends beyond PREC, ignore the part that does.  */
      if (usize > prec)
	{
	  up += usize - prec;
	  usize = prec;
	}
      ASSERT (ediff > 0);
    }
  else
    {
      vp = v->_mp_d;
      ediff = 1 - v->_mp_exp;
      /* Ignore leading limbs in U and V that are equal.  Doing
	 this helps increase the precision of the result.  */
      if (ediff == 0 && ulimb == vp[vsize - 1])
	{
	  usize = 0;
	  vsize--;
	  uexp = 0;
	  /* Note that V might now have leading zero limbs.
	     In that case we have to adjust uexp.  */
	  for (;;)
	    {
	      if (vsize == 0) {
		rsize = 0;
		uexp = 0;
		goto done;
	      }
	      if ( vp[vsize - 1] != 0)
		break;
	      vsize--, uexp--;
	    }
	}
      else
	{
	  usize = 1;
	  uexp = 1;
	  up = &ulimb;
	}
      ASSERT (usize <= prec);
    }

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
	MPN_COPY (rp, up, usize);
      rsize = usize;
    }
  else
    {
      /* If V extends beyond PREC, ignore the part that does.
	 Note that this can make vsize neither zero nor negative.  */
      if (vsize + ediff > prec)
	{
	  vp += vsize + ediff - prec;
	  vsize = prec - ediff;
	}

      /* Locate the least significant non-zero limb in (the needed
	 parts of) U and V, to simplify the code below.  */
      ASSERT (vsize > 0);
      for (;;)
	{
	  if (vp[0] != 0)
	    break;
	  vp++, vsize--;
	  if (vsize == 0)
	    {
	      MPN_COPY (rp, up, usize);
	      rsize = usize;
	      goto done;
	    }
	}
      for (;;)
	{
	  if (usize == 0)
	    {
	      MPN_COPY (rp, vp, vsize);
	      rsize = vsize;
	      negate ^= 1;
	      goto done;
	    }
	  if (up[0] != 0)
	    break;
	  up++, usize--;
	}

      ASSERT (usize > 0 && vsize > 0);
      TMP_MARK;

      tp = TMP_ALLOC_LIMBS (prec);

      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff)
	{
	  /* U and V partially overlaps.  */
	  if (ediff == 0)
	    {
	      ASSERT (usize == 1 && vsize >= 1 && ulimb == *up); /* usize is 1>ediff, vsize >= 1 */
	      if (1 < vsize)
		{
		  /* u        */
		  /* vvvvvvv  */
		  rsize = vsize;
		  vsize -= 1;
		  /* mpn_cmp (up, vp + vsize - usize, usize) > 0 */
		  if (ulimb > vp[vsize])
		    {
		      tp[vsize] = ulimb - vp[vsize] - 1;
		      ASSERT_CARRY (mpn_neg (tp, vp, vsize));
		    }
		  else
		    {
		      /* vvvvvvv  */  /* Swap U and V. */
		      /* u        */
		      MPN_COPY (tp, vp, vsize);
		      tp[vsize] = vp[vsize] - ulimb;
		      negate = 1;
		    }
		}
	      else /* vsize == usize == 1 */
		{
		  /* u     */
		  /* v     */
		  rsize = 1;
		  negate = ulimb < vp[0];
		  tp[0] = negate ? vp[0] - ulimb: ulimb - vp[0];
		}
	    }
	  else
	    {
	      ASSERT (vsize + ediff <= usize);
	      ASSERT (vsize == 1 && usize >= 2 && ulimb == *vp);
		{
		  /* uuuu     */
		  /*   v      */
		  mp_size_t size;
		  size = usize - ediff - 1;
		  MPN_COPY (tp, up, size);
		  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up + size, usize - size, ulimb));
		  rsize = usize;
		}
		/* Other cases are not possible */
		/* uuuu     */
		/*   vvvvv  */
	    }
	}
      else
	{
	  /* uuuu     */
	  /*      vv  */
	  mp_size_t size, i;
	  ASSERT_CARRY (mpn_neg (tp, vp, vsize));
	  rsize = vsize + ediff;
	  size = rsize - usize;
	  for (i = vsize; i < size; i++)
	    tp[i] = GMP_NUMB_MAX;
	  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up, usize, CNST_LIMB (1)));
	}

      /* Full normalize.  Optimize later.  */
      while (rsize != 0 && tp[rsize - 1] == 0)
	{
	  rsize--;
	  uexp--;
	}
      MPN_COPY (rp, tp, rsize);
      TMP_FREE;
    }

 done:
  r->_mp_size = negate ? -rsize : rsize;
  r->_mp_exp = uexp;
#endif
}
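At the call site this is simply r = u - v with an unsigned long minuend:

/* Usage sketch for mpf_ui_sub. */
#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  mpf_t v, r;

  mpf_init_set_d (v, 2.5);
  mpf_init (r);

  mpf_ui_sub (r, 10, v);	/* r = 10 - 2.5 */
  gmp_printf ("%.2Ff\n", r);	/* prints 7.50 */

  mpf_clear (v);
  mpf_clear (r);
  return 0;
}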
Example #13
/* Computes |v| = |(g - u a)| / b, where u may be positive or
   negative, and v is of the opposite sign. a, b are of size n, u and
   v at most size n, and v must have space for n+1 limbs. */
static mp_size_t
compute_v (mp_ptr vp,
	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
	   mp_srcptr gp, mp_size_t gn,
	   mp_srcptr up, mp_size_t usize,
	   mp_ptr tp)
{
  mp_size_t size;
  mp_size_t an;
  mp_size_t bn;
  mp_size_t vn;

  ASSERT (n > 0);
  ASSERT (gn > 0);
  ASSERT (usize != 0);

  size = ABS (usize);
  ASSERT (size <= n);

  an = n;
  MPN_NORMALIZE (ap, an);

  if (an >= size)
    mpn_mul (tp, ap, an, up, size);
  else
    mpn_mul (tp, up, size, ap, an);

  size += an;

  ASSERT (gn <= size);

  if (usize > 0)
    {
      /* |v| = -v = (u a - g) / b */

      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
      MPN_NORMALIZE (tp, size);
      if (size == 0)
	return 0;
    }
  else
    { /* usize < 0 */
      /* |v| = v = (c - u a) / b = (c + |u| a) / b */
      mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
      if (cy)
	tp[size++] = cy;
    }

  /* Now divide t / b. There must be no remainder */
  bn = n;
  MPN_NORMALIZE (bp, bn);
  ASSERT (size >= bn);

  vn = size + 1 - bn;
  ASSERT (vn <= n + 1);

  /* FIXME: Use divexact. Or do the entire calculation mod 2^{n *
     GMP_NUMB_BITS}. */
  mpn_tdiv_qr (vp, tp, 0, tp, size, bp, bn);
  vn -= (vp[vn-1] == 0);

  /* Remainder must be zero */
#if WANT_ASSERT
  {
    mp_size_t i;
    for (i = 0; i < bn; i++)
      {
	ASSERT (tp[i] == 0);
      }
  }
#endif
  return vn;
}
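compute_v reconstructs the second cofactor from the identity g = u*a + v*b. At the mpz level the whole computation, including both cofactors, is the documented mpz_gcdext:

/* The cofactor identity checked at the public mpz level. */
#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  mpz_t g, s, t, a, b, chk;

  mpz_init (g); mpz_init (s); mpz_init (t); mpz_init (chk);
  mpz_init_set_ui (a, 6);
  mpz_init_set_ui (b, 15);

  mpz_gcdext (g, s, t, a, b);	/* g = s*a + t*b = gcd(6,15) = 3 */

  mpz_mul (chk, s, a);
  mpz_addmul (chk, t, b);	/* chk = s*a + t*b */
  gmp_printf ("g = %Zd, s*a + t*b = %Zd\n", g, chk);	/* both print 3 */

  mpz_clear (g); mpz_clear (s); mpz_clear (t);
  mpz_clear (a); mpz_clear (b); mpz_clear (chk);
  return 0;
}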
Example #14
/* Perform a few steps, using some of mpn_nhgcd2, subtraction and division.
   Reduces the size by almost one limb or more, but never below the
   given size s. Return new size for a and b, or 0 if no more steps
   are possible. M = NULL is allowed, if M is not needed.

   Needs temporary space for division, n + 1 limbs, and for
   ngcd_matrix1_vector, n limbs. */
mp_size_t
mpn_ngcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
	       struct ngcd_matrix *M, mp_ptr tp)
{
  struct ngcd_matrix1 M1;
  mp_limb_t mask;
  mp_limb_t ah, al, bh, bl;
  mp_size_t an, bn, qn;
  mp_ptr qp;
  mp_ptr rp;
  int col;

  ASSERT (n > s);

  mask = ap[n-1] | bp[n-1];
  ASSERT (mask > 0);

  if (n == s + 1)
    {
      if (mask < 4)
	goto subtract;

      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else if (mask & GMP_NUMB_HIGHBIT)
    {
      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else
    {
      int shift;

      count_leading_zeros (shift, mask);
      ah = MPN_EXTRACT_LIMB (shift, ap[n-1], ap[n-2]);
      al = MPN_EXTRACT_LIMB (shift, ap[n-2], ap[n-3]);
      bh = MPN_EXTRACT_LIMB (shift, bp[n-1], bp[n-2]);
      bl = MPN_EXTRACT_LIMB (shift, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_nhgcd2 step */
  if (mpn_nhgcd2 (ah, al, bh, bl, &M1))
    {
      /* Multiply M <- M * M1 */
      if (M)
	ngcd_matrix_mul_1 (M, &M1);

      /* Multiply M1^{-1} (a;b) */
      return mpn_ngcd_matrix1_vector (&M1, n, ap, bp, tp);
    }

 subtract:
  /* There are two ways in which mpn_nhgcd2 can fail. Either one of ah and
     bh was too small, or ah, bh were (almost) equal. Perform one
     subtraction step (for possible cancellation of high limbs),
     followed by one division. */

  /* Since we must ensure that #(a-b) > s, we handle cancellation of
     high limbs explicitly up front. (FIXME: Or is it better to just
     subtract, normalize, and use an addition to undo if it turns out
     the difference is too small?) */
  for (an = n; an > s; an--)
    if (ap[an-1] != bp[an-1])
      break;

  if (an == s)
    return 0;

  /* Maintain a > b. When needed, swap a and b, and let col keep track
     of how to update M. */
  if (ap[an-1] > bp[an-1])
    {
      /* a is largest. In the subtraction step, we need to update
	 column 1 of M */
      col = 1;
    }
  else
    {
      MP_PTR_SWAP (ap, bp);
      col = 0;
    }

  bn = n;
  MPN_NORMALIZE (bp, bn);  
  if (bn <= s)
    return 0;
  
  /* We have #a, #b > s. When is it possible that #(a-b) < s? For
     cancellation to happen, the numbers must be of the form

       a = x + 1, 0,            ..., 0,            al
       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl

     where al, bl denotes the least significant k limbs. If al < bl,
     then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
     then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */

  if (ap[an-1] == bp[an-1] + 1)
    {
      mp_size_t k;
      int c;
      for (k = an-1; k > s; k--)
	if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
	  break;

      MPN_CMP (c, ap, bp, k);
      if (c < 0)
	{
	  mp_limb_t cy;
	  
	  /* The limbs from k and up are cancelled. */
	  if (k == s)
	    return 0;
	  cy = mpn_sub_n (ap, ap, bp, k);
	  ASSERT (cy == 1);
	  an = k;
	}
      else
	{
	  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
	  ap[k] = 1;
	  an = k + 1;
	}
    }
  else
    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
  
  ASSERT (an > s);
  ASSERT (ap[an-1] > 0);
  ASSERT (bn > s);
  ASSERT (bp[bn-1] > 0);
  
  if (M)
    ngcd_matrix_update_1 (M, col);

  if (an < bn)
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      col ^= 1;
    }
  else if (an == bn)
    {
      int c;
      MPN_CMP (c, ap, bp, an);
      if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  col ^= 1;
	}
    }

  /* Divide a / b. Store first the quotient (qn limbs) and then the
     remainder (bn limbs) starting at tp. */
  qn = an + 1 - bn;
  qp = tp;
  rp = tp + qn;

  /* FIXME: We could use an approximate division that may return a
     too-small quotient, and only guarantees that the size of r is
     almost the size of b. */
  mpn_tdiv_qr (qp, rp, 0, ap, an, bp, bn);
  qn -= (qp[qn - 1] == 0);

  /* Normalize remainder */
  an = bn;
  for ( ; an > s; an--)
    if (rp[an-1] > 0)
      break;

  if (an > s)
    /* Include leading zero limbs */
    MPN_COPY (ap, rp, bn);
  else
    {
      /* Quotient is too large */
      mp_limb_t cy;

      cy = mpn_add (ap, bp, bn, rp, an);

      if (cy > 0)
	{
	  ASSERT (bn < n);
	  ap[bn] = cy;
	  bp[bn] = 0;
	  bn++;
	}

      MPN_DECR_U (qp, qn, 1);
      qn -= (qp[qn-1] == 0);
    }

  if (qn > 0 && M)
    ngcd_matrix_update_q (M, qp, qn, col);

  return bn;
}
Example #15
mp_size_t
mpn_ngcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr s0p, mp_ptr u0, mp_ptr u1, 
		             mp_size_t *un, mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
{
  /* Called when nhgcd or mpn_nhgcd2 has failed. Then either one of a or b
     is very small, or the difference is very small. Perform one
     subtraction followed by one division. */

  mp_size_t an, bn, qn, qn2, u0n;
  mp_limb_t cy;
  int negate = 0;
  int c;

  ASSERT (n > 0);
  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);

  /* See to what extent ap and bp are the same */
  for (an = n; an > 0; an--)
    if (ap[an-1] != bp[an-1])
      break;

  if (an == 0)
    {
      /* ap OR bp is the gcd; two possible normalisations,
	 u1 or -u0: pick the smallest.  */
      MPN_COPY (gp, ap, n);
      (*gn) = n;

      MPN_CMP (c, u1, u0, *un);
      if (c <= 0)		/* u1 is smallest */
	{
	  MPN_NORMALIZE (u1, (*un));
	  MPN_COPY (s0p, u1, (*un));
	}
      else			/* -u0 is smallest */
	{
	  MPN_NORMALIZE (u0, (*un));
	  MPN_COPY (s0p, u0, (*un));
	  (*un) = -(*un);
	}

      return 0;
    }

  if (ap[an-1] < bp[an-1]) /* swap so that ap >= bp */
    {
      MP_PTR_SWAP (ap, bp);
      MP_PTR_SWAP (u0, u1);
      negate = ~negate;
    }

  bn = n;
  MPN_NORMALIZE (bp, bn);
  if (bn == 0)
    {
      /* ap is the gcd */
      MPN_COPY (gp, ap, n);
      MPN_NORMALIZE (u1, (*un));
      MPN_COPY (s0p, u1, (*un));
      if (negate)
	(*un) = -(*un);
      (*gn) = n;

      return 0;
    }

  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an)); /* ap -= bp, u1 += u0 */
  MPN_NORMALIZE (ap, an);
  
  ASSERT (an > 0);
	
  cy = mpn_add_n (u1, u1, u0, *un);
  if (cy)
    u1[(*un)++] = cy;

  if (an < bn) /* make an >= bn */
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      MP_PTR_SWAP (u0, u1);
      negate = ~negate;
    }
  else if (an == bn)
    {
      MPN_CMP (c, ap, bp, an);
      if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  MP_PTR_SWAP (u0, u1);
	  negate = ~negate;
	}
      else if (c == 0) /* gcd is ap OR bp */
	{
	  /* This case seems never to occur; it should happen only
	     if ap = 2*bp.  */
	  MPN_COPY (gp, ap, an);
	  (*gn) = an;
	  /* As the gcd is ap OR bp, there are two possible cofactors
	     here, u1 or -u0, and we want the lesser of the two.  */
	  MPN_CMP (c, u1, u0, *un);
	  if (c < 0)		/* u1 is less */
	    {
	      MPN_NORMALIZE (u1, (*un));
	      MPN_COPY (s0p, u1, (*un));
	      if (negate)
		(*un) = -(*un);
	    }
	  else if (c > 0)	/* -u0 is less */
	    {
	      MPN_NORMALIZE (u0, (*un));
	      MPN_COPY (s0p, u0, (*un));
	      if (!negate)
		(*un) = -(*un);
	    }
	  else			/* same magnitude */
	    {
	      MPN_NORMALIZE (u0, (*un));
	      MPN_COPY (s0p, u0, (*un));
	    }

	  return 0;
	}
    }

  ASSERT (an >= bn);

  qn = an - bn + 1;
  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn); /* ap -= q * bp, u1 += q * u0 */

  /* Normalizing seems to be the simplest way to test if the remainder
     is zero. */
  an = bn;
  MPN_NORMALIZE (ap, an);
  if (an == 0)
    {
      /* This case never seems to occur; the gcd is bp.  */
      MPN_COPY (gp, bp, bn);
      MPN_NORMALIZE (u0, (*un));
      MPN_COPY (s0p, u0, (*un));
      if (!negate)
	(*un) = -(*un);
      (*gn) = bn;

      return 0;
    }

  qn2 = qn;
  u0n = (*un);
  MPN_NORMALIZE (tp, qn2);
  MPN_NORMALIZE (u0, u0n);

  if (u0n > 0)
    {
      if (qn2 > u0n)
	mpn_mul (tp + qn, tp, qn2, u0, u0n);
      else
	mpn_mul (tp + qn, u0, u0n, tp, qn2);

      u0n += qn2;
      MPN_NORMALIZE (tp + qn, u0n);

      if ((*un) >= u0n)
	{
	  cy = mpn_add (u1, u1, (*un), tp + qn, u0n);
	  if (cy)
	    u1[(*un)++] = cy;
	}
      else
	{
	  cy = mpn_add (u1, tp + qn, u0n, u1, (*un));
	  (*un) = u0n;
	  if (cy)
	    u1[(*un)++] = cy;
	}
    }

  return bn;
}
Example #16
void
mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

#define   r4    (pp + n3)			/* 3n+1 */
#define   r2    (pp + 7 * n)			/* 3n+1 */
#define   r0    (pp +11 * n)			/* s+t <= 2*n */

  /******************************* interpolation *****************************/
  if (half != 0) {
    cy = mpn_sub_n (r3, r3, r0, spt);
    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);

    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);

    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
  }

  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
#else
  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
  MP_PTR_SWAP(r1, wsi);
#endif

  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
  MP_PTR_SWAP(r5, wsi);
#endif

  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);

#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
#else
  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
#endif
  /* A division by 2835x4 follows. Warning: the operand can be negative! */
  mpn_divexact_by2835x4(r4, r4, n3p1);
  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));

#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
#endif
  mpn_divexact_by255(r5, r5, n3p1);

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));

#if AORSMUL_FASTER_3AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
#else
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
#endif
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
  mpn_divexact_by42525(r1, r1, n3p1);

#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
#else
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
#endif
  mpn_divexact_by9x4(r2, r2, n3p1);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));

  mpn_sub_n (r4, r2, r4, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));

  mpn_add_n (r5, r5, r1, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));

  /* last interpolation steps... */
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
  /* ... could be mixed with recomposition
	||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp

    summation scheme for remaining operations:
    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
  */

  cy = mpn_add_n (pp + n, pp + n, r5, n);
  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
#else
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);

  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
  if (half) {
    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
    }
#else
    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
    }
#endif
  } else {
    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
  }

#undef   r0
#undef   r2
#undef   r4
}
Example #17
/* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
 *
 * Resulting elements are of size up to rn + mn + 1.
 *
 * Temporary storage: 3 rn + 3 mn + 5. */
void
mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
			   mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
			   mp_ptr tp)
{
  mp_ptr s0, t0, u0, u1;
  int r1s, r3s, s0s, t0s, u1s;
  s0 = tp; tp += rn + 1;
  t0 = tp; tp += mn + 1;
  u0 = tp; tp += rn + mn + 1;
  u1 = tp; /* rn + mn + 2 */

  MUL (u0, r1, rn, m2, mn);		/* u5 = s5 * t6 */
  r3s = abs_sub_n (r3, r3, r2, rn);	/* r3 - r2 */
  if (r3s)
    {
      r1s = abs_sub_n (r1, r1, r3, rn);
      r1[rn] = 0;
    }
  else
    {
      r1[rn] = mpn_add_n (r1, r1, r3, rn);
      r1s = 0;				/* r1 - r2 + r3  */
    }
  if (r1s)
    {
      s0[rn] = mpn_add_n (s0, r1, r0, rn);
      s0s = 0;
    }
  else if (r1[rn] != 0)
    {
      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
      s0s = 1;				/* s4 = -r0 + r1 - r2 + r3 */
					/* Reverse sign! */
    }
  else
    {
      s0s = abs_sub_n (s0, r0, r1, rn);
      s0[rn] = 0;
    }
  MUL (u1, r0, rn, m0, mn);		/* u0 = s0 * t0 */
  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
  ASSERT (r0[rn+mn] < 2);		/* u0 + u5 */

  t0s = abs_sub_n (t0, m3, m2, mn);
  u1s = r3s^t0s^1;			/* Reverse sign! */
  MUL (u1, r3, rn, t0, mn);		/* u2 = s2 * t2 */
  u1[rn+mn] = 0;
  if (t0s)
    {
      t0s = abs_sub_n (t0, m1, t0, mn);
      t0[mn] = 0;
    }
  else
    {
      t0[mn] = mpn_add_n (t0, t0, m1, mn);
    }

  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
     at r3. I'd expect that for matrices of random size, the high
     words t0[mn] and r1[rn] are non-zero with a pretty small
     probability. If that can be confirmed this should be done as an
     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
     add_n. */
  if (t0[mn] != 0)
    {
      MUL (r3, r1, rn, t0, mn + 1);	/* u3 = s3 * t3 */
      ASSERT (r1[rn] < 2);
      if (r1[rn] != 0)
	mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
    }
  else
    {
      MUL (r3, r1, rn + 1, t0, mn);
    }

  ASSERT (r3[rn+mn] < 4);

  u0[rn+mn] = 0;
  if (r1s^t0s)
    {
      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
    }
  else
    {
      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
      r3s = 0;				/* u3 + u5 */
    }

  if (t0s)
    {
      t0[mn] = mpn_add_n (t0, t0, m0, mn);
    }
  else if (t0[mn] != 0)
    {
      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
    }
  else
    {
      t0s = abs_sub_n (t0, t0, m0, mn);
    }
  MUL (u0, r2, rn, t0, mn + 1);		/* u6 = s6 * t4 */
  ASSERT (u0[rn+mn] < 2);
  if (r1s)
    {
      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
    }
  else
    {
      r1[rn] += mpn_add_n (r1, r1, r2, rn);
    }
  rn++;
  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
					/* u3 + u5 + u6 */
  ASSERT (r2[rn+mn-1] < 4);
  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
					/* -u2 + u3 + u5  */
  ASSERT (r3[rn+mn-1] < 3);
  MUL (u0, s0, rn, m1, mn);		/* u4 = s4 * t5 */
  ASSERT (u0[rn+mn-1] < 2);
  t0[mn] = mpn_add_n (t0, m3, m1, mn);
  MUL (u1, r1, rn, t0, mn + 1);		/* u1 = s1 * t1 */
  mn += rn;
  ASSERT (u1[mn-1] < 4);
  ASSERT (u1[mn] == 0);
  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
					/* -u2 + u3 - u4 + u5  */
  ASSERT (r1[mn-1] < 2);
  if (r3s)
    {
      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
					/* u1 + u2 - u3 - u5  */
    }
  ASSERT (r3[mn-1] < 2);
  if (t0s)
    {
      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
					/* u1 - u3 - u5 - u6  */
    }
  ASSERT (r2[mn-1] < 2);
}
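The u0..u6 products above implement a Strassen-style 2x2 multiplication in seven big-number multiplications instead of eight. For reference, the naive scheme it replaces, written out with plain integers (matrix22_mul_ref is illustrative):

/* Naive reference: R = R * M with eight scalar multiplications,
   R = (r0 r1; r2 r3), M = (m0 m1; m2 m3). */
static void
matrix22_mul_ref (long r[4], const long m[4])
{
  long t0 = r[0] * m[0] + r[1] * m[2];
  long t1 = r[0] * m[1] + r[1] * m[3];
  long t2 = r[2] * m[0] + r[3] * m[2];
  long t3 = r[2] * m[1] + r[3] * m[3];

  r[0] = t0; r[1] = t1;
  r[2] = t2; r[3] = t3;
}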
Example #18
void
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
			   mp_size_t k, mp_size_t twor, int sa,
			   mp_limb_t vinf0, mp_ptr ws)
{
  mp_limb_t cy, saved;
  mp_size_t twok = k + k;
  mp_size_t kk1 = twok + 1;
  mp_ptr c1, v1, c3, vinf, c5;
  mp_limb_t cout; /* final carry, should be zero at the end */

  c1 = c  + k;
  v1 = c1 + k;
  c3 = v1 + k;
  vinf = c3 + k;
  c5 = vinf + k;

#define v0 (c)
  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
  */
  if (sa <= 0)
    mpn_add_n (v2, v2, vm1, kk1);
  else
    mpn_sub_n (v2, v2, vm1, kk1);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
						      /* (5 3 1 1 0)*/

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */

  /* (2) vm1 <- tm1 := (v1 - sa*vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
     tm1 >= 0                                            (0  1 0  1 0)
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
     and the division by two is exact */
  if (sa <= 0)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      mpn_add_n (vm1, v1, vm1, kk1);
      mpn_rshift (vm1, vm1, kk1, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      mpn_sub_n (vm1, v1, vm1, kk1);
      mpn_rshift (vm1, vm1, kk1, 1);
#endif
    }

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */

  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
     t1 >= 0
  */
  vinf[0] -= mpn_sub_n (v1, v1, c, twok);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */

  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
  */
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
  mpn_sub_n (v2, v2, v1, kk1);
  mpn_rshift (v2, v2, kk1, 1);
#endif

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */

  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
     result is v1 >= 0
  */
  mpn_sub_n (v1, v1, vm1, kk1);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0   v1-v0-tm1      hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */

  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
     result is v2 >= 0 */
  saved = vinf[0];       /* Remember v1's highest limb (will be overwritten). */
  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
#ifdef HAVE_NATIVE_mpn_sublsh1_n
  cy = mpn_sublsh1_n (v2, v2, vinf, twor);
#else
  cy = mpn_lshift (ws, vinf, twor, 1);
  cy += mpn_sub_n (v2, v2, ws, twor);
#endif
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);

  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
     result is >= 0 */
  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
  vinf[0] = saved;
  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last limbs.      */
  __GMPN_ADD_1 (cout, vinf, vinf, twor, vinf0); /* Add vinf0, propagate carry. */

  /* (8) vm1 <- vm1-t2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
     vm1 >= 0
  */
  mpn_sub_n (vm1, vm1, v2, kk1);            /* No overlapping here.        */

  /********************* Beginning the final phase **********************/

  /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */

  /* (9) add t2 in {c+3k, ...} */
  cy = mpn_add_n (c3, c3, v2, kk1);
  __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */

  /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t1         vinf      tm1  t2
		    +t2 */

  /* add vm1 in {c+k, ...} */
  cy = mpn_add_n (c1, c1, vm1, kk1);
  __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t1         vinf      tm1  t2
	  +tm1      +t2    */

#undef v0
#undef t2
}
Example #19
/* Temporary storage: Needs n limbs for the quotient, at qp. tp must
   point to an area large enough for the resulting cofactor, plus one
   limb extra. All in all, 2N + 1 if N is a bound for both inputs and
   outputs. */
mp_size_t
mpn_gcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr up, mp_size_t *usizep,
			mp_ptr ap, mp_ptr bp, mp_size_t n,
			mp_ptr u0, mp_ptr u1, mp_size_t *unp,
			mp_ptr qp, mp_ptr tp)
{
  mp_size_t an, bn, un;
  mp_size_t qn;
  mp_size_t u0n;

  int swapped;

  an = bn = n;

  ASSERT (an > 0);
  ASSERT (ap[an-1] > 0 || bp[an-1] > 0);

  MPN_NORMALIZE (ap, an);
  MPN_NORMALIZE (bp, bn);

  un = *unp;

  swapped = 0;

  if (UNLIKELY (an == 0))
    {
    return_b:
      MPN_COPY (gp, bp, bn);
      *gn = bn;

      MPN_NORMALIZE (u0, un);
      MPN_COPY (up, u0, un);

      *usizep = swapped ? un : -un;

      return 0;
    }
  else if (UNLIKELY (bn == 0))
    {
      MPN_COPY (gp, ap, an);
      *gn = an;

      MPN_NORMALIZE (u1, un);
      MPN_COPY (up, u1, un);

      *usizep = swapped ? -un : un;

      return 0;
    }

  /* Arrange so that a > b, subtract an -= bn, and maintain
     normalization. */
  if (an < bn)
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      MP_PTR_SWAP (u0, u1);
      swapped ^= 1;
    }
  else if (an == bn)
    {
      int c;
      MPN_CMP (c, ap, bp, an);
      if (UNLIKELY (c == 0))
	{
	  MPN_COPY (gp, ap, an);
	  *gn = an;

	  /* Must return the smallest cofactor, +u1 or -u0 */
	  MPN_CMP (c, u0, u1, un);
	  ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));

	  if (c < 0)
	    {
	      MPN_NORMALIZE (u0, un);
	      MPN_COPY (up, u0, un);
	      swapped ^= 1;
	    }
	  else
	    {
	      MPN_NORMALIZE_NOT_ZERO (u1, un);
	      MPN_COPY (up, u1, un);
	    }

	  *usizep = swapped ? -un : un;
	  return 0;
	}
      else if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  MP_PTR_SWAP (u0, u1);
	  swapped ^= 1;
	}
    }
  /* Reduce a -= b, u1 += u0 */
  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
  MPN_NORMALIZE (ap, an);
  ASSERT (an > 0);

  u1[un] = mpn_add_n (u1, u1, u0, un);
  un += (u1[un] > 0);

  /* Arrange so that a > b, and divide a = q b + r */
  if (an < bn)
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      MP_PTR_SWAP (u0, u1);
      swapped ^= 1;
    }
  else if (an == bn)
    {
      int c;
      MPN_CMP (c, ap, bp, an);
      if (UNLIKELY (c == 0))
	goto return_b;
      else if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  MP_PTR_SWAP (u0, u1);
	  swapped ^= 1;
	}
    }

  /* Reduce a -= q b, u1 += q u0 */
  qn = an - bn + 1;
  mpn_tdiv_qr (qp, ap, 0, ap, an, bp, bn);

  if (mpn_zero_p (ap, bn))
    goto return_b;

  n = bn;

  /* Update u1 += q u0 */
  u0n = un;
  MPN_NORMALIZE (u0, u0n);

  if (u0n > 0)
    {
      qn -= (qp[qn - 1] == 0);

      if (qn > u0n)
	mpn_mul (tp, qp, qn, u0, u0n);
      else
	mpn_mul (tp, u0, u0n, qp, qn);

      if (qn + u0n > un)
	{
	  mp_size_t u1n = un;
	  un = qn + u0n;
	  un -= (tp[un-1] == 0);
	  u1[un] = mpn_add (u1, tp, un, u1, u1n);
	}
      else
	{
	  u1[un] = mpn_add (u1, u1, un, tp, qn + u0n);
	}

      un += (u1[un] > 0);
    }

  *unp = un;
  return n;
}
Example #20
/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS at the
   points +2 and -2. */
int
mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
{
  int i;
  int neg;
  mp_limb_t cy;

  ASSERT (k >= 3);
  ASSERT (k < GMP_NUMB_BITS);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * the last coefficient, of size hn, starts at xp + k*n. */

  cy = 0;
  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
  if (hn != n)
    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
  for (i = k - 4; i >= 0; i -= 2)
    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
  xp2[n] = cy;

  k--;

  cy = 0;
  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
  for (i = k - 4; i >= 0; i -= 2)
    DO_addlsh2 (tp, xp + i * n, tp, n, cy);
  tp[n] = cy;

  if (k & 1)
    ASSERT_NOCARRY(mpn_lshift (tp, tp, n + 1, 1));
  else
    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));

  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;

#if HAVE_NATIVE_mpn_add_n_sub_n
  if (neg)
    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
  else
    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
  if (neg)
    mpn_sub_n (xm2, tp, xp2, n + 1);
  else
    mpn_sub_n (xm2, xp2, tp, n + 1);

  mpn_add_n (xp2, xp2, tp, n + 1);
#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */

  ASSERT (xp2[n] < (CNST_LIMB(1) << (k + 2)) - 1);
  ASSERT (xm2[n] < ((CNST_LIMB(1) << (k + 3)) - 1 - (1 ^ (k & 1))) / 3);

  neg ^= ((k & 1) - 1);

  return neg;
}
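As in the +-1 case, the coefficients are split by parity, but each term is weighted by the matching power of two (the DO_addlsh2 accumulation). A single-limb toy of the arithmetic (eval_pm2_toy is illustrative):

/* Toy: f(2) = E + O and f(-2) = E - O, where E and O collect the
   even- and odd-degree terms of f evaluated at 2. */
static long
eval_pm2_toy (const long *c, unsigned k, int at_minus_two)
{
  long e = 0, o = 0, w = 1;	/* w = 2^i */
  unsigned i;

  for (i = 0; i <= k; i++, w <<= 1)
    {
      if (i & 1)
	o += c[i] * w;		/* odd-degree terms */
      else
	e += c[i] * w;		/* even-degree terms */
    }
  return at_minus_two ? e - o : e + o;
}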
Example #21
mp_limb_t
mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy, qsave;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6);
  ASSERT (nn > dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn >= dn)
    {
      qn++;			/* pretend we'll need an extra limb */
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      tp = TMP_SALLOC_LIMBS (dn);

      /* Perform the typically smaller block first.  */
      if (qn == 1)
	{
	  mp_limb_t q, n2, n1, n0, d1, d0;

	  /* Handle qh up front, for simplicity. */
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
	  if (qh)
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

	  /* A single iteration of schoolbook: One 3/2 division,
	     followed by the bignum update and adjustment. */
	  n2 = np[0];
	  n1 = np[-1];
	  n0 = np[-2];
	  d1 = dp[-1];
	  d0 = dp[-2];

	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

	  if (UNLIKELY (n2 == d1) && n1 == d0)
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
	      ASSERT (cy == n2);
	    }
	  else
	    {
	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);

	      if (dn > 2)
		{
		  mp_limb_t cy, cy1;
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

		  cy1 = n0 < cy;
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
		  cy = n1 < cy1;
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
		  np[-2] = n0;

		  if (UNLIKELY (cy != 0))
		    {
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
		      qh -= (q == 0);
		      q = (q - 1) & GMP_NUMB_MASK;
		    }
		}
	      else
		np[-2] = n0;

	      np[-1] = n1;
	    }
	  qp[0] = q;
	}
      else
	{
	  if (qn == 2)
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
	  else
	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

	  if (qn != dn)
	    {
	      if (qn > dn - qn)
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	      else
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	      if (qh != 0)
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	      while (cy != 0)
		{
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
		}
	    }
	}
      qn = nn - dn - qn + 1;
      while (qn > dn)
	{
	  qp -= dn;
	  np -= dn;
	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
	  qn -= dn;
	}

      /* Since we pretended we'd need an extra quotient limb before, we now
	 have made sure the code above left just dn-1=qn quotient limbs to
	 develop.  Develop that plus a guard limb. */
      qn--;
      qp -= qn;
      np -= dn;
      qsave = qp[qn];
      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
      MPN_COPY_INCR (qp, qp + 1, qn);
      qp[qn] = qsave;
    }
  else    /* (qn < dn) */
    {
      mp_ptr q2p;
#if 0				/* not possible since we demand nn > dn */
      if (qn == 0)
	{
	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
	  if (qh)
	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
	  TMP_FREE;
	  return qh;
	}
#endif

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      q2p = TMP_SALLOC_LIMBS (qn + 1);
      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or rely on
	 callers not to be silly?  */
      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
	{
	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
				    dp - (qn + 1), qn + 1, dinv->inv32);
	}
      else
	{
	  /* It is tempting to use qp for recursive scratch and put quotient in
	     tp, but the recursive scratch needs one limb too many.  */
	  tp = TMP_SALLOC_LIMBS (qn + 1);
	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
	}
      MPN_COPY (qp, q2p + 1, qn);
    }

  TMP_FREE;
  return qh;
}
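A sketch of how this routine is typically driven. It relies on GMP's internal gmp-impl.h (for gmp_pi1_t, invert_pi1, ASSERT and the prototype), so it only builds inside the GMP tree; the wrapper name divappr_example is hypothetical:

#include "gmp.h"
#include "gmp-impl.h"

static mp_limb_t
divappr_example (mp_ptr qp, mp_ptr np, mp_size_t nn,
		 mp_srcptr dp, mp_size_t dn)
{
  gmp_pi1_t dinv;

  ASSERT (dn >= 6 && nn > dn);
  ASSERT (dp[dn - 1] & GMP_NUMB_HIGHBIT);	/* divisor must be normalized */

  invert_pi1 (dinv, dp[dn - 1], dp[dn - 2]);	/* 3/2 inverse of the top limbs */

  /* Quotient is qh*B^(nn-dn) + {qp, nn-dn}; it may exceed the true
     quotient slightly, which callers correct afterwards.  {np,nn} is
     clobbered. */
  return mpn_dcpi1_divappr_q (qp, np, nn, dp, dn, &dinv);
}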
Example #22
void
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
			   mp_size_t k, mp_size_t twor, int sa,
			   mp_limb_t vinf0)
{
  mp_limb_t cy, saved;
  mp_size_t twok;
  mp_size_t kk1;
  mp_ptr c1, v1, c3, vinf;

  twok = k + k;
  kk1 = twok + 1;

  c1 = c  + k;
  v1 = c1 + k;
  c3 = v1 + k;
  vinf = c3 + k;

#define v0 (c)
  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
  */
  if (sa)
    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
  else
    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
						      /* (5 3 1 1 0)*/

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */

  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
     tm1 >= 0                                         (0  1 0  1 0)
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
     and the division by two is exact.
     If (sa!=0) the sign of vm1 is negative */
  if (sa)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
#endif
    }

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */

  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
     t1 >= 0
  */
  vinf[0] -= mpn_sub_n (v1, v1, c, twok);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */

  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
  */
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
#endif

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */

  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
     result is v1 >= 0
  */
  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));

  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
  cy = mpn_add_n (c1, c1, vm1, kk1);
  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
  /* Memory allocated for vm1 is now free, it can be recycled ...*/

  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
     result is v2 >= 0 */
  saved = vinf[0];       /* Remember v1's highest limb (will be overwritten). */
  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
#ifdef HAVE_NATIVE_mpn_sublsh1_n
  cy = mpn_sublsh1_n (v2, v2, vinf, twor);
#else
  /* Overwrite unused vm1 */
  cy = mpn_lshift (vm1, vinf, twor, 1);
  cy += mpn_sub_n (v2, v2, vm1, twor);
#endif
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);

  /* Current matrix is
     [1 0 0 0 0; vinf
      0 1 0 0 0; v2
      1 0 1 0 0; v1
      0 1 0 1 0; vm1
      0 0 0 0 1] v0
     Some values are already in place (we added vm1 in the correct position)
     | vinf|  v1 |  v0 |
	      | vm1 |
     One value still sits in a separate area
	| +v2 |
     We have to compute v1 -= vinf; vm1 -= v2,
	   |-vinf|
	      | -v2 |
     By carefully reordering the operations we can avoid computing the sum
     of the high half of v2 plus the low half of vinf twice.
  */

  /* Add the high half of t2 in {vinf} */
  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
  } else { /* triggered only by very unbalanced cases like
	      (k+k+(k-2))x(k+k+1), should be handled by toom32 */
    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
  }
  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
     result is >= 0 */
  /* Side effect: we also subtracted (high half) vm1 -= v2 */
  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
  vinf[0] = saved;
  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last limbs.      */

  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
     Operate only on the low half.
  */
  cy = mpn_sub_n (c1, c1, v2, k);
  MPN_DECR_U (v1, kk1, cy);

  /********************* Beginning the final phase **********************/

  /* Most of the recomposition was done */

  /* add t2 in {c+3k, ...}, but only the low half */
  cy = mpn_add_n (c3, c3, v2, k);
  vinf[0] += cy;
  ASSERT(vinf[0] >= cy); /* No carry */
  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */

#undef v0
}
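The sequence above is easier to follow on machine integers. A minimal scalar model of steps (1)-(8), assuming plain C longs stand in for the limb vectors (all divisions are exact by construction); v0..vinf are the product evaluated at x = 0, 1, -1, 2 and infinity:

#include <stdio.h>

static void
interpolate5 (long v0, long v1, long vm1, long v2, long vinf, long c[5])
{
  v2  = (v2 - vm1) / 3;		/* (1) plus the divexact_by3 */
  vm1 = (v1 - vm1) / 2;		/* (2) tm1 */
  v1  = v1 - v0;		/* (3) t1 */
  v2  = (v2 - v1) / 2;		/* (4) t2 = (v2-vm1-3*t1)/6 */
  v1  = v1 - vm1;		/* (5) */
  v2  = v2 - 2 * vinf;		/* (6) */
  v1  = v1 - vinf;		/* (7) */
  vm1 = vm1 - v2;		/* (8) */
  c[0] = v0; c[1] = vm1; c[2] = v1; c[3] = v2; c[4] = vinf;
}

int
main (void)
{
  /* p(x) = 3 + x + 4x^2 + x^3 + 5x^4, evaluated at the five points */
  long c[5];
  interpolate5 (3, 14, 10, 109, 5, c);
  printf ("%ld %ld %ld %ld %ld\n", c[0], c[1], c[2], c[3], c[4]);  /* 3 1 4 1 5 */
  return 0;
}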
Example #23
/*
   We have

	{v0,2k} {v1,2k+1} {c+4k+1,r+r2-1} 
		v0 	v1       {-}vinf

	vinf0 is the first limb of vinf, which is overwritten by v1

	{vm1,2k+1} {v2, 2k+1}

	ws is temporary space

	sa is the sign of vm1

	rr2 is r+r2

	We want to compute

     t1   <- (3*v0+2*vm1+v2)/6-2*vinf
     t2   <- (v1+vm1)/2
  then the result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where
     c0   <- v0
     c1   <- v1 - t1
     c2   <- t2 - v0 - vinf
     c3   <- t1 - t2
     c4   <- vinf
*/ 
void
mpn_toom3_interpolate (mp_ptr c, mp_ptr v1, mp_ptr v2, mp_ptr vm1,
		                 mp_ptr vinf, mp_size_t k, mp_size_t rr2, int sa,
		                                       mp_limb_t vinf0, mp_ptr ws)
{
  mp_limb_t cy, saved;
  mp_size_t twok = k + k;
  mp_size_t kk1 = twok + 1;
  mp_ptr c1, c2, c3, c4, c5;
  mp_limb_t cout; /* final carry, should be zero at the end */

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;

#define v0 (c)

/* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 	 v1          {-}vinf

	{vm1,2k+1} {v2, 2k+1}
*/

  /* v2 <- v2 - vm1 */
  if (sa < 0)
  {
	  mpn_add_n(v2, v2, vm1, kk1);
  } else
  {
	  mpn_sub_n(v2, v2, vm1, kk1);
  }

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */

 /* vm1 <- t2 := (v1 - sa*vm1) / 2 */
  if (sa < 0)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      mpn_add_n (vm1, vm1, v1, kk1);
      mpn_half (vm1, kk1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      mpn_sub_n (vm1, v1, vm1, kk1);
      mpn_half (vm1, kk1);
#endif
    }

  /* v1 <- v1 - v0 - vinf */

  saved = c4[0];
  c4[0] = vinf0;
#if HAVE_NATIVE_mpn_subadd_n
  cy = mpn_subadd_n(v1, v1, v0, c4, rr2);
#else
  cy = mpn_sub_n(v1, v1, v0, rr2);
  cy += mpn_sub_n(v1, v1, c4, rr2);
#endif
  c4[0] = saved;
  if (rr2 < twok)
  {
	  v1[twok] -= mpn_sub_n(v1 + rr2, v1 + rr2, v0 + rr2, twok - rr2); 
	  MPN_DECR_U(v1 + rr2, kk1 - rr2, cy);
  }
  else v1[twok] -= cy;

  saved = c4[0];
  c4[0] = vinf0;
/* subtract 5*vinf from v2 */
  cy = mpn_submul_1 (v2, c4, rr2, CNST_LIMB(5));
  MPN_DECR_U (v2 + rr2, kk1 - rr2, cy);
  c4[0] = saved;

  /* v2 = (v2 - v1)/2 (exact)
  */
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
  mpn_sub_n (v2, v2, v1, kk1);
  mpn_half (v2, kk1);
#endif

  /* v1 = v1 - vm1
  */
  mpn_sub_n(v1, v1, vm1, kk1);

  /* vm1 = vm1 - v2 and add vm1 in {c+k, ...} */
#if HAVE_NATIVE_mpn_addsub_n
  cy = mpn_addsub_n(c1, c1, vm1, v2, kk1);
#else
  mpn_sub_n(vm1, vm1, v2, kk1);
  cy = mpn_add_n (c1, c1, vm1, kk1);
#endif
  ASSERT_NOCARRY (mpn_add_1(c3 + 1, c3 + 1, rr2 + k - 1, cy)); /* 4k+rr2-(3k+1) = rr2+k-1 */

  /* don't forget to add vinf0 in {c+4k, ...} */
  ASSERT_NOCARRY (mpn_add_1(c4, c4, rr2, vinf0));

  /* add v2 in {c+3k, ...} */
  if (rr2 <= k + 1)
     ASSERT_NOCARRY (mpn_add_n (c3, c3, v2, k+rr2));
  else
  {
	  cy = mpn_add_n (c3, c3, v2, kk1);
     if (cy) ASSERT_NOCARRY (mpn_add_1(c5 + 1, c5 + 1, rr2 - k - 1, cy)); /* 4k+rr2-(5k+1) = rr2-k-1 */
  }

#undef v0
}
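As a sanity check of the t1/t2 formulas in the header comment, a small self-contained program (an assumption for illustration: scalar longs instead of limb arrays) that evaluates a known polynomial at the five points and recovers its coefficients:

#include <stdio.h>

int
main (void)
{
  long c0 = 7, c1 = 2, c2 = 9, c3 = 4, c4 = 6;	/* arbitrary coefficients */
  long v0   = c0;				/* p(0) */
  long v1   = c0 + c1 + c2 + c3 + c4;		/* p(1) */
  long vm1  = c0 - c1 + c2 - c3 + c4;		/* p(-1); sign carried by sa */
  long v2   = c0 + 2*c1 + 4*c2 + 8*c3 + 16*c4;	/* p(2) */
  long vinf = c4;				/* p(inf) */

  long t1 = (3*v0 + 2*vm1 + v2) / 6 - 2*vinf;	/* = c0+c2+c3+c4 */
  long t2 = (v1 + vm1) / 2;			/* = c0+c2+c4 */

  printf ("%ld %ld %ld %ld %ld\n",		/* prints 7 2 9 4 6 */
	  v0, v1 - t1, t2 - v0 - vinf, t1 - t2, vinf);
  return 0;
}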
Example #24
/* put in {c, 2n} where n = 2k+r the value of {v0,2k} (already in place)
   + B^k * [{v1, 2k+1} - {t1, 2k+1}]
   + B^(2k) * [{t2, 2k+1} - {v0+vinf, 2k}]
   + B^(3k) * [{t1, 2k+1} - {t2, 2k+1}]
   + B^(4k) * {vinf,2r} (high 2r-1 limbs already in place)
   where {t1, 2k+1} = (3*{v0,2k}+2*sa*{vm1,2k+1}+{v2,2k+1})/6-2*{vinf,2r}
	 {t2, 2k+1} = ({v1, 2k+1} + sa * {vm1, 2k+1})/2
   (sa is the sign of {vm1, 2k+1}).

   {vinf, 2r} stores the content of {v0, 2r} + {vinf, 2r}, with carry in cinf0.
   vinf0 is the low limb of vinf.

   ws is temporary space, and should have at least 2r limbs.

   Think about:

   The evaluated point a-b+c stands a good chance of having a zero carry
   limb, a+b+c would have a 1/4 chance, and 4*a+2*b+c a 1/8 chance, roughly.
   Perhaps this could be tested and stripped.  Doing so before recursing
   would be better than stripping at the start of mpn_toom3_mul_n/sqr_n,
   since then the recursion could be based on the new size.  Although in
   truth the kara vs toom3 crossover is never so exact that one limb either
   way makes a difference.

   A small value like 1 or 2 for the carry could perhaps also be handled
   with an add_n or addlsh1_n.  Would that be faster than an extra limb on a
   (recursed) multiply/square?
*/
static void
toom3_interpolate (mp_ptr c, mp_srcptr v1, mp_ptr v2, mp_ptr vm1,
		   mp_ptr vinf, mp_size_t k, mp_size_t r, int sa,
		   mp_limb_t vinf0, mp_limb_t cinf0, mp_ptr ws)
{
  mp_limb_t cy, saved;
  unsigned long twok = k + k;
  unsigned long kk1 = twok + 1;
  unsigned long twor = r + r;
  mp_ptr c1, c2, c3, c4, c5;
  mp_limb_t cout; /* final carry, should be zero at the end */

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;

#define v0 (c)
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0      |vm1|       hi(vinf)       v1       v2+2vm1      vinf
							      +lo(v0) */

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
#ifdef HAVE_NATIVE_mpn_rsh1add_n
  mpn_rsh1add_n (v2, v2, v0, twok); /* v2 <- (lo(v2)+v0) / 2, exact */
  cy = v2[twok] & 1; /* add high limb of v2 divided by 2 */
  v2[twok] >>= 1;
  MPN_INCR_U (v2 + twok - 1, 2, cy << (GMP_NUMB_BITS - 1));
#else
  v2[twok] += mpn_add_n (v2, v2, v0, twok);
  mpn_rshift (v2, v2, kk1, 1);
#endif

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0      |vm1|      hi(vinf)       v1    (3v0+2vm1+v2)    vinf
						    /6         +lo(v0) */

  /* vm1 <- t2 := (v1 + sa*vm1) / 2
     t2 = a0*b0+a0*b2+a1*b1+a2*b0+a2*b2 >= 0
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
     and the division by two is exact */
  if (sa >= 0)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      mpn_add_n (vm1, vm1, v1, kk1);
      mpn_rshift (vm1, vm1, kk1, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      mpn_sub_n (vm1, v1, vm1, kk1);
      mpn_rshift (vm1, vm1, kk1, 1);
#endif
    }

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       t2        hi(vinf)       v1         t1       vinf+lo(v0) */

  /* subtract 2*vinf from v2,
     result is t1 := a0*b0+a0*b2+a1*b1+a1*b2+a2*b0+a2*b1+a2*b2 >= 0 */
  saved = c4[0];
  c4[0] = vinf0;
#ifdef HAVE_NATIVE_mpn_sublsh1_n
  cy = mpn_sublsh1_n (v2, v2, c4, twor);
#else
  cy = mpn_lshift (ws, c4, twor, 1);
  cy += mpn_sub_n (v2, v2, ws, twor);
#endif
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
  c4[0] = saved;

  /* subtract {t2, 2k+1} in {c+3k, 2k+1} i.e. in {t2+k, 2k+1}:
     by chunks of k limbs from right to left to avoid overlap */
#define t2 (vm1)
  /* a borrow may occur in one of the 2 following __GMPN_SUB_1 calls, but since
     the final result is nonnegative, it will be compensated later on */
  __GMPN_SUB_1 (cout, c5, c5, twor - k, t2[twok]);
  cy = mpn_sub_n (c4, c4, t2 + k, k);
  __GMPN_SUB_1 (cout, c5, c5, twor - k, cy);
  cy = mpn_sub_n (c3, c3, t2, k);
  __GMPN_SUB_1 (cout, c4, c4, twor, cy);

  /* don't forget to add vinf0 in {c+4k, ...} */
  __GMPN_ADD_1 (cout, c4, c4, twor, vinf0);

  /* c  c+k c+2k c+3k c+4k+1   t  t+2k+1 t+4k+2
     v0     t2        hi(vinf) v1 t1     vinf
		 -t2                    +lo(v0)
  */

  /* c  c+k c+2k c+3k c+4k     t  t+2k+1 t+4k+2
     v0     t2        vinf     v1 t1     vinf
		 -t2                    +lo(v0)
  */

  /* subtract v0+vinf in {c+2k, ...} */
  cy = cinf0 + mpn_sub_n (c2, c2, vinf, twor);
  if (twor < twok)
    {
      __GMPN_SUB_1 (cy, c2 + twor, c2 + twor, twok - twor, cy);
      cy += mpn_sub_n (c2 + twor, c2 + twor, v0 + twor, twok - twor);
    }
  __GMPN_SUB_1 (cout, c4, c4, twor, cy); /* 2n-4k = 2r */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t2          vinf      v1  t1      vinf
	      -v0   -t2                        +lo(v0)
	      -vinf                                    */

  /* subtract t1 in {c+k, ...} */
  cy = mpn_sub_n (c1, c1, v2, kk1);
  __GMPN_SUB_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1)=k+2r-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t2          vinf      v1  t1      vinf
	 -t1  -v0   -t2
	      -vinf                                    */

  /* add t1 in {c+3k, ...} */
  cy = mpn_add_n (c3, c3, v2, kk1);
  __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t2    t1    vinf      v1  t1      vinf
	 -t1  -v0   -t2
	      -vinf                                    */

  /* add v1 in {c+k, ...} */
  cy = mpn_add_n (c1, c1, v1, kk1);
  __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0  v1   t2    t1    vinf      v1  t1      vinf
	 -t1  -v0   -t2
	      -vinf                                    */
#undef v0
#undef t2
}
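The __GMPN_ADD_1/__GMPN_SUB_1 invocations above propagate a single-limb carry or borrow through the high part of the number. A minimal model of the add direction (assumptions: no nails, a plain loop instead of GMP's macro; the name add_1_model is hypothetical):

#include <gmp.h>

static mp_limb_t
add_1_model (mp_ptr rp, mp_srcptr sp, mp_size_t n, mp_limb_t v)
{
  mp_size_t i;
  for (i = 0; i < n; i++)
    {
      mp_limb_t s = sp[i] + v;
      rp[i] = s;
      v = s < v;	/* carry out: the add wrapped iff s < v */
    }
  return v;		/* final carry; zero when the value fit */
}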
Example #25
/* Computes |v| = |(g - u a)| / b, where u may be positive or
   negative, and v is of the opposite sign. a, b are of size n, u and
   v at most size n, and v must have space for n+1 limbs. */
static mp_size_t
compute_v (mp_ptr vp,
	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
	   mp_srcptr gp, mp_size_t gn,
	   mp_srcptr up, mp_size_t usize,
	   mp_ptr tp)
{
  mp_size_t size;
  mp_size_t an;
  mp_size_t bn;
  mp_size_t vn;

  ASSERT (n > 0);
  ASSERT (gn > 0);
  ASSERT (usize != 0);

  size = ABS (usize);
  ASSERT (size <= n);

  an = n;
  MPN_NORMALIZE (ap, an);

  if (an >= size)
    mpn_mul (tp, ap, an, up, size);
  else
    mpn_mul (tp, up, size, ap, an);

  size += an;
  size -= tp[size - 1] == 0;

  ASSERT (gn <= size);

  if (usize > 0)
    {
      /* |v| = -v = (u a - g) / b */

      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
      MPN_NORMALIZE (tp, size);
      if (size == 0)
	return 0;
    }
  else
    { /* usize < 0 */
      /* |v| = v = (g - u a) / b = (g + |u| a) / b */
      mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
      if (cy)
	tp[size++] = cy;
    }

  /* Now divide t / b. There must be no remainder */
  bn = n;
  MPN_NORMALIZE (bp, bn);
  ASSERT (size >= bn);

  vn = size + 1 - bn;
  ASSERT (vn <= n + 1);

  mpn_divexact (vp, tp, size, bp, bn);
  vn -= (vp[vn-1] == 0);

  return vn;
}
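The identity compute_v exploits, g = u*a + v*b with the division by b exact, can also be checked at the public mpz level; this sketch uses only documented GMP calls (mpz_gcdext, mpz_divexact):

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b, g, u, v, t;
  mpz_inits (a, b, g, u, v, t, NULL);
  mpz_set_str (a, "123456789012345678901234567890", 10);
  mpz_set_str (b, "987654321098765432109876543210987", 10);

  mpz_gcdext (g, u, v, a, b);		/* g = u*a + v*b */

  /* Recover v the way compute_v does: (g - u*a) / b, exactly. */
  mpz_mul (t, u, a);
  mpz_sub (t, g, t);
  mpz_divexact (t, t, b);
  printf ("v recomputed ok: %d\n", mpz_cmp (t, v) == 0);

  mpz_clears (a, b, g, u, v, t, NULL);
  return 0;
}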