Example 1
void
ecc_modp_submul_1 (const struct ecc_curve *ecc, mp_limb_t *rp,
		   const mp_limb_t *ap, mp_limb_t b)
{
  mp_limb_t hi;

  assert (b <= 0xffffffff);
  hi = mpn_submul_1 (rp, ap, ecc->size, b);
  hi = mpn_submul_1 (rp, ecc->Bmodp, ecc->size, hi);
  assert (hi <= 1);
  hi = cnd_sub_n (hi, rp, ecc->Bmodp, ecc->size);
  /* Sufficient roughly if b < B^size / p */
  assert (hi == 0);
}
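The two mpn_submul_1 calls implement a fold-the-borrow reduction: a borrow of hi at the top of {rp, size} stands for -hi*B^size, and since B^size == Bmodp (mod p) it can be traded for a further subtraction of hi*Bmodp, leaving at most one conditional subtraction for cnd_sub_n. A stand-alone sketch of the underlying congruence, using only GMP's public mpz API (the P-256 prime is chosen for concreteness; cnd_sub_n itself is Nettle-internal):

#include <gmp.h>
#include <stdio.h>

int main (void)
{
  mpz_t Bsize, p, bmodp, x;
  mpz_inits (Bsize, p, bmodp, x, NULL);

  mpz_ui_pow_ui (Bsize, 2, 256);   /* B^size with size = 4 64-bit limbs */
  mpz_set_str (p, "115792089210356248762697446949407573530086143415290314195533631308867097853951", 10);   /* P-256 prime */
  mpz_mod (bmodp, Bsize, p);       /* ecc->Bmodp = B^size mod p */

  /* a top borrow of hi represents hi*B^size; subtracting hi*Bmodp
     instead changes the stored value by a multiple of p only */
  mpz_sub (x, Bsize, bmodp);
  mpz_mod (x, x, p);
  gmp_printf ("(B^size - Bmodp) mod p = %Zd   (expect 0)\n", x);

  mpz_clears (Bsize, p, bmodp, x, NULL);
  return 0;
}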
Example 2
void
mpn_dcpi1_bdiv_q_n (mp_ptr qp,
		    mp_ptr np, mp_srcptr dp, mp_size_t n,
		    mp_limb_t dinv, mp_ptr tp)
{
  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
    {
      mp_size_t lo, hi;
      mp_limb_t cy;

      lo = n >> 1;			/* floor(n/2) */
      hi = n - lo;			/* ceil(n/2) */

      cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);

      mpn_mullo_n (tp, qp, dp + hi, lo);
      mpn_sub_n (np + hi, np + hi, tp, lo);

      if (lo < hi)
	{
	  cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
	  np[n - 1] -= cy;
	}
      qp += lo;
      np += lo;
      n -= lo;
    }
  mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
}
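mpn_dcpi1_bdiv_q_n performs Hensel (2-adic) division: quotient limbs are produced from the low end using dinv, an inverse related to the low divisor limb modulo B, so the divisor must be odd. A self-contained sketch of how such a limb inverse is obtained by Newton iteration, mirroring what GMP's binvert_limb macro computes (the function name here is illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* inverse of an odd d modulo 2^64: x <- x*(2 - d*x) doubles the number
   of correct low bits each step; x = d is already correct to 3 bits
   because d*d == 1 (mod 8) for every odd d */
static uint64_t binv64 (uint64_t d)
{
  uint64_t x = d;       /*  3 bits */
  x *= 2 - d * x;       /*  6 bits */
  x *= 2 - d * x;       /* 12 bits */
  x *= 2 - d * x;       /* 24 bits */
  x *= 2 - d * x;       /* 48 bits */
  x *= 2 - d * x;       /* 96 >= 64 bits */
  return x;
}

int main (void)
{
  uint64_t d = 0x123456789abcdef1ULL;   /* any odd limb */
  assert (d * binv64 (d) == 1);         /* equality holds modulo 2^64 */
  printf ("inverse ok\n");
  return 0;
}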
Example 3
static mp_limb_t
mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  mp_limb_t qhl, cc;
  mp_size_t n2 = n/2;

  if (n % 2 != 0)
    {
      mp_ptr qp1 = qp + 1;
      qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch);
      qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2,
			mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch));

      cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]);
      cc = mpn_sub_1 (np + n, np + n, 1, cc);
      if (qhl != 0)
	cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
      while (cc != 0)
	{
	  qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);
	  cc -= mpn_add_n (np + 1, np + 1, dp, n);
	}
      qhl += mpn_add_1 (qp1, qp1, n - 1,
			mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
    }
  else
    {
      qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch);
      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
			mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch));
    }
  return qhl;
}
static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_submul_1 (dst, src, n, CNST_LIMB(1) << s);
#else
  mp_limb_t cy;

  cy = mpn_lshift (ws, src, n, s);
  return cy + mpn_sub_n (dst, dst, ws, n);
#endif
}
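Both branches of DO_mpn_sublsh_n must return the same borrow: subtracting src*2^s via mpn_submul_1 is equivalent to shifting into scratch and then subtracting. A quick check of that equivalence with GMP's public mpn calls (valid for 0 < s < GMP_NUMB_BITS):

#include <assert.h>
#include <string.h>
#include <gmp.h>

int main (void)
{
  mp_limb_t dst1[3] = { 0, 0, 123 }, dst2[3] = { 0, 0, 123 };
  mp_limb_t src[3] = { 7, 8, 9 };
  mp_limb_t ws[3], b1, b2;
  unsigned s = 5;

  b1 = mpn_submul_1 (dst1, src, 3, (mp_limb_t) 1 << s);   /* dst1 -= src << s */

  b2 = mpn_lshift (ws, src, 3, s);                        /* bits shifted out */
  b2 += mpn_sub_n (dst2, dst2, ws, 3);                    /* plus the borrow */

  assert (b1 == b2 && memcmp (dst1, dst2, sizeof dst1) == 0);
  return 0;
}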
mp_limb_t
my_mpn_sbpi1_div_qr (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;

  np += nn;

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)	/* reduce the top so the loop invariant {np,dn} < {dp,dn} holds,
		   as in the reference mpn_sbpi1_div_qr */
    mpn_sub_n (np - dn, np - dn, dp, dn);

  qp += nn - dn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  np -= 2;

  n1 = np[1];

  for (i = nn - (dn + 2); i > 0; i--)
    {
      mp_limb_t _q0, _t1, _t0, _mask;

      np--;
      my_udiv_qr_3by2 (&q, &n1, &n0, &n1, &(np[1]), &(np[0]), &d1, &d0,
		       &dinv, &_q0, &_t1, &_t0, &_mask);
      printf ("my_udiv_qr_3by2 %08lx\n", q);	/* debug trace of the quotient limb */

      cy = mpn_submul_1 (np - dn, dp, dn, q);

      cy1 = n0 < cy;
      n0 = (n0 - cy) & GMP_NUMB_MASK;
      cy = n1 < cy1;
      n1 = (n1 - cy1) & GMP_NUMB_MASK;
      np[0] = n0;

      /* if the estimate was one too large, add back and adjust q,
	 as in the reference implementation */
      if (cy != 0)
	{
	  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	  q--;
	}

      *--qp = q;
    }
  np[1] = n1;

  return qh;
}
Example 6
/* 
   Divide { a, m } by { b, n }, returning the high limb of the quotient
   (which will either be 0 or 1), storing the remainder in-place in
   { a, n } and the rest of the quotient in { q, m - n }.
   We require the most significant bit of { b, n } to be 1, i.e. the
   divisor must be normalised.
   dinv must be computed from b[n - 1], b[n - 2] by div_preinv1.
   Thus, currently we require n >= 2 and m >= n.
*/
mp_limb_t div_basecase(mp_ptr q, mp_ptr a, mp_size_t m, 
                                 mp_srcptr b, mp_size_t n, mp_limb_t dinv)
{
   mp_limb_t ret;
   mp_size_t i;

   /* ensure { a + i, n } < { b, n } */
   if ((ret = (mpn_cmp(a + m - n, b, n) >= 0)))
      mpn_sub_n(a + m - n, a + m - n, b, n);
   
   for (i = m - 1; i >= n; i--)
   {
      divrem21_preinv(q[i - n], a[i], a[i - 1], dinv);
      a[i] -= mpn_submul_1(a + i - n, b, n, q[i - n]);

      if (mpn_cmp(a + i - n, b, n) >= 0 || a[i] != 0)
      {
         q[i - n]++;
         a[i] -= mpn_sub_n(a + i - n, a + i - n, b, n);
      }
   }

   return ret;
}
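The correction branch in the loop above runs at most a couple of times because, for a normalised divisor, the one-word quotient estimate overshoots the true quotient word by at most 2 (Knuth, TAOCP vol. 2, 4.3.1, Theorem B). A brute-force check of that bound in base 16 (purely illustrative):

#include <assert.h>
#include <stdio.h>

int main (void)
{
  const unsigned B = 16;
  unsigned b1, b0, a;

  for (b1 = B / 2; b1 < B; b1++)        /* normalised top word */
    for (b0 = 0; b0 < B; b0++)
      {
        unsigned b = b1 * B + b0;
        for (a = 0; a < B * b; a++)     /* a < B*b ensures q < B */
          {
            unsigned a2 = a / (B * B);        /* top word of dividend */
            unsigned a1 = (a / B) % B;        /* next word */
            unsigned qhat = (a2 * B + a1) / b1;
            unsigned q = a / b;
            if (qhat > B - 1)
              qhat = B - 1;
            assert (qhat >= q && qhat - q <= 2);
          }
      }
  printf ("estimate error <= 2 verified for base %u\n", B);
  return 0;
}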
Example 7
/* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
   with the most significant limb of the quotient as return value (0 or 1).
   Assumes the most significant bit of D is set. Clobbers N.

   The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
*/
static mp_limb_t
mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
                         mpfr_limb_srcptr dp, mp_size_t n)
{
  mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
  mpfr_pi1_t dinv2;

  np += n;

  if ((qh = (mpn_cmp (np, dp, n) >= 0)))
    mpn_sub_n (np, np, dp, n);

  /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */

  d1 = dp[n - 1];

  if (n == 1)
    {
      invert_limb (dinv, d1);
      umul_ppmm (q1, q0, np[0], dinv);
      qp[0] = np[0] + q1;
      return qh;
    }

  /* now n >= 2 */
  d0 = dp[n - 2];
  invert_pi1 (dinv2, d1, d0);
  /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
  while (n > 1)
    {
      /* Invariant: it remains to reduce n limbs from N (in addition to the
         initial low n limbs).
         Since n >= 2 here, necessarily we had n >= 2 initially, which means
         that in addition to the limb np[n-1] to reduce, we have at least 2
         extra limbs, thus accessing np[n-3] is valid. */

      /* warning: we can have np[n-1]=d1 and np[n-2]=d0, but since {np,n} < D,
         the largest possible partial quotient is B-1 */
      if (MPFR_UNLIKELY(np[n - 1] == d1 && np[n - 2] == d0))
        q2 = ~ (mp_limb_t) 0;
      else
        udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
                      d1, d0, dinv2.inv32);
      /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
         we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
         thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
         and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
         thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
         which proves that at most one correction is needed */
      q0 = mpn_submul_1 (np - 1, dp, n, q2);
      if (MPFR_UNLIKELY(q0 > np[n - 1]))
        {
          mpn_add_n (np - 1, np - 1, dp, n);
          q2 --;
        }
      qp[--n] = q2;
      dp ++;
    }

  /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
     q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
        <= floor((np[0]*B+np[1])/d1)
     thus q1 is not larger than the true quotient.
     q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
     For d1*B+d0 != B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
     thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
     (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
     d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
     thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.

     For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
     np[0]*B/d1 - 2.

     In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
     q - 4 <= q1 <= q
  */
  umul_ppmm (q1, q0, np[0], dinv2.inv32);
  qp[0] = np[0] + q1;

  return qh;
}
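udiv_qr_3by2 and invert_pi1 are GMP-internal macros implementing Möller and Granlund's 3/2 division with a precomputed inverse. The following portable transcription for 32-bit limbs with 64-bit intermediates is a sketch of their contract, checked against a direct 96/64-bit division; it is not GMP's actual code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t limb;     /* B = 2^32 */
typedef uint64_t dlimb;

/* dinv = floor((B^3 - 1) / (d1*B + d0)) - B, with d1 normalised */
static limb invert_pi1_32 (limb d1, limb d0)
{
  unsigned __int128 num = (((unsigned __int128) 1) << 96) - 1;
  dlimb den = ((dlimb) d1 << 32) | d0;
  return (limb) (num / den - (((unsigned __int128) 1) << 32));
}

/* q, <r1,r0> with <n2,n1,n0> = q*<d1,d0> + <r1,r0>,
   assuming <n2,n1> < <d1,d0> and d1 normalised */
static limb udiv_qr_3by2_32 (limb *r1p, limb *r0p, limb n2, limb n1,
                             limb n0, limb d1, limb d0, limb dinv)
{
  dlimb q10 = (dlimb) n2 * dinv + ((((dlimb) n2) << 32) | n1);
  limb q = (limb) (q10 >> 32), q0 = (limb) q10;
  dlimb d = ((dlimb) d1 << 32) | d0;
  /* low two limbs of n - (q+1)*d, computed modulo B^2 */
  limb r1 = n1 - (limb) ((dlimb) q * d1);
  dlimb r = ((((dlimb) r1) << 32) | n0) - (dlimb) q * d0 - d;
  q++;
  if ((limb) (r >> 32) >= q0)   /* went negative: undo one subtraction */
    {
      q--;
      r += d;
    }
  if (r >= d)                   /* rare second adjustment */
    {
      q++;
      r -= d;
    }
  *r1p = (limb) (r >> 32);
  *r0p = (limb) r;
  return q;
}

int main (void)
{
  limb d1 = 0x80000001u, d0 = 0x12345678u;
  limb n2 = 0x7fffffffu, n1 = 0xdeadbeefu, n0 = 0xcafebabeu;
  limb q, r1, r0;
  unsigned __int128 n = (((unsigned __int128) n2) << 64)
                        | ((unsigned __int128) n1 << 32) | n0;
  dlimb d = ((dlimb) d1 << 32) | d0;

  q = udiv_qr_3by2_32 (&r1, &r0, n2, n1, n0, d1, d0, invert_pi1_32 (d1, d0));
  assert (q == (limb) (n / d));
  assert (((((dlimb) r1) << 32) | r0) == (dlimb) (n % d));
  printf ("3/2 division ok\n");
  return 0;
}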
Example 8
mp_limb_t
mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy, qsave;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6);
  ASSERT (nn > dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn >= dn)
    {
      qn++;			/* pretend we'll need an extra limb */
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      tp = TMP_SALLOC_LIMBS (dn);

      /* Perform the typically smaller block first.  */
      if (qn == 1)
	{
	  mp_limb_t q, n2, n1, n0, d1, d0;

	  /* Handle qh up front, for simplicity. */
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
	  if (qh)
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

	  /* A single iteration of schoolbook: One 3/2 division,
	     followed by the bignum update and adjustment. */
	  n2 = np[0];
	  n1 = np[-1];
	  n0 = np[-2];
	  d1 = dp[-1];
	  d0 = dp[-2];

	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

	  if (UNLIKELY (n2 == d1) && n1 == d0)
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
	      ASSERT (cy == n2);
	    }
	  else
	    {
	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);

	      if (dn > 2)
		{
		  mp_limb_t cy, cy1;
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

		  cy1 = n0 < cy;
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
		  cy = n1 < cy1;
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
		  np[-2] = n0;

		  if (UNLIKELY (cy != 0))
		    {
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
		      qh -= (q == 0);
		      q = (q - 1) & GMP_NUMB_MASK;
		    }
		}
	      else
		np[-2] = n0;

	      np[-1] = n1;
	    }
	  qp[0] = q;
	}
      else
	{
	  if (qn == 2)
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
	  else
	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

	  if (qn != dn)
	    {
	      if (qn > dn - qn)
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	      else
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	      if (qh != 0)
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	      while (cy != 0)
		{
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
		}
	    }
	}
      qn = nn - dn - qn + 1;
      while (qn > dn)
	{
	  qp -= dn;
	  np -= dn;
	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
	  qn -= dn;
	}

      /* Since we pretended we'd need an extra quotient limb before, we now
	 have made sure the code above left just dn-1=qn quotient limbs to
	 develop.  Develop that plus a guard limb. */
      qn--;
      qp -= qn;
      np -= dn;
      qsave = qp[qn];
      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
      MPN_COPY_INCR (qp, qp + 1, qn);
      qp[qn] = qsave;
    }
  else    /* (qn < dn) */
    {
      mp_ptr q2p;
#if 0				/* not possible since we demand nn > dn */
      if (qn == 0)
	{
	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
	  if (qh)
	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
	  TMP_FREE;
	  return qh;
	}
#endif

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      q2p = TMP_SALLOC_LIMBS (qn + 1);
      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or rely on
	 callers not to be silly?  */
      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
	{
	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
				    dp - (qn + 1), qn + 1, dinv->inv32);
	}
      else
	{
	  /* It is tempting to use qp for recursive scratch and put quotient in
	     tp, but the recursive scratch needs one limb too many.  */
	  tp = TMP_SALLOC_LIMBS (qn + 1);
	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
	}
      MPN_COPY (qp, q2p + 1, qn);
    }

  TMP_FREE;
  return qh;
}
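Why a quotient computed from truncated operands can be corrected cheaply: if D' is D with its k low limbs dropped, then D'*B^k <= D < (D'+1)*B^k, so floor(N/((D'+1)*B^k)) <= floor(N/D) <= floor(N/(D'*B^k)) and the approximation error stays within a few units. A randomized mpz check of that sandwich (illustrative):

#include <assert.h>
#include <gmp.h>

int main (void)
{
  gmp_randstate_t st;
  mpz_t n, d, dhi, dhi1, qlo, qhi, q;
  int i, k = 2;                                    /* limbs dropped from D */

  gmp_randinit_default (st);
  mpz_inits (n, d, dhi, dhi1, qlo, qhi, q, NULL);

  for (i = 0; i < 1000; i++)
    {
      mpz_urandomb (n, st, 8 * GMP_NUMB_BITS);
      mpz_urandomb (d, st, 4 * GMP_NUMB_BITS);
      mpz_setbit (d, 4 * GMP_NUMB_BITS - 1);       /* normalise D */

      mpz_fdiv_q (q, n, d);                        /* exact quotient */

      mpz_fdiv_q_2exp (dhi, d, k * GMP_NUMB_BITS); /* D' */
      mpz_add_ui (dhi1, dhi, 1);
      mpz_mul_2exp (dhi, dhi, k * GMP_NUMB_BITS);  /* D' * B^k */
      mpz_mul_2exp (dhi1, dhi1, k * GMP_NUMB_BITS);/* (D'+1) * B^k */

      mpz_fdiv_q (qhi, n, dhi);                    /* upper bound */
      mpz_fdiv_q (qlo, n, dhi1);                   /* lower bound */

      assert (mpz_cmp (qlo, q) <= 0 && mpz_cmp (q, qhi) <= 0);
    }

  mpz_clears (n, d, dhi, dhi1, qlo, qhi, q, NULL);
  gmp_randclear (st);
  return 0;
}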
Example 9
mp_limb_t
mpn_sbpi1_div_qr (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  np += nn;

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  qp += nn - dn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  np -= 2;

  n1 = np[1];

  for (i = nn - (dn + 2); i > 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
	{
	  q = GMP_NUMB_MASK;
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

	  cy = mpn_submul_1 (np - dn, dp, dn, q);

	  cy1 = n0 < cy;
	  n0 = (n0 - cy) & GMP_NUMB_MASK;
	  cy = n1 < cy1;
	  n1 = (n1 - cy1) & GMP_NUMB_MASK;
	  np[0] = n0;

	  if (UNLIKELY (cy != 0))
	    {
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	      q--;
	    }
	}

      *--qp = q;
    }
  np[1] = n1;

  return qh;
}
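mpn_sbpi1_div_qr lives in gmp-impl.h and is not part of GMP's public interface; from user code the same schoolbook path is reached through mpn_tdiv_qr. A small sketch of the quotient/remainder contract, verified via mpz (illustrative values; nails assumed zero):

#include <assert.h>
#include <gmp.h>

int main (void)
{
  mp_limb_t np[6] = { 1, 2, 3, 4, 5, 6 };
  mp_limb_t dp[3] = { 9, 8, ((mp_limb_t) 1 << (GMP_NUMB_BITS - 1)) | 7 };
  mp_limb_t qp[4], rp[3];
  mpz_t n, d, q, r, chk;

  /* {qp, nn-dn+1} = floor(N/D), {rp, dn} = N mod D */
  mpn_tdiv_qr (qp, rp, 0, np, 6, dp, 3);

  mpz_inits (n, d, q, r, chk, NULL);
  mpz_import (n, 6, -1, sizeof (mp_limb_t), 0, 0, np);
  mpz_import (d, 3, -1, sizeof (mp_limb_t), 0, 0, dp);
  mpz_import (q, 4, -1, sizeof (mp_limb_t), 0, 0, qp);
  mpz_import (r, 3, -1, sizeof (mp_limb_t), 0, 0, rp);

  mpz_mul (chk, q, d);
  mpz_add (chk, chk, r);
  assert (mpz_cmp (chk, n) == 0);      /* N == Q*D + R */
  assert (mpz_cmp (r, d) < 0);

  mpz_clears (n, d, q, r, chk, NULL);
  return 0;
}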
Example 10
mp_limb_t
mpn_sb_div_q (mp_ptr qp,
		 mp_ptr np, mp_size_t nn,
		 mp_srcptr dp, mp_size_t dn,
		 mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t qn, i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0, d11, d01;
  mp_limb_t cy, cy1;
  mp_limb_t q;
  mp_limb_t flag;

  mp_size_t dn_orig = dn, qn_orig;
  mp_srcptr dp_orig = dp;
  mp_ptr np_orig = np;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  np += nn;

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  if (dn <= SB_DIVAPPR_Q_SMALL_THRESHOLD)
    {
      qn_orig = qn;

      /* Reduce until dn - 2 >= qn */
      for (qn--, np--; qn > dn - 2; qn--)
	{
	  /* fetch next word */
	  cy = np[0];

	  np--;
	  mpir_divapprox32_preinv2 (q, cy, np[0], dinv);

	  /* np -= dp*q */
	  cy -= mpn_submul_1 (np - dn + 1, dp, dn, q);

	  /* correct if remainder is too large */
	  if (UNLIKELY (cy || np[0] >= dp[dn - 1]))
	    {
	      if (cy || mpn_cmp (np - dn + 1, dp, dn) >= 0)
		{
		  q++;
		  cy -= mpn_sub_n (np - dn + 1, np - dn + 1, dp, dn);
		}
	    }

	  qp[qn] = q;
	}

      qn++;
      dp = dp + dn - qn - 1;	/* make dp length qn + 1 */

      flag = ~CNST_LIMB(0);

      for ( ; qn > 0; qn--)
	{
	  /* fetch next word */
	  cy = np[0];

	  np--;
	  /* rare case where truncation ruins normalisation */
	  if (cy > dp[qn] || (cy == dp[qn] && mpn_cmp (np - qn + 1, dp, qn) >= 0))
	    {
	      __div_helper (qp, np - qn, dp, qn);
	      flag = 0;
	      break;
	    }

	  mpir_divapprox32_preinv2 (q, cy, np[0], dinv);

	  /* np -= dp*q */
	  cy -= mpn_submul_1 (np - qn, dp, qn + 1, q);

	  /* correct if remainder is too large */
	  if (UNLIKELY (cy || np[0] >= dp[qn]))
	    {
	      if (cy || mpn_cmp (np - qn, dp, qn + 1) >= 0)
		{
		  q++;
		  cy -= mpn_sub_n (np - qn, np - qn, dp, qn + 1);
		}
	    }

	  qp[qn - 1] = q;
	  dp++;
	}

      np--;
      n1 = np[1];
      qn = qn_orig;
    }
  else
    {
  qp += qn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  d01 = d0 + 1;
  d11 = d1 + (d01 < d0);
  
  np -= 2;

  n1 = np[1];

  for (i = qn - (dn + 2); i >= 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
	{
	  q = GMP_NUMB_MASK;
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  cy = mpn_submul_1 (np - dn, dp, dn, q);

	  cy1 = n0 < cy;
	  n0 = (n0 - cy) & GMP_NUMB_MASK;
	  cy = n1 < cy1;
	  n1 -= cy1;
	  np[0] = n0;

	  if (UNLIKELY (cy != 0))
	    {
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	      q--;
	    }
	}

      *--qp = q;
    }

  flag = ~CNST_LIMB(0);

  if (dn >= 0)
    {
      for (i = dn; i > 0; i--)
	{
	  np--;
	  if (UNLIKELY (n1 >= (d1 & flag)))
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);

	      if (UNLIKELY (n1 != cy))
		{
		  if (n1 < (cy & flag))
		    {
		      q--;
		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
		    }
		  else
		    flag = 0;
		}
	      n1 = np[1];
	    }
	  else
	    {
	      mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	      cy = mpn_submul_1 (np - dn, dp, dn, q);

	      cy1 = n0 < cy;
	      n0 = (n0 - cy) & GMP_NUMB_MASK;
	      cy = n1 < cy1;
	      n1 -= cy1;
	      np[0] = n0;

	      if (UNLIKELY (cy != 0))
		{
		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
		  q--;
		}
	    }

	  *--qp = q;

	  /* Truncate operands.  */
	  dn--;
	  dp++;
	}

      np--;
      if (UNLIKELY (n1 >= (d1 & flag)))
	{
	  q = GMP_NUMB_MASK;
	  cy = mpn_submul_1 (np, dp, 2, q);

	  if (UNLIKELY (n1 != cy))
	    {
	      if (n1 < (cy & flag))
		{
		  q--;
		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
		}
	      else
		flag = 0;
	    }
	  n1 = np[1];
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  np[0] = n0;
	  np[1] = n1;
	}

      *--qp = q;
    }
  ASSERT_ALWAYS (np[1] == n1);
  }

  np += 2;

  dn = dn_orig;
  if (UNLIKELY (n1 < (dn & flag)))
    {
      mp_limb_t q, x;

      /* The quotient may be too large if the remainder is small.  Recompute
	 for above ignored operand parts, until the remainder spills.

	 FIXME: The quality of this code isn't the same as the code above.
	 1. We don't compute things in an optimal order, high-to-low, in order
	    to terminate as quickly as possible.
	 2. We mess with pointers and sizes, adding and subtracting and
	    adjusting to get things right.  It surely could be streamlined.
	 3. The only termination criteria are that we determine that the
	    quotient needs to be adjusted, or that we have recomputed
	    everything.  We should stop when the remainder is so large
	    that no additional subtracting could make it spill.
	 4. If nothing else, we should not do two loops of submul_1 over the
	    data, instead handle both the triangularization and chopping at
	    once.  */

      x = n1;

      if (dn > 2)
	{
	  /* Compensate for triangularization.  */
	  mp_limb_t y;

	  dp = dp_orig;
	  if (qn + 1 < dn)
	    {
	      dp += dn - (qn + 1);
	      dn = qn + 1;
	    }

	  y = np[-2];

	  for (i = dn - 3; i >= 0; i--)
	    {
	      q = qp[i];
	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);

	      if (y < cy)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      ASSERT_ALWAYS (cy == 0);
		      return qh - cy;
		    }
		  x--;
		}
	      y -= cy;
	    }
	  np[-2] = y;
	}

      dn = dn_orig;
      if (qn + 1 < dn)
	{
	  /* Compensate for ignored dividend and divisor tails.  */

	  dp = dp_orig;
	  np = np_orig;

	  if (qh != 0)
	    {
	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      if (qn != 0)
			cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh - cy;
		    }
		  x--;
		}
	    }

	  if (qn == 0)
	    return qh;

	  for (i = dn - qn - 2; i >= 0; i--)
	    {
	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh;
		    }
		  x--;
		}
	    }
	}
    }

  return qh;
}
Example 11
mp_limb_t
mpn_dc_div_qr (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6);		/* to adhere to mpn_sb_div_qr's limits */
  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sb_div_qr's limits */
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn > dn)
    {
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      /* Perform the typically smaller block first.  */
      if (qn == 1)
	{
	  mp_limb_t q, n2, n1, n0, d1, d0, d11, d01;

	  /* Handle qh up front, for simplicity. */
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
	  if (qh)
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

	  /* A single iteration of schoolbook: One 3/2 division,
	     followed by the bignum update and adjustment. */
	  n2 = np[0];
	  n1 = np[-1];
	  n0 = np[-2];
	  d1 = dp[-1];
	  d0 = dp[-2];
     d01 = d0 + 1;
     d11 = d1 + (d01 < d0);

	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

	  if (UNLIKELY (n2 == d1) && n1 == d0)
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
	      ASSERT (cy == n2);
	    }
	  else
	    {
	      mpir_divrem32_preinv2 (q, n1, n0, n2, n1, n0, d11, d01, d1, d0, dinv);

	      if (dn > 2)
		{
		  mp_limb_t cy, cy1;
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

		  cy1 = n0 < cy;
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
		  cy = n1 < cy1;
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
		  np[-2] = n0;

		  if (UNLIKELY (cy != 0))
		    {
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
		      qh -= (q == 0);
		      q = (q - 1) & GMP_NUMB_MASK;
		    }
		}
	      else
		np[-2] = n0;

	      np[-1] = n1;
	    }
	  qp[0] = q;
	}
      else
	{
      /* Do a 2qn / qn division */
	  if (qn == 2)
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	    qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
	  else
	    qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

	  if (qn != dn)
	    {
	      if (qn > dn - qn)
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	      else
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	      if (qh != 0)
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	      while (cy != 0)
		{
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
		}
	    }
	}

      qn = nn - dn - qn;
      do
	{
	  qp -= dn;
	  np -= dn;
	  ASSERT_NOCARRY(mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp));
	  qn -= dn;
	}
      while (qn > 0);
    }
  else
    {
      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
      else
	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

      if (qn != dn)
	{
	  if (qn > dn - qn)
	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	  else
	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	  if (qh != 0)
	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	  while (cy != 0)
	    {
	      qh -= mpn_sub_1 (qp, qp, qn, 1);
	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
	    }
	}
    }

  TMP_FREE;
  return qh;
}
Example 12
mp_limb_t
mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs,
	    mp_ptr np, mp_size_t nsize,
	    mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t most_significant_q_limb = 0;

  switch (dsize)
    {
    case 0:
      /* We are asked to divide by zero, so go ahead and do it!  (To make
	 the compiler not remove this statement, return the value.)  */
      return 1 / dsize;

    case 1:
      {
	mp_size_t i;
	mp_limb_t n1;
	mp_limb_t d;

	d = dp[0];
	n1 = np[nsize - 1];

	if (n1 >= d)
	  {
	    n1 -= d;
	    most_significant_q_limb = 1;
	  }

	qp += qextra_limbs;
	for (i = nsize - 2; i >= 0; i--)
	  udiv_qrnnd (qp[i], n1, n1, np[i], d);
	qp -= qextra_limbs;

	for (i = qextra_limbs - 1; i >= 0; i--)
	  udiv_qrnnd (qp[i], n1, n1, 0, d);

	np[0] = n1;
      }
      break;

    case 2:
      {
	mp_size_t i;
	mp_limb_t n1, n0, n2;
	mp_limb_t d1, d0;

	np += nsize - 2;
	d1 = dp[1];
	d0 = dp[0];
	n1 = np[1];
	n0 = np[0];

	if (n1 >= d1 && (n1 > d1 || n0 >= d0))
	  {
	    sub_ddmmss (n1, n0, n1, n0, d1, d0);
	    most_significant_q_limb = 1;
	  }

	for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
	  {
	    mp_limb_t q;
	    mp_limb_t r;

	    if (i >= qextra_limbs)
	      np--;
	    else
	      np[0] = 0;

	    if (n1 == d1)
	      {
		/* Q should be either 111..111 or 111..110.  Need special
		   treatment of this rare case as normal division would
		   give overflow.  */
		q = ~(mp_limb_t) 0;

		r = n0 + d1;
		if (r < d1)	/* Carry in the addition? */
		  {
		    add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
		    qp[i] = q;
		    continue;
		  }
		n1 = d0 - (d0 != 0);
		n0 = -d0;
	      }
	    else
	      {
		udiv_qrnnd (q, r, n1, n0, d1);
		umul_ppmm (n1, n0, d0, q);
	      }

	    n2 = np[0];
	  q_test:
	    if (n1 > r || (n1 == r && n0 > n2))
	      {
		/* The estimated Q was too large.  */
		q--;

		sub_ddmmss (n1, n0, n1, n0, 0, d0);
		r += d1;
		if (r >= d1)	/* If not carry, test Q again.  */
		  goto q_test;
	      }

	    qp[i] = q;
	    sub_ddmmss (n1, n0, r, n2, n1, n0);
	  }
	np[1] = n1;
	np[0] = n0;
      }
      break;

    default:
      {
	mp_size_t i;
	mp_limb_t dX, d1, n0;

	np += nsize - dsize;
	dX = dp[dsize - 1];
	d1 = dp[dsize - 2];
	n0 = np[dsize - 1];

	if (n0 >= dX)
	  {
	    if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0)
	      {
		mpn_sub_n (np, np, dp, dsize);
		n0 = np[dsize - 1];
		most_significant_q_limb = 1;
	      }
	  }

	for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
	  {
	    mp_limb_t q;
	    mp_limb_t n1, n2;
	    mp_limb_t cy_limb;

	    if (i >= qextra_limbs)
	      {
		np--;
		n2 = np[dsize];
	      }
	    else
	      {
		n2 = np[dsize - 1];
		MPN_COPY_DECR (np + 1, np, dsize);
		np[0] = 0;
	      }

	    if (n0 == dX)
	      /* This might over-estimate q, but it's probably not worth
		 the extra code here to find out.  */
	      q = ~(mp_limb_t) 0;
	    else
	      {
		mp_limb_t r;

		udiv_qrnnd (q, r, n0, np[dsize - 1], dX);
		umul_ppmm (n1, n0, d1, q);

		while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
		  {
		    q--;
		    r += dX;
		    if (r < dX)	/* I.e. "carry in previous addition?"  */
		      break;
		    n1 -= n0 < d1;
		    n0 -= d1;
		  }
	      }

	    /* Possible optimization: We already have (q * n0) and (1 * n1)
	       after the calculation of q.  Taking advantage of that, we
	       could shorten this loop by two iterations.  */

	    cy_limb = mpn_submul_1 (np, dp, dsize, q);

	    if (n2 != cy_limb)
	      {
		mpn_add_n (np, np, dp, dsize);
		q--;
	      }

	    qp[i] = q;
	    n0 = np[dsize - 1];
	  }
      }
    }

  return most_significant_q_limb;
}
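All three cases of mpn_divrem lean on udiv_qrnnd, the two-limb-by-one-limb division primitive from longlong.h. A portable stand-in using unsigned __int128 shows its contract (illustrative, not GMP's implementation):

#include <assert.h>
#include <stdint.h>

/* q, r with n1*2^64 + n0 = q*d + r; n1 < d keeps q within one limb */
static void udiv_qrnnd_ref (uint64_t *q, uint64_t *r,
                            uint64_t n1, uint64_t n0, uint64_t d)
{
  unsigned __int128 n = ((unsigned __int128) n1 << 64) | n0;
  assert (n1 < d);
  *q = (uint64_t) (n / d);
  *r = (uint64_t) (n % d);
}

int main (void)
{
  uint64_t q, r;
  udiv_qrnnd_ref (&q, &r, 1, 0, 3);          /* 2^64 / 3 */
  assert (q == 0x5555555555555555ULL && r == 1);
  return 0;
}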
void
mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

#define   r4    (pp + n3)			/* 3n+1 */
#define   r2    (pp + 7 * n)			/* 3n+1 */
#define   r0    (pp +11 * n)			/* s+t <= 2*n */

  /******************************* interpolation *****************************/
  if (half != 0) {
    cy = mpn_sub_n (r3, r3, r0, spt);
    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);

    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);

    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
  };

  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
#else
  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
  MP_PTR_SWAP(r1, wsi);
#endif

  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
  MP_PTR_SWAP(r5, wsi);
#endif

  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);

#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
#else
  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
#endif
  /* A division by 2835x4 follows. Warning: the operand can be negative! */
  mpn_divexact_by2835x4(r4, r4, n3p1);
  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));

#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
#endif
  mpn_divexact_by255(r5, r5, n3p1);

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));

#if AORSMUL_FASTER_3AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
#else
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
#endif
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
  mpn_divexact_by42525(r1, r1, n3p1);

#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
#else
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
#endif
  mpn_divexact_by9x4(r2, r2, n3p1);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));

  mpn_sub_n (r4, r2, r4, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));

  mpn_add_n (r5, r5, r1, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));

  /* last interpolation steps... */
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
  /* ... could be mixed with recomposition
	||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp

    summation scheme for remaining operations:
    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
  */

  cy = mpn_add_n (pp + n, pp + n, r5, n);
  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
#else
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);

  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
  if (half) {
    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
    }
#else
    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
    }
#endif
  } else {
    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
  }

#undef   r0
#undef   r2
#undef   r4
}
Example 14
/*
   We have

	{v0,2k} {v1,2k+1} {c+4k+1,r+r2-1} 
		v0 	v1       {-}vinf

	vinf0 is the first limb of vinf, which is overwritten by v1

	{vm1,2k+1} {v2, 2k+1}

	ws is temporary space

	sa is the sign of vm1

	rr2 is r+r2

	We want to compute

     t1   <- (3*v0+2*vm1+v2)/6-2*vinf
     t2   <- (v1+vm1)/2
  then the result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where
     c0   <- v0
     c1   <- v1 - t1
     c2   <- t2 - v0 - vinf
     c3   <- t1 - t2
     c4   <- vinf
*/ 
void
mpn_toom3_interpolate (mp_ptr c, mp_ptr v1, mp_ptr v2, mp_ptr vm1,
		                 mp_ptr vinf, mp_size_t k, mp_size_t rr2, int sa,
		                                       mp_limb_t vinf0, mp_ptr ws)
{
  mp_limb_t cy, saved;
  mp_size_t twok = k + k;
  mp_size_t kk1 = twok + 1;
  mp_ptr c1, c2, c3, c4, c5;
  mp_limb_t cout; /* final carry, should be zero at the end */

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;

#define v0 (c)

/* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 	 v1          {-}vinf

	{vm1,2k+1} {v2, 2k+1}
*/

  /* v2 <- v2 - vm1 */
  if (sa < 0)
  {
	  mpn_add_n(v2, v2, vm1, kk1);
  } else
  {
	  mpn_sub_n(v2, v2, vm1, kk1);
  }

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */

 /* vm1 <- t2 := (v1 - sa*vm1) / 2
 */
  if (sa < 0)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      mpn_add_n (vm1, vm1, v1, kk1);
      mpn_half (vm1, kk1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      mpn_sub_n (vm1, v1, vm1, kk1);
      mpn_half (vm1, kk1);
#endif
    }

  /* v1 <- v1 - v0 - vinf */

  saved = c4[0];
  c4[0] = vinf0;
#if HAVE_NATIVE_mpn_subadd_n
  cy = mpn_subadd_n(v1, v1, v0, c4, rr2);
#else
  cy = mpn_sub_n(v1, v1, v0, rr2);
  cy += mpn_sub_n(v1, v1, c4, rr2);
#endif
  c4[0] = saved;
  if (rr2 < twok)
  {
	  v1[twok] -= mpn_sub_n(v1 + rr2, v1 + rr2, v0 + rr2, twok - rr2); 
	  MPN_DECR_U(v1 + rr2, kk1 - rr2, cy);
  }
  else v1[twok] -= cy;

  saved = c4[0];
  c4[0] = vinf0;
/* subtract 5*vinf from v2,
  */
  cy = mpn_submul_1 (v2, c4, rr2, CNST_LIMB(5));
  MPN_DECR_U (v2 + rr2, kk1 - rr2, cy);
  c4[0] = saved;

  /* v2 = (v2 - v1)/2 (exact)
  */
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
  mpn_sub_n (v2, v2, v1, kk1);
  mpn_half (v2, kk1);
#endif

  /* v1 = v1 - vm1
  */
  mpn_sub_n(v1, v1, vm1, kk1);

  /* vm1 = vm1 - v2 and add vm1 in {c+k, ...} */
#if HAVE_NATIVE_mpn_addsub_n
  cy = mpn_addsub_n(c1, c1, vm1, v2, kk1);
#else
  mpn_sub_n(vm1, vm1, v2, kk1);
  cy = mpn_add_n (c1, c1, vm1, kk1);
#endif
  ASSERT_NOCARRY (mpn_add_1(c3 + 1, c3 + 1, rr2 + k - 1, cy)); /* 4k+rr2-(3k+1) = rr2+k-1 */

  /* don't forget to add vinf0 in {c+4k, ...} */
  ASSERT_NOCARRY (mpn_add_1(c4, c4, rr2, vinf0));

  /* add v2 in {c+3k, ...} */
  if (rr2 <= k + 1)
     ASSERT_NOCARRY (mpn_add_n (c3, c3, v2, k+rr2));
  else
  {
	  cy = mpn_add_n (c3, c3, v2, kk1);
     if (cy) ASSERT_NOCARRY (mpn_add_1(c5 + 1, c5 + 1, rr2 - k - 1, cy)); /* 4k+rr2-(5k+1) = rr2-k-1 */
  }

#undef v0
}
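Traced on scalars, the step sequence above recovers the coefficients of p(x) = c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 from v0 = p(0), v1 = p(1), vm1 = p(-1), v2 = p(2) and vinf = p(oo) = c4. A sketch for the sa > 0 case, with each line naming the mpn operation it mirrors:

#include <assert.h>
#include <stdio.h>

int main (void)
{
  long c0 = 3, c1 = 1, c2 = 4, c3 = 1, c4 = 5;   /* arbitrary coefficients */
  long v0 = c0;
  long v1 = c0 + c1 + c2 + c3 + c4;              /* p(1)  */
  long vm1 = c0 - c1 + c2 - c3 + c4;             /* p(-1) */
  long v2 = c0 + 2*c1 + 4*c2 + 8*c3 + 16*c4;     /* p(2)  */
  long vinf = c4;                                /* p(oo) */

  v2 = (v2 - vm1) / 3;        /* mpn_sub_n, mpn_divexact_by3 */
  vm1 = (v1 - vm1) / 2;       /* mpn_rsh1sub_n               */
  v1 = v1 - v0 - vinf;        /* mpn_subadd_n                */
  v2 = v2 - 5 * vinf;         /* mpn_submul_1 by 5           */
  v2 = (v2 - v1) / 2;         /* mpn_rsh1sub_n               */
  v1 = v1 - vm1;              /* mpn_sub_n                   */
  vm1 = vm1 - v2;             /* mpn_sub_n                   */

  assert (v0 == c0 && vm1 == c1 && v1 == c2 && v2 == c3 && vinf == c4);
  printf ("coefficients recovered\n");
  return 0;
}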
Example 15
mp_limb_t
mpn_sb_divrem_mn (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn)
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t qn = nn - dn;
  mp_size_t i;
  mp_limb_t dx, d1, n0;
  mp_limb_t dxinv;
  int use_preinv;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  np += qn;
  dx = dp[dn - 1];
  d1 = dp[dn - 2];
  n0 = np[dn - 1];

  if (n0 >= dx)
    {
      if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
	{
	  mpn_sub_n (np, np, dp, dn);
	  most_significant_q_limb = 1;
	}
    }

  /* use_preinv is possibly a constant, but it's left to the compiler to
     optimize away the unused code in that case.  */
  use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
  if (use_preinv)
    invert_limb (dxinv, dx);

  for (i = qn - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t nx;
      mp_limb_t cy_limb;

      nx = np[dn - 1];		/* FIXME: could get value from r1 */
      np--;

      if (nx == dx)
	{
	  /* This might over-estimate q, but it's probably not worth
	     the extra code here to find out.  */
	  q = GMP_NUMB_MASK;

#if 1
	  cy_limb = mpn_submul_1 (np, dp, dn, q);
#else
	  /* This should be faster on many machines */
	  cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
	  cy = mpn_add_n (np, np, dp, dn);
	  np[dn] += cy;
#endif

	  if (nx != cy_limb)
	    {
	      mpn_add_n (np, np, dp, dn);
	      q--;
	    }

	  qp[i] = q;
	}
      else
	{
	  mp_limb_t rx, r1, r0, p1, p0;

	  /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
	     when np[dn-1] is used in an asm statement like umul_ppmm in
	     udiv_qrnnd_preinv.  The symptom is seg faults due to registers
	     being clobbered.  gcc 2.95 i386 doesn't have the problem. */
	  {
	    mp_limb_t  workaround = np[dn - 1];
	    if (use_preinv)
	      udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
	    else
	      {
		udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
			    dx << GMP_NAIL_BITS);
		r1 >>= GMP_NAIL_BITS;
	      }
	  }
	  umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
	  p0 >>= GMP_NAIL_BITS;

	  r0 = np[dn - 2];
	  rx = 0;
	  if (r1 < p1 || (r1 == p1 && r0 < p0))
	    {
	      p1 -= p0 < d1;
	      p0 = (p0 - d1) & GMP_NUMB_MASK;
	      q--;
	      r1 = (r1 + dx) & GMP_NUMB_MASK;
	      rx = r1 < dx;
	    }

	  p1 += r0 < p0;	/* cannot carry! */
	  rx -= r1 < p1;	/* may become 11..1 if q is still too large */
	  r1 = (r1 - p1) & GMP_NUMB_MASK;
	  r0 = (r0 - p0) & GMP_NUMB_MASK;

	  cy_limb = mpn_submul_1 (np, dp, dn - 2, q);

	  /* Check if we've over-estimated q, and adjust as needed.  */
	  {
	    mp_limb_t cy1, cy2;
	    cy1 = r0 < cy_limb;
	    r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
	    cy2 = r1 < cy1;
	    r1 -= cy1;
	    np[dn - 1] = r1;
	    np[dn - 2] = r0;
	    if (cy2 != rx)
	      {
		mpn_add_n (np, np, dp, dn);
		q--;
	      }
	  }
	  qp[i] = q;
	}
    }

  /* ______ ______ ______
    |__rx__|__r1__|__r0__|		partial remainder
	    ______ ______
	 - |__p1__|__p0__|		partial product to subtract
	    ______ ______
	 - |______|cylimb|

     rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
     carry out).  If rx=-1 then q is too large.  If rx=0, then q might
     be too large, but it is most likely correct.
  */

  return most_significant_q_limb;
}
Example 16
mp_limb_t
mpn_sb_div_qr (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0, d01, d11;
  mp_limb_t cy, cy1, cy2;
  mp_limb_t q;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  np += nn;

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  d1 = dp[dn - 1];

  if (dn <= SB_DIV_QR_SMALL_THRESHOLD)
    {
      np--;

      for (i = nn - dn - 1; i >= 0; i--)
	{
	  /* fetch next word */
	  cy = *np--;

	  mpir_divapprox32_preinv2 (q, cy, np[0], dinv);

	  /* np -= dp*q */
	  cy -= mpn_submul_1 (np - dn + 1, dp, dn, q);

	  /* correct if remainder is too large */
	  if (UNLIKELY (cy || np[0] >= d1))
	    {
	      if (cy || mpn_cmp (np - dn + 1, dp, dn) >= 0)
		{
		  q++;
		  mpn_sub_n (np - dn + 1, np - dn + 1, dp, dn);
		}
	    }
	  qp[i] = q;
	}
    }
  else
    {
  qp += nn - dn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d0 = dp[dn];

  d01 = d0 + 1;
  d11 = d1 + (d01 == 0);

  np -= 2;

  n1 = np[1];

  for (i = nn - (dn + 2); i > 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
	{
	  q = GMP_NUMB_MASK;
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  cy2 = mpn_submul_1 (np - dn, dp, dn, q);

	  sub_333 (cy, n1, n0, 0, n1, n0, 0, 0, cy2);

	  np[0] = n0;

	  if (UNLIKELY (cy != 0))
	    {
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	      q--;
	    }
	}

      *--qp = q;
    }
  np[1] = n1;
  }

  return qh;
}
Example 17
void
check (void)
{
  mp_limb_t  wp[100], xp[100], yp[100];
  mp_size_t  size = 100;

  refmpn_zero (xp, size);
  refmpn_zero (yp, size);
  refmpn_zero (wp, size);

  pre ("mpn_add_n");
  mpn_add_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_add_nc
  pre ("mpn_add_nc");
  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
  pre ("mpn_addlsh1_n");
  mpn_addlsh1_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_and_n
  pre ("mpn_and_n");
  mpn_and_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_andn_n
  pre ("mpn_andn_n");
  mpn_andn_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_addmul_1");
  mpn_addmul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_addmul_1c
  pre ("mpn_addmul_1c");
  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_com_n
  pre ("mpn_com_n");
  mpn_com_n (wp, xp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_copyd
  pre ("mpn_copyd");
  mpn_copyd (wp, xp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_copyi
  pre ("mpn_copyi");
  mpn_copyi (wp, xp, size);
  post ();
#endif

  pre ("mpn_divexact_1");
  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));
  post ();

  pre ("mpn_divexact_by3c");
  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));
  post ();

  pre ("mpn_divrem_1");
  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_divrem_1c
  pre ("mpn_divrem_1c");
  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122));
  post ();
#endif

  pre ("mpn_gcd_1");
  xp[0] |= 1;
  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_gcd_finda
  pre ("mpn_gcd_finda");
  xp[0] |= 1;
  xp[1] |= 1;
  notdead += mpn_gcd_finda (xp);
  post ();
#endif

  pre ("mpn_hamdist");
  notdead += mpn_hamdist (xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_ior_n
  pre ("mpn_ior_n");
  mpn_ior_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_iorn_n
  pre ("mpn_iorn_n");
  mpn_iorn_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_lshift");
  mpn_lshift (wp, xp, size, 1);
  post ();

  pre ("mpn_mod_1");
  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_mod_1c
  pre ("mpn_mod_1c");
  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));
  post ();
#endif

#if GMP_NUMB_BITS % 4 == 0
  pre ("mpn_mod_34lsub1");
  notdead += mpn_mod_34lsub1 (xp, size);
  post ();
#endif

  pre ("mpn_modexact_1_odd");
  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));
  post ();

  pre ("mpn_modexact_1c_odd");
  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));
  post ();

  pre ("mpn_mul_1");
  mpn_mul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_mul_1c
  pre ("mpn_mul_1c");
  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_mul_2
  pre ("mpn_mul_2");
  mpn_mul_2 (wp, xp, size-1, yp);
  post ();
#endif

  pre ("mpn_mul_basecase");
  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);
  post ();

#if HAVE_NATIVE_mpn_nand_n
  pre ("mpn_nand_n");
  mpn_nand_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_nior_n
  pre ("mpn_nior_n");
  mpn_nior_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_popcount");
  notdead += mpn_popcount (xp, size);
  post ();

  pre ("mpn_preinv_mod_1");
  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,
                               refmpn_invert_limb (GMP_NUMB_MAX));
  post ();

#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
  pre ("mpn_preinv_divrem_1");
  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,
                       refmpn_invert_limb (GMP_NUMB_MAX), 0);
  post ();
#endif

#if HAVE_NATIVE_mpn_rsh1add_n
  pre ("mpn_rsh1add_n");
  mpn_rsh1add_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_rsh1sub_n
  pre ("mpn_rsh1sub_n");
  mpn_rsh1sub_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_rshift");
  mpn_rshift (wp, xp, size, 1);
  post ();

  pre ("mpn_sqr_basecase");
  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);
  post ();

  pre ("mpn_submul_1");
  mpn_submul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_submul_1c
  pre ("mpn_submul_1c");
  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();
#endif

  pre ("mpn_sub_n");
  mpn_sub_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_sub_nc
  pre ("mpn_sub_nc");
  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_sublsh1_n
  pre ("mpn_sublsh1_n");
  mpn_sublsh1_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_udiv_qrnnd
  pre ("mpn_udiv_qrnnd");
  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));
  post ();
#endif

#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  pre ("mpn_udiv_qrnnd_r");
  mpn_udiv_qrnnd (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);
  post ();
#endif

#if HAVE_NATIVE_mpn_umul_ppmm
  pre ("mpn_umul_ppmm");
  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);
  post ();
#endif

#if HAVE_NATIVE_mpn_umul_ppmm_r
  pre ("mpn_umul_ppmm_r");
  mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]);
  post ();
#endif

#if HAVE_NATIVE_mpn_xor_n
  pre ("mpn_xor_n");
  mpn_xor_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_xnor_n
  pre ("mpn_xnor_n");
  mpn_xnor_n (wp, xp, yp, size);
  post ();
#endif
}
Example 18
void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
                            mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N,
                            int sinonly, int alternating)
{
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
    slong power, k, m;
    int cosorsin;

    TMP_INIT;
    TMP_START;

    if (2 * N >= FACTORIAL_TAB_SIZE - 1)
    {
        flint_printf("_arb_sin_cos_taylor_rs: N too large!\n");
        abort();
    }

    if (N <= 1)
    {
        if (N == 0)
        {
            flint_mpn_zero(ysin, xn);
            if (!sinonly) flint_mpn_zero(ycos, xn);
            error[0] = 0;
        }
        else if (N == 1)
        {
            flint_mpn_copyi(ysin, x, xn);
            if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES);
            error[0] = 1;
        }
    }
    else
    {
        /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */
        m = 2;
        while (m * m < N)
            m += 2;

        /* todo: merge allocations */
        xpow = TMP_ALLOC_LIMBS((m + 1) * xn);
        s = TMP_ALLOC_LIMBS(xn + 2);
        t = TMP_ALLOC_LIMBS(2 * xn + 2);     /* todo: 1 limb too much? */

        /* higher index ---> */
        /*        | ---xn--- | */
        /* xpow = |  <temp>  | x^m | x^(m-1) | ... | x^2 | x | */

#define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn)
#define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn)

        mpn_sqr(XPOW_WRITE(1), x, xn);
        mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn);

        for (k = 4; k <= m; k += 2)
        {
            mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn);
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);
        }

        for (cosorsin = sinonly; cosorsin < 2; cosorsin++)
        {
            flint_mpn_zero(s, xn + 1);

            /* todo: skip one nonscalar multiplication (use x^m)
               when starting on x^0 */
            power = (N - 1) % m;

            for (k = N - 1; k >= 0; k--)
            {
                c = factorial_tab_numer[2 * k + cosorsin];
                new_denom = factorial_tab_denom[2 * k + cosorsin];
                old_denom = factorial_tab_denom[2 * k + cosorsin + 2];

                /* change denominators */
                if (new_denom != old_denom && k < N - 1)
                {
                    if (alternating && (k % 2 == 0))
                        s[xn] += old_denom;

                    mpn_divrem_1(s, 0, s, xn + 1, old_denom);

                    if (alternating && (k % 2 == 0))
                        s[xn] -= 1;
                }

                if (power == 0)
                {
                    /* add c * x^0 -- only top limb is affected */
                    if (alternating & k)
                        s[xn] -= c;
                    else
                        s[xn] += c;

                    /* Outer polynomial evaluation: multiply by x^m */
                    if (k != 0)
                    {
                        mpn_mul(t, s, xn + 1, XPOW_READ(m), xn);
                        flint_mpn_copyi(s, t + xn, xn + 1);
                    }

                    power = m - 1;
                }
                else
                {
                    if (alternating & k)
                        s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c);
                    else
                        s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c);

                    power--;
                }
            }

            /* finally divide by denominator */
            if (cosorsin == 0)
            {
                mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]);

                /* perturb down to a number < 1 if necessary. note that this
                   does not invalidate the error bound: 1 - ulp is either
                   1 ulp too small or must be closer to the exact value */
                if (t[xn] == 0)
                    flint_mpn_copyi(ycos, t, xn);
                else
                    flint_mpn_store(ycos, xn, LIMB_ONES);
            }
            else
            {
                mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]);
                mpn_mul(t, s, xn + 1, x, xn);
                flint_mpn_copyi(ysin, t + xn, xn);
            }
        }

        /* error bound (ulp) */
        error[0] = 2;
    }

    TMP_END;
}
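The inner loop uses rectangular splitting (Paterson-Stockmeyer): with m ~ sqrt(N) precomputed powers x, x^2, ..., x^m, nearly all terms cost one scalar mpn_addmul_1/mpn_submul_1, and only about N/m full bignum multiplications by x^m are needed. The same control flow on doubles, evaluating exp(x) (illustrative only):

#include <assert.h>
#include <math.h>
#include <stdio.h>

int main (void)
{
  double x = 0.125, coef[16], xpow[5], s = 0.0;
  int N = 16, m = 4, k, power;

  coef[0] = 1.0;                       /* coefficients of exp: 1/k! */
  for (k = 1; k < N; k++)
    coef[k] = coef[k - 1] / k;

  xpow[1] = x;                         /* precompute x^1 .. x^m */
  for (k = 2; k <= m; k++)
    xpow[k] = xpow[k - 1] * x;

  power = (N - 1) % m;
  for (k = N - 1; k >= 0; k--)
    {
      if (power == 0)
        {
          s += coef[k];                /* x^0 term of this block */
          if (k != 0)
            s *= xpow[m];              /* one "nonscalar" multiplication */
          power = m - 1;
        }
      else
        {
          s += coef[k] * xpow[power];  /* cheap scalar multiply-add */
          power--;
        }
    }

  assert (fabs (s - exp (x)) < 1e-15);
  printf ("exp(0.125) ~= %.17g\n", s);
  return 0;
}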
Example 19
void tc4_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y)
{
  mp_size_t  sign, wu, xu, ws, new_wn, min_size, dsize;
  mp_limb_t  cy;

  /* w unaffected if x==0 or y==0 */
  if (xn == 0 || y == 0)
    return;

  sign = xn;
  xu = ABS (xn);

  ws = *wn;
  if (*wn == 0)
  {
      /* nothing to add to, just set x*y, "sign" gives the sign */
      cy = mpn_mul_1 (wp, xp, xu, y);
      if (cy)
		{
			wp[xu] = cy;
         xu = xu + 1;
		} 
      *wn = (sign >= 0 ? xu : -xu);
      return;
  }
  
  sign ^= *wn;
  wu = ABS (*wn);

  new_wn = MAX (wu, xu);
  min_size = MIN (wu, xu);

  if (sign >= 0)
  {
      /* addmul of absolute values */

      cy = mpn_addmul_1 (wp, xp, min_size, y);
      
      dsize = xu - wu;
#if HAVE_NATIVE_mpn_mul_1c
      if (dsize > 0)
        cy = mpn_mul_1c (wp + min_size, xp + min_size, dsize, y, cy);
      else if (dsize < 0)
      {
          dsize = -dsize;
          cy = mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);
      }
#else
      if (dsize != 0)
      {
          mp_limb_t cy2;
          if (dsize > 0)
            cy2 = mpn_mul_1 (wp + min_size, xp + min_size, dsize, y);
          else
          {
              dsize = -dsize;
              cy2 = 0;
          }
          cy = cy2 + mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);
      }
#endif

      if (cy)
		{
			wp[dsize + min_size] = cy;
         new_wn ++;
		}
   } else
   {
      /* submul of absolute values */

      cy = mpn_submul_1 (wp, xp, min_size, y);
      if (wu >= xu)
      {
          /* if w bigger than x, then propagate borrow through it */
          if (wu != xu)
            cy = mpn_sub_1 (wp + xu, wp + xu, wu - xu, cy);

          if (cy != 0)
          {
              /* Borrow out of w, take twos complement negative to get
                 absolute value, flip sign of w.  */
              wp[new_wn] = ~-cy;  /* extra limb is 0-cy */
              mpn_not (wp, new_wn);
              new_wn++;
              MPN_INCR_U (wp, new_wn, CNST_LIMB(1));
              ws = -*wn;
          }
      } else /* wu < xu */
      {
          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
             take twos complement and use an mpn_mul_1 for the rest.  */

          mp_limb_t  cy2;

          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
          mpn_not (wp, wu);
          cy += mpn_add_1 (wp, wp, wu, CNST_LIMB(1));
          cy -= 1;

          /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
          cy2 = (cy == MP_LIMB_T_MAX);
          cy += cy2;
          MPN_MUL_1C (cy, wp + wu, xp + wu, xu - wu, y, cy);
          wp[new_wn] = cy;
          new_wn += (cy != 0);

          /* Apply any -1 from above.  The value at wp+wu is non-zero
             because y!=0 and the high limb of x will be non-zero.  */
          if (cy2)
            MPN_DECR_U (wp+wu, new_wn - wu, CNST_LIMB(1));

          ws = -*wn;
        }

      /* submul can produce high zero limbs due to cancellation, both when w
         has more limbs or x has more  */
      MPN_NORMALIZE (wp, new_wn);
  }

  *wn = (ws >= 0 ? new_wn : -new_wn);

  ASSERT (new_wn == 0 || wp[new_wn - 1] != 0);
}
Example 20
/*
   Toom 4 interpolation. Interpolates the value at 2^(sn*B) of a 
	polynomial p(x) with 7 coefficients given the values 
	p(oo), p(2), p(1), p(-1), 2^6*p(1/2), 2^6*p(-1/2), p(0).
	The output is placed in rp and the final number of limbs of the
	output is given in rpn.
	The 4th and 6th values may be negative, and if so, n4 and n6 
	should be set to a negative value respectively.
   To save space we pass r3, r5, r7 in place in the output rp.
	The other r's are stored separately in space tp.
	The low limb of r3 is stored in r30, as it will be overwritten
	by the high limb of r5.

rp          rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                      <-------------r3------------->

   We assume that r1 is stored at tp, r2 at (tp + t4), r4 at (tp + 2*t4) 
	and r6 (tp + 3*t4). Each of these r's has t4 = s4 + 1 limbs allocated.
*/
void mpn_toom4_interpolate(mp_ptr rp, mp_size_t * rpn, mp_size_t sn,  
		       mp_ptr tp, mp_size_t s4, mp_size_t n4, mp_size_t n6, mp_limb_t r30)
{
	mp_size_t n1, n2, n3, n5, n7, t4;
	mp_limb_t saved, saved2, cy;

   t4 = s4 + 1; 
   
	mpn_add_n(r2, r2, r5, s4);

   if (n6 < 0) 
		mpn_add_n(r6, r5, r6, s4);
	else
      mpn_sub_n(r6, r5, r6, s4);
	/* r6 is now in twos complement format */

	saved = r3[0];
	r3[0] = r30;
	if (n4 < 0) 
		mpn_add_n(r4, r3, r4, s4);
	else
      mpn_sub_n(r4, r3, r4, s4);
	r3[0] = saved;
	/* r4 is now in twos complement format */
	
	mpn_sub_n(r5, r5, r1, s4);

#if HAVE_NATIVE_mpn_sublsh_n
	r5[s4-1] -= mpn_sublsh_n(r5, r5, r7, s4-1, 6);
#else
	r5[s4-1] -= mpn_submul_1(r5, r7, s4-1, 64);
#endif
   
   TC4_RSHIFT1(r4, s4); 
	
	saved = r3[0];
	r3[0] = r30;
	mpn_sub_n(r3, r3, r4, s4);
	r30 = r3[0];
	r3[0] = saved;

	mpn_double(r5, s4); 

	mpn_sub_n(r5, r5, r6, s4);

   saved = r3[0];
	r3[0] = r30;
	mpn_submul_1(r2, r3, s4, 65);
   r3[0] = saved;
	
	saved2 = r7[s4-1];
	r7[s4-1] = CNST_LIMB(0); /* r7 is always positive so no sign extension is needed */
	saved = r3[0];
	r3[0] = r30;
#if HAVE_NATIVE_mpn_subadd_n
	mpn_subadd_n(r3, r3, r7, r1, s4);
#else
    mpn_sub_n(r3, r3, r7, s4);
    mpn_sub_n(r3, r3, r1, s4);
#endif
	r7[s4-1] = saved2;
   r30 = r3[0];
	
   mpn_addmul_1(r2, r3, s4, 45);

#if HAVE_NATIVE_mpn_sublsh_n
   cy = mpn_sublsh_n(r5, r5, r3, s4 - 1, 3);
#else
   cy = mpn_submul_1(r5, r3, s4 - 1, 8);
#endif
   r3[0] = saved;
	r3[0] -= (cy + 8*r3[s4-1]);
   
	mpn_rshift(r5, r5, s4, 3); 

	mpn_divexact_by3(r5, r5, s4); 
   
	mpn_sub_n(r6, r6, r2, s4);

#if HAVE_NATIVE_mpn_sublsh_n
	mpn_sublsh_n(r2, r2, r4, s4, 4);
#else
	mpn_submul_1(r2, r4, s4, 16);
#endif
   
   mpn_rshift(r2, r2, s4, 1); 

	mpn_divexact_by3(r2, r2, s4); 

   mpn_divexact_by3(r2, r2, s4); 
   
   saved = r3[0];
	r3[0] = r30;
   cy = mpn_sub_n(r3, r3, r5, s4 - 1);
   r30 = r3[0];
	r3[0] = saved;
	r3[s4-1] -= (cy + r5[s4-1]);
   
	mpn_sub_n(r4, r4, r2, s4);
	
	mpn_addmul_1(r6, r2, s4, 30);

   mpn_divexact_byfobm1(r6, r6, s4, CNST_LIMB(15), CNST_LIMB(~0/15));

	mpn_rshift(r6, r6, s4, 2);

	mpn_sub_n(r2, r2, r6, s4);

	TC4_NORM(r1, n1, s4);
   TC4_NORM(r2, n2, s4);
   
   (*rpn) = 6*sn+1;
	cy = mpn_add_1(r3, r3, *rpn - 4*sn, r30); /* don't forget to add r3[0] back in */
   if (cy) 
	{
		rp[*rpn] = cy;
	   (*rpn)++;
	}

	tc4_copy(rp, rpn, 5*sn, r2, n2);
   tc4_copy(rp, rpn, 6*sn, r1, n1);

	tc4_copy(rp, rpn, sn, r6, s4);
   tc4_copy(rp, rpn, 3*sn, r4, s4); 
}
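Several of the steps above divide exactly by small constants: r5, for instance, is divided by 8 with mpn_rshift and then by 3 with mpn_divexact_by3, which is only valid because the interpolation guarantees divisibility. Here is a standalone sketch of that shift-then-divexact pattern (illustrative, not part of mpn_toom4_interpolate; assumes 64-bit limbs and links with -lgmp):

#include <stdio.h>
#include <gmp.h>

int main (void)
{
  mp_limb_t a[2];
  mp_limb_t x[2] = { (mp_limb_t) 0x123456789, 0 };
  mp_size_t n = 2;

  mpn_mul_1 (a, x, n, 24);       /* a = 24 * 0x123456789, divisible by 24 */

  mpn_rshift (a, a, n, 3);       /* exact /8: no one bits are shifted out */
  mpn_divexact_by3 (a, a, n);    /* exact /3: valid only for exact multiples */

  printf ("%lx\n", (unsigned long) a[0]);  /* expect 123456789 */
  return 0;
}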
Example n. 21
void
mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
			   mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
			   mp_size_t w6n, mp_ptr tp)
{
  mp_size_t m;
  mp_limb_t cy;

  m = 2*n + 1;
#define w0 rp
#define w2 (rp + 2*n)
#define w6 (rp + 6*n)

  ASSERT (w6n > 0);
  ASSERT (w6n <= 2*n);

  /* Using formulas similar to Marco Bodrato's

     W5 = W5 + W4
     W1 =(W4 - W1)/2
     W4 = W4 - W0
     W4 =(W4 - W1)/4 - W6*16
     W3 =(W2 - W3)/2
     W2 = W2 - W3

     W5 = W5 - W2*65      May be negative.
     W2 = W2 - W6 - W0
     W5 =(W5 + W2*45)/2   Now >= 0 again.
     W4 =(W4 - W2)/3
     W2 = W2 - W4

     W1 = W5 - W1         May be negative.
     W5 =(W5 - W3*8)/9
     W3 = W3 - W5
     W1 =(W1/15 + W5)/2   Now >= 0 again.
     W5 = W5 - W1

     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
	   W4 = f(2), W5 = f(1/2), W6 = f(oo),

     Note that most intermediate results are positive; the ones that
     may be negative are represented in two's complement. We must
     never shift right a value that may be negative, since that would
     invalidate the sign bit. On the other hand, divexact by odd
     numbers works fine with two's complement.
  */

  mpn_add_n (w5, w5, w4, m);
  if (flags & toom7_w1_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w1, w1, w4, m);
#else
      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w1, w4, w1, m);
#else
      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
      mpn_rshift (w1, w1, m, 1);
#endif
    }
  mpn_sub (w4, w4, m, w0, 2*n);
  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));
  mpn_rshift (w4, w4, m, 2); /* w4>=0 */

  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
  mpn_sub (w4, w4, m, tp, w6n+1);

  if (flags & toom7_w3_neg)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w3, w3, w2, m);
#else
      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w3, w2, w3, m);
#else
      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
      mpn_rshift (w3, w3, m, 1);
#endif
    }

  mpn_sub_n (w2, w2, w3, m);

  mpn_submul_1 (w5, w2, m, 65);
  mpn_sub (w2, w2, m, w6, w6n);
  mpn_sub (w2, w2, m, w0, 2*n);

  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));
  mpn_rshift (w5, w5, m, 1);
  mpn_sub_n (w4, w4, w2, m);

  mpn_divexact_by3 (w4, w4, m);
  mpn_sub_n (w2, w2, w4, m);

  mpn_sub_n (w1, w5, w1, m);
  mpn_lshift (tp, w3, m, 3);
  mpn_sub_n (w5, w5, tp, m);
  mpn_divexact_by9 (w5, w5, m);
  mpn_sub_n (w3, w3, w5, m);

  mpn_divexact_by15 (w1, w1, m);
  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
  mpn_sub_n (w5, w5, w1, m);

  /* These bounds are valid for the 4x4 polynomial product of toom44,
   * and they are conservative for toom53 and toom62. */
  ASSERT (w1[2*n] < 2);
  ASSERT (w2[2*n] < 3);
  ASSERT (w3[2*n] < 4);
  ASSERT (w4[2*n] < 3);
  ASSERT (w5[2*n] < 2);

  /* Addition chain. Note carries and the 2n'th limbs that need to be
   * added in.
   *
   * Special care is needed for w2[2n] and the corresponding carry,
   * since the "simple" way of adding it all together would overwrite
   * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
   * the high half of w3 and the low half of w4.
   *
   *         7    6    5    4    3    2    1    0
   *    |    |    |    |    |    |    |    |    |
   *                  ||w3 (2n+1)|
   *             ||w4 (2n+1)|
   *        ||w5 (2n+1)|        ||w1 (2n+1)|
   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
   *  -----------------------------------------------
   *  r |    |    |    |    |    |    |    |    |
   *        c7   c6   c5   c4   c3                 Carries to propagate
   */

  cy = mpn_add_n (rp + n, rp + n, w1, m);
  MPN_INCR_U (w2 + n + 1, n, cy);
  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
  if (w6n > n + 1)
    ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
  else
    {
      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
#if WANT_ASSERT
      {
	mp_size_t i;
	for (i = w6n; i <= n; i++)
	  ASSERT (w5[n + i] == 0);
      }
#endif
    }
}
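The whole sequence of formulas can be checked with ordinary signed arithmetic. The scalar model below (an illustration, not GMP code) takes f(x) = 1 + 2x + 3x^2 + 4x^3 + 5x^4 + 6x^5 + 7x^6, feeds in W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1), W4 = f(2), W5 = 2^6*f(1/2) (pre-scaled, as in the toom4 comment earlier) and W6 = f(oo), and recovers the seven coefficients; the two steps that go negative are exactly the ones the limb code keeps in two's complement:

#include <stdio.h>
#include <assert.h>

int main (void)
{
  long long W0 = 1, W2 = 28, W3 = 4, W4 = 769, W1 = 313, W5 = 247, W6 = 7;

  W5 = W5 + W4;                  /* 1016 */
  W1 = (W4 - W1) / 2;            /* 228 */
  W4 = W4 - W0;                  /* 768 */
  W4 = (W4 - W1) / 4 - W6 * 16;  /* 23 */
  W3 = (W2 - W3) / 2;            /* 12 */
  W2 = W2 - W3;                  /* 16 */

  W5 = W5 - W2 * 65;             /* -24, negative */
  W2 = W2 - W6 - W0;             /* 8 */
  W5 = (W5 + W2 * 45) / 2;       /* 168, >= 0 again */
  W4 = (W4 - W2) / 3;            /* 5 */
  W2 = W2 - W4;                  /* 3 */

  W1 = W5 - W1;                  /* -60, negative */
  W5 = (W5 - W3 * 8) / 9;        /* 8 */
  W3 = W3 - W5;                  /* 4 */
  W1 = (W1 / 15 + W5) / 2;       /* 2 */
  W5 = W5 - W1;                  /* 6 */

  /* The Wi now hold the coefficients of f, low to high. */
  assert (W0 == 1 && W1 == 2 && W2 == 3 && W3 == 4
          && W4 == 5 && W5 == 6 && W6 == 7);
  printf ("coefficients: %lld %lld %lld %lld %lld %lld %lld\n",
          W0, W1, W2, W3, W4, W5, W6);
  return 0;
}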
Example n. 22
void
check_functions (void)
{
  mp_limb_t  wp[2], wp2[2], xp[2], yp[2], r;
  int  i;

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      yp[0] = 456;
      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 579);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 702);
      ASSERT_ALWAYS (r == 0);
    }

#if HAVE_NATIVE_mpn_copyd
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyd (xp+1, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[1] == 123);
    }
#endif

#if HAVE_NATIVE_mpn_copyi
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyi (xp, xp+1, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[0] == 456);
    }
#endif

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1605;
      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
      ASSERT_ALWAYS (wp[0] == 321);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1296;
      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
      ASSERT_ALWAYS (wp[0] == 432);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 578;
      r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17), CNST_LIMB(-1)/CNST_LIMB(17));
      ASSERT_ALWAYS (wp[0] == 34);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 287;
      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[1] == 41);
      ASSERT_ALWAYS (wp[0] == 0);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 290;
      r = mpn_divrem_euclidean_qr_1 (wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[0] == 41);
      ASSERT_ALWAYS (r == 3);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 12;
      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
      ASSERT_ALWAYS (r == 3);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x1001;
      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x2002);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (r == 2);
    }

#if (GMP_NUMB_BITS % 4) == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      int  bits = (GMP_NUMB_BITS / 4) * 3;
      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;
      mp_limb_t  want = GMP_NUMB_MAX % mod;
      xp[0] = GMP_NUMB_MAX;
      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
      ASSERT_ALWAYS (r % mod == want);
    }
#endif

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (wp[0] == 56);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      yp[0] = 7;
      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 35);
      ASSERT_ALWAYS (wp[1] == 0);
    }

#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x101;
      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
                               GMP_LIMB_HIGHBIT,
                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
      ASSERT_ALWAYS (wp[0] == 0x202);
      ASSERT_ALWAYS (wp[1] == 0);
      ASSERT_ALWAYS (r == 0);
    }
#endif

#if GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = GMP_LIMB_HIGHBIT+123;
      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,
                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));
      ASSERT_ALWAYS (r == 123);
    }
#endif

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      modlimb_invert (r, xp[0]);
      r = -r;
      yp[0] = 43;
      yp[1] = 75;
      mpn_redc_1 (wp, yp, xp, (mp_size_t) 1, r);
      ASSERT_ALWAYS (wp[0] == 78);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      yp[0] = 3;
      mpn_sumdiff_n (wp, wp2, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 8);
      ASSERT_ALWAYS (wp2[0] == 2);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x8008;
      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x4004);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 25);
      ASSERT_ALWAYS (wp[1] == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 999;
      yp[0] = 666;
      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 333);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 210);
      ASSERT_ALWAYS (r == 0);
    }
}
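Each stanza resets __gmpn_cpuvec and calls its function twice, so both the initializer stub and the resolved routine get exercised. The mpn_divrem_1 stanza also shows the fraction-limb argument: passing qxn = 1 extends the dividend with one virtual zero low limb, so the integer quotient lands at wp[1] and wp[0] receives floor((n mod d) * B / d). A standalone sketch of just that call (illustrative; links with -lgmp):

#include <stdio.h>
#include <gmp.h>

int main (void)
{
  mp_limb_t wp[2], xp[1] = { 287 };
  mp_limb_t r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
                              (mp_limb_t) 7);
  printf ("%lu %lu %lu\n",
          (unsigned long) wp[1],   /* integer quotient: 41 */
          (unsigned long) wp[0],   /* fraction limb: 0, since 7 divides 287 */
          (unsigned long) r);      /* remainder: 0 */
  return 0;
}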
Example n. 23
mp_limb_t
mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
	     mp_srcptr vp, mp_size_t vsize, unsigned long int d)
{
  mp_limb_t v_inv;

  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize * GMP_NUMB_BITS >= d);
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  /* 1/V mod 2^GMP_NUMB_BITS. */
  binvert_limb (v_inv, vp[0]);

  /* Fast code for two cases previously used by the accel part of mpn_gcd.
     (Could probably remove this now it's inlined there.) */
  if (usize == 2 && vsize == 2 &&
      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
    {
      mp_limb_t hi, lo;
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
      up[0] = 0;
      up[1] -= hi + q*vp[1];
      qp[0] = q;
      if (d == 2*GMP_NUMB_BITS)
        {
          q = (up[1] * v_inv) & GMP_NUMB_MASK;
          up[1] = 0;
          qp[1] = q;
        }
      return 0;
    }

  /* Main loop.  */
  while (d >= GMP_NUMB_BITS)
    {
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      d -= GMP_NUMB_BITS;
      up += 1, usize -= 1;
      *qp++ = q;
    }

  if (d)
    {
      mp_limb_t b;
      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
      if (q <= 1)
	{
	  if (q == 0)
	    return 0;
	  else
	    b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
	}
      else
	b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);

      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      return q;
    }

  return 0;
}
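The quotient limbs here are Hensel (2-adic) quotients: q = up[0] * v_inv mod B is the unique limb that makes the low limb of u - q*v vanish, which is why the loop can discard one low limb of u per iteration. A scalar sketch of the same idea, with a Newton-iteration inverse in the style of binvert_limb (illustrative; assumes a 64-bit limb):

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* v^{-1} mod 2^64 for odd v: start correct to 3 bits (v*v == 1 mod 8)
   and double the number of correct bits with each Newton step. */
static uint64_t limb_binvert (uint64_t v)
{
  uint64_t inv = v;
  for (int i = 0; i < 5; i++)   /* 3 -> 6 -> 12 -> 24 -> 48 -> 96 bits */
    inv *= 2 - v * inv;
  return inv;
}

int main (void)
{
  uint64_t v = 0x123456789abcdef1ULL;  /* any odd divisor */
  uint64_t u = 0xfedcba9876543210ULL;
  uint64_t q = u * limb_binvert (v);   /* Hensel quotient mod 2^64 */

  assert (q * v == u);                 /* q*v == u modulo 2^64, always */
  printf ("q = %016llx\n", (unsigned long long) q);
  return 0;
}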