Example #1
void
_nmod_mat_mul_transpose_3(nmod_mat_t C, const nmod_mat_t A, const nmod_mat_t B)
{
    long i, j, k;

    register mp_limb_t s0, s1, s2;
    register mp_limb_t t0, t1;
    register mp_limb_t c1, c2;

    for (i = 0; i < A->r; i++)
    {
        for (j = 0; j < B->r; j++)
        {
            s0 = s1 = s2 = 0UL;

            for (k = 0; k < A->c; k++)
            {
                umul_ppmm(t1, t0, A->rows[i][k], B->rows[j][k]);
                add_ssaaaa(c1, s0, (mp_limb_t) 0, s0, (mp_limb_t) 0, t0);
                add_ssaaaa(c2, s1, (mp_limb_t) 0, s1, (mp_limb_t) 0, t1);
                add_ssaaaa(s2, s1, s2, s1, c2, c1);
            }

            NMOD_RED(s2, s2, C->mod);
            NMOD_RED3(s0, s2, s1, s0, C->mod);
            C->rows[i][j] = s0;
        }
    }
}
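
A note on the two macros doing all the work above: umul_ppmm(hi, lo, a, b) forms the full two-limb product of a and b, and add_ssaaaa(sh, sl, ah, al, bh, bl) adds two two-limb quantities with carry propagation (overflow out of the high limb is discarded). A portable sketch of their semantics, assuming 64-bit limbs and a compiler with unsigned __int128; the real longlong.h macros usually expand to inline assembly, and the _ref names here are ours:

#include <stdint.h>

static inline void umul_ppmm_ref(uint64_t *hi, uint64_t *lo,
                                 uint64_t a, uint64_t b)
{
    unsigned __int128 p = (unsigned __int128) a * b;  /* full 128-bit product */
    *hi = (uint64_t) (p >> 64);
    *lo = (uint64_t) p;
}

static inline void add_ssaaaa_ref(uint64_t *sh, uint64_t *sl,
                                  uint64_t ah, uint64_t al,
                                  uint64_t bh, uint64_t bl)
{
    uint64_t l = al + bl;
    *sh = ah + bh + (l < al);  /* (l < al) is the carry out of the low limb */
    *sl = l;
}

Under that reading, the inner loop above maintains a three-limb accumulator s2:s1:s0: each product t1:t0 is added limb by limb, and the two carries c1 and c2 are folded into the higher limbs by the final add_ssaaaa.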
Example #2
static void
mul_basecase (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_size_t i, j;
  mp_limb_t prod_low, prod_high;
  mp_limb_t cy_dig;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can
     be stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  cy_dig = 0;
  for (j = un; j > 0; j--)
    {
      mp_limb_t u_limb, w_limb;
      u_limb = *up++;
      umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);
      add_ssaaaa (cy_dig, w_limb, prod_high, prod_low, 0, cy_dig << GMP_NAIL_BITS);
      *wp++ = w_limb >> GMP_NAIL_BITS;
    }

  *wp++ = cy_dig;
  wp -= un;
  up -= un;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < vn; i++)
    {
      v_limb = vp[i];
      cy_dig = 0;

      for (j = un; j > 0; j--)
	{
	  mp_limb_t u_limb, w_limb;
	  u_limb = *up++;
	  umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);
	  w_limb = *wp;
	  add_ssaaaa (prod_high, prod_low, prod_high, prod_low, 0, w_limb << GMP_NAIL_BITS);
	  prod_low >>= GMP_NAIL_BITS;
	  prod_low += cy_dig;
#if GMP_NAIL_BITS == 0
	  cy_dig = prod_high + (prod_low < cy_dig);
#else
	  cy_dig = prod_high;
	  cy_dig += prod_low >> GMP_NUMB_BITS;
#endif
	  *wp++ = prod_low & GMP_NUMB_MASK;
	}

      *wp++ = cy_dig;
      wp -= un;
      up -= un;
    }
}
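
Since mul_basecase is file-local, a hypothetical in-file driver illustrates the calling convention (names ours): the routine writes un + vn limbs at wp, so the destination must have room for the full product.

static void demo_mul_basecase (void)
{
  mp_limb_t u[2] = { 123, 456 };  /* {u,2} */
  mp_limb_t v[2] = { 789, 10 };   /* {v,2} */
  mp_limb_t w[4];                 /* un + vn = 4 result limbs */

  mul_basecase (w, u, 2, v, 2);   /* {w,4} = {u,2} * {v,2} */
}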
Example #3
mp_limb_t div_preinv1(mp_limb_t d1, mp_limb_t d2)
{
   mp_limb_t q, r[2], p[2], cy;
   
   if (d2 + 1 == 0 && d1 + 1 == 0)
      return 0;

   if (d1 + 1 == 0)
      q = ~d1, r[1] = ~d2;
   else
      udiv_qrnnd(q, r[1], ~d1, ~d2, d1 + 1);

   r[0] = 0;

   if (d2 + 1 == 0)
      add_ssaaaa(cy, r[1], 0, r[1], 0, q);   
   else
   {
      umul_ppmm(p[1], p[0], q, ~d2 - 1);
      cy = mpn_add_n(r, r, p, 2);
   }
 
   p[0] = d2 + 1, p[1] = d1 + (d2 + 1 == 0);
   if (cy || mpn_cmp(r, p, 2) >= 0)
      q++;
   
   return q;
}
Example #4
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */ 
inline static void
mpn_mulshort_n_basecase(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;

#if GMP_NAIL_BITS==0
  mp_limb_t t1, t2, t3;
#endif

  ASSERT(n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN(xp, n);
  ASSERT_MPN(yp, n);
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */

#if GMP_NAIL_BITS!=0
  rp[n] = mpn_mul_1(rp + k, xp + k, 2, yp[0]);
#else

  umul_ppmm(t1, rp[k], xp[k], yp[0]);
  umul_ppmm(t3, t2, xp[k + 1], yp[0]);
  add_ssaaaa(rp[n], rp[k + 1], t3, t2, 0, t1);
#endif

  for (i = 1; i <= n - 2; i++)
     rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
  
  rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);
  
  return;
}
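
The short product above keeps only the terms x[i]*y[j] with i + j >= n - 2; the dropped terms are non-negative, so the computed high half can only undershoot the true high half. A hypothetical in-file check against the full product (assuming GMP_NAIL_BITS == 0):

static void check_mulshort (void)
{
  mp_limb_t x[4] = { 1, 2, 3, 4 };
  mp_limb_t y[4] = { 5, 6, 7, 8 };
  mp_limb_t s[8], f[8];

  mpn_mulshort_n_basecase (s, x, y, 4);
  mpn_mul_n (f, x, y, 4);               /* full product for reference */

  /* the short product's high half never exceeds the true high half */
  ASSERT_ALWAYS (mpn_cmp (s + 4, f + 4, 4) <= 0);
}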
Example #5
void
_nmod_mat_mul_transpose_2(nmod_mat_t C, const nmod_mat_t A, const nmod_mat_t B)
{
    long i, j, k;

    register mp_limb_t s0, s1;
    register mp_limb_t t0, t1;

    for (i = 0; i < A->r; i++)
    {
        for (j = 0; j < B->r; j++)
        {
            s0 = s1 = 0UL;

            for (k = 0; k < A->c; k++)
            {
                umul_ppmm(t1, t0, A->rows[i][k], B->rows[j][k]);
                add_ssaaaa(s1, s0, s1, s0, t1, t0);
            }

            NMOD2_RED2(s0, s1, s0, C->mod);
            C->rows[i][j] = s0;
        }
    }
}
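
Compared with Example #1, the dot product here is accumulated in just two limbs, which suffices whenever the modulus is small enough that A->c products cannot overflow the two-limb sum. NMOD2_RED2(r, hi, lo, mod) then reduces the two-limb value hi*2^FLINT_BITS + lo modulo mod.n. A semantic sketch, assuming 64-bit limbs and unsigned __int128 (FLINT's macro instead uses the precomputed inverse stored in mod, and the _ref name is ours):

#include <stdint.h>

/* r = (hi:lo) mod n, semantics only */
static uint64_t nmod2_red2_ref (uint64_t hi, uint64_t lo, uint64_t n)
{
    unsigned __int128 t = ((unsigned __int128) hi << 64) | lo;
    return (uint64_t) (t % n);
}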
Example #6
/* in each round we remove one limb from the body, i.e. k = 1 */
void mpn_mod_1_3(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)
{
   mp_limb_t h, l, sh, sl, th, tl;
   mp_size_t j, jj;
 
   ASSERT(xn >= 5);
   ASSERT_MPN(xp, xn);
   ASSERT_LIMB(db[0]);
   ASSERT_LIMB(db[1]);
   ASSERT_LIMB(db[2]);
   ASSERT_LIMB(db[3]);

   tl = xp[xn - 2];
   th = xp[xn - 1];

   for (j = xn - 5; j >= 0; j -= 3)
   {
      umul_ppmm(sh, sl, xp[j + 1], db[0]);
      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);
      umul_ppmm(h, l, xp[j + 2], db[1]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(h, l, tl, db[2]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(th, tl, th, db[3]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   if (j > -3) /* we have at least three limbs to do, i.e. xp[0], ..., tl, th */
   {
      sh = 0;
      sl = xp[0];
      jj = 1;

      if (j == -1)
      {
         umul_ppmm(sh, sl, xp[1], db[0]);
         add_ssaaaa(sh, sl, sh, sl, 0, xp[0]);
         jj = 2;
      }

      umul_ppmm(h, l, tl, db[jj - 1]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(th, tl, th, db[jj]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   umul_ppmm(h, l, th, db[0]);
   add_ssaaaa(h, l, h, l, 0, tl);

   rem[0] = l;
   rem[1] = h;
}
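
The loop above is consistent with db[i] ≡ B^(i+1) (mod d), where B = 2^GMP_LIMB_BITS: each pass rewrites th*B^4 + tl*B^3 + x2*B^2 + x1*B + x0 as the congruent but smaller th*db[3] + tl*db[2] + x2*db[1] + x1*db[0] + x0, and the routine finally returns a two-limb value congruent to the input mod d rather than the fully reduced remainder. A sketch of the table setup under that reading (helper name ours, d > 1 assumed, and the real precomputation code may differ):

static void compute_db (mp_ptr db, mp_size_t k, mp_limb_t d)
{
   mp_limb_t q, r = 1;             /* r tracks B^i mod d, starting at B^0 = 1 */
   mp_size_t i;

   for (i = 0; i < k; i++)
   {
      udiv_qrnnd (q, r, r, 0, d);  /* r = (r * B) mod d, since r:0 equals r*B */
      db[i] = r;
   }
}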
Example #7
void __div_helper(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t qn)
{   
   mpn_sub_n(np + 1, np + 1, dp, qn + 1);
   np[1] += dp[qn];
   
   for (qn--; qn >= 0; qn--)
   {
      qp[qn] = ~CNST_LIMB(0);
      add_ssaaaa(np[1], np[0], np[1], np[0], 0, dp[qn]);
   }
}
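
This helper serves the rare path in mpn_sb_div_q (Example #21 below) commented "truncation ruins normalisation": it forces every remaining quotient limb to the maximum value ~CNST_LIMB(0) and folds the corresponding divisor limbs back into the numerator limbs np[1]:np[0].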
Example #8
void
fmpz_sub_ui(fmpz_t f, const fmpz_t g, ulong x)
{
    fmpz c = *g;

    if (!COEFF_IS_MPZ(c))       /* coeff is small */
    {
        mp_limb_t sum[2];
        if (c < 0L)             /* g negative, x positive, so difference is negative */
        {
            add_ssaaaa(sum[1], sum[0], 0, -c, 0, x);
            if (sum[1] == 0)
            {
                fmpz_set_ui(f, sum[0]); /* result fits in 1 limb */
                fmpz_neg(f, f);
            }
            else                /* result takes two limbs */
            {
                __mpz_struct * mpz_ptr;
                mpz_t temp;
                temp->_mp_d = sum;
                temp->_mp_size = -2;    /* result is negative number minus negative number, hence negative */

                mpz_ptr = _fmpz_promote(f);   /* g has already been read */
                mpz_set(mpz_ptr, temp);
            }
        }
        else                    /* coeff is non-negative, x non-negative */
        {
            if (x < c)
                fmpz_set_ui(f, c - x);  /* won't be negative and is smaller than c */
            else
            {
                fmpz_set_ui(f, x - c);  /* positive or zero */
                fmpz_neg(f, f);
            }
        }
    }
    else
    {
        __mpz_struct *mpz_ptr, *mpz_ptr2;
        mpz_ptr = COEFF_TO_PTR(c);
        mpz_ptr2 = _fmpz_promote(f);    /* g is already large */
        mpz_sub_ui(mpz_ptr2, mpz_ptr, x);
        _fmpz_demote_val(f);    /* cancellation may have occurred */
    }
}
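
A minimal usage sketch for the routine above, using the standard FLINT fmpz API and exercising the c < 0 branch:

#include "fmpz.h"

int main (void)
{
    fmpz_t f, g;

    fmpz_init (f);
    fmpz_init (g);

    fmpz_set_si (g, -5);
    fmpz_sub_ui (f, g, 7);   /* f = -5 - 7 = -12, via the g negative branch */
    fmpz_print (f);

    fmpz_clear (f);
    fmpz_clear (g);
    return 0;
}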
Example #9
static __inline__ long
_fmpr_add_small(fmpr_t z, mp_limb_t x, int xsign, const fmpz_t xexp, mp_limb_t y, int ysign, const fmpz_t yexp, long shift, long prec, long rnd)
{
    mp_limb_t hi, lo, t, u;
    int sign = ysign;

    t = x;
    u = y;

    lo = u << shift;
    hi = (shift == 0) ? 0 : (u >> (FLINT_BITS - shift));

    if (xsign == ysign)
    {
        add_ssaaaa(hi, lo, hi, lo, 0, t);
    }
    else
    {
        if (hi == 0)
        {
            if (lo >= t)
            {
                lo = lo - t;
            }
            else
            {
                lo = t - lo;
                sign = !sign;
            }
        }
        else
        {
            sub_ddmmss(hi, lo, hi, lo, 0, t);
        }
    }

    if (hi == 0)
        return fmpr_set_round_ui_2exp_fmpz(z, lo, xexp, sign, prec, rnd);
    else
        return fmpr_set_round_uiui_2exp_fmpz(z, hi, lo, xexp, sign, prec, rnd);
}
Example #10
int main(void)
{
   int i, result;
   flint_rand_t state;
   flint_randinit(state);

   printf("sdiv_qrnnd....");
   fflush(stdout);

   for (i = 0; i < 1000000; i++)
   {
      mp_limb_signed_t d, nh, nl, q, r, ph, pl;

      do 
      {
         d = n_randtest_not_zero(state);
         nh = n_randtest(state);
      } while (FLINT_ABS(nh) >= FLINT_ABS(d)/2);
      nl = n_randtest(state);

      sdiv_qrnnd(q, r, nh, nl, d);
      smul_ppmm(ph, pl, d, q);
      if (r < 0L) sub_ddmmss(ph, pl, ph, pl, 0UL, -r);
      else add_ssaaaa(ph, pl, ph, pl, 0UL, r);

      result = ((ph == nh) && (pl == nl));

      if (!result)
      {
         printf("FAIL:\n");
         printf("nh = %lu, nl = %lu, d = %lu\n", nh, nl, d); 
         printf("ph = %lu, pl = %lu\n", ph, pl);
         abort();
      }
   }

   flint_randclear(state);

   printf("PASS\n");
   return 0;
}
Example #11
int main(void)
{
   int i, result;
   flint_rand_t state;
   
   printf("submod....");
   fflush(stdout);

   flint_randinit(state);

   for (i = 0; i < 1000000; i++)
   {
      mp_limb_t a, b, d, r1, r2, s1;
      
      d = n_randtest(state);
      a = n_randint(state, d);
      b = n_randint(state, d);
      
      r1 = n_submod(a, b, d);

      add_ssaaaa(s1, r2, 0UL, a, 0UL, d);
      sub_ddmmss(s1, r2, s1, r2, 0UL, b);
      if ((s1) || (r2 >= d)) r2 -= d;
      
      result = (r1 == r2);
      if (!result)
      {
         printf("FAIL:\n");
         printf("a = %lu, b = %lu, d = %lu\n", a, b, d); 
         printf("r1 = %lu, r2 = %lu\n", r1, r2);
         abort();
      }
   }

   flint_randclear(state);

   printf("PASS\n");
   return 0;
}
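
The reference computation above forms a + d in two limbs, subtracts b, and corrects once. For 0 <= a, b < d this is equivalent to the branch-based sketch below (a reference definition only; FLINT's actual n_submod may well be branch-free):

static mp_limb_t n_submod_ref (mp_limb_t a, mp_limb_t b, mp_limb_t d)
{
   return (a < b) ? a - b + d : a - b;   /* (a - b) mod d for 0 <= a, b < d */
}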
Example #12
/* in each round we remove one limb from the body, i.e. k = 1 */
void mpn_mod_1_2(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)
{
   mp_limb_t h, l, sh, sl, th, tl;
   mp_size_t j;
 
   ASSERT(xn >= 4);
   ASSERT_MPN(xp, xn);
   ASSERT_LIMB(db[0]);
   ASSERT_LIMB(db[1]);
   ASSERT_LIMB(db[2]);

   tl = xp[xn - 2];
   th = xp[xn - 1];

   for (j = xn - 4; j >= 0; j -= 2)
   {
      umul_ppmm(sh, sl, xp[j + 1], db[0]);
      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);
      umul_ppmm(h, l, tl, db[1]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(th, tl, th, db[2]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   if (j > -2) /* we have at least three limbs to do i.e. xp[0], ..., tl, th */
   {
      umul_ppmm(sh, sl, tl, db[0]);
      add_ssaaaa(sh, sl, sh, sl, 0, xp[0]);
      umul_ppmm(th, tl, th, db[1]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   umul_ppmm(h, l, th, db[0]);
   add_ssaaaa(h, l, h, l, 0, tl);

   rem[0] = l;
   rem[1] = h;
}
Example #13
/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if one of the operand
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
 * combine results and obtain a natural number when one knows in
 * advance that the final value is less than (B^rn-1).
 * Moreover it should not be a problem if mulmod_bnm1 is used to
 * compute the full product with an+bn <= rn, because this condition
 * implies (B^an-1)(B^bn-1) < (B^rn-1) .
 *
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
 *
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
 */
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
{
  ASSERT (0 < bn);
  ASSERT (bn <= an);
  ASSERT (an <= rn);

  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (bn < rn))
	{
	  if (UNLIKELY (an + bn <= rn))
	    {
	      mpn_mul (rp, ap, an, bp, bn);
	    }
	  else
	    {
	      mp_limb_t cy;
	      mpn_mul (tp, ap, an, bp, bn);
	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
	      MPN_INCR_U (rp, rn, cy);
	    }
	}
      else
	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      /* We need at least an + bn >= n, to be able to fit one of the
	 recursive products at rp. Requiring strict inequality makes
	 the code slightly simpler. If desired, we could avoid this
	 restriction by initially halving rn as long as rn is even and
	 an + bn <= rn/2. */

      ASSERT (an + bn > n);

      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
	 and crt together as

	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */

#define a0 ap
#define a1 (ap + n)
#define b0 bp
#define b1 (bp + n)

#define xp  tp	/* 2n + 2 */
      /* am1  maybe in {xp, n} */
      /* bm1  maybe in {xp + n, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */
      /* bp1  maybe in {sp1 + n + 1, n + 1} */

      {
	mp_srcptr am1, bm1;
	mp_size_t anm, bnm;
	mp_ptr so;

	bm1 = b0;
	bnm = bn;
	if (LIKELY (an > n))
	  {
	    am1 = xp;
	    cy = mpn_add (xp, a0, n, a1, an - n);
	    MPN_INCR_U (xp, n, cy);
	    anm = n;
	    so = xp + n;
	    if (LIKELY (bn > n))
	      {
		bm1 = so;
		cy = mpn_add (so, b0, n, b1, bn - n);
		MPN_INCR_U (so, n, cy);
		bnm = n;
		so += n;
	      }
	  }
	else
	  {
	    so = xp;
	    am1 = a0;
	    anm = an;
	  }

	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
      }

      {
	int       k;
	mp_srcptr ap1, bp1;
	mp_size_t anp, bnp;

	bp1 = b0;
	bnp = bn;
	if (LIKELY (an > n)) {
	  ap1 = sp1;
	  cy = mpn_sub (sp1, a0, n, a1, an - n);
	  sp1[n] = 0;
	  MPN_INCR_U (sp1, n + 1, cy);
	  anp = n + ap1[n];
	  if (LIKELY (bn > n)) {
	    bp1 = sp1 + n + 1;
	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
	    sp1[2*n+1] = 0;
	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
	    bnp = n + bp1[n];
	  }
	} else {
	  ap1 = a0;
	  anp = an;
	}

	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
	  k=0;
	else
	  {
	    int mask;
	    k = mpn_fft_best_k (n, 0);
	    mask = (1<<k) - 1;
	    while (n & mask) {k--; mask >>=1;};
	  }
	if (k >= FFT_FIRST_K)
	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
	else if (UNLIKELY (bp1 == b0))
	  {
	    ASSERT (anp + bnp <= 2*n+1);
	    ASSERT (anp + bnp > n);
	    ASSERT (anp >= bnp);
	    mpn_mul (xp, ap1, anp, bp1, bnp);
	    anp = anp + bnp - n;
	    ASSERT (anp <= n || xp[2*n]==0);
	    anp-= anp > n;
	    cy = mpn_sub (xp, xp, n, xp + n, anp);
	    xp[n] = 0;
	    MPN_INCR_U (xp, n+1, cy);
	  }
	else
	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
      }

      /* Here the CRT recomposition begins.

	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
	 Division by 2 is a bitwise rotation.

	 Assumes xp normalised mod (B^n+1).

	 The residue class [0] is represented by [B^n-1]; except when
	 both inputs are ZERO.
      */

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
	 overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0]&1);
      mpn_rshift(rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* Next increment can not overflow, read the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U(rp, n, cy);

      /* Compute the highest half:
	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
       */
      if (UNLIKELY (an + bn < rn))
	{
	  /* Note that in this case, the only way the result can equal
	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
	     then the output of both the recursive calls and this CRT
	     reconstruction is zero, not B^{rn} - 1. Which is good,
	     since the latter representation doesn't fit in the output
	     area.*/
	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);

	  /* FIXME: This subtraction of the high parts is not really
	     necessary, we do it to get the carry out, and for sanity
	     checking. */
	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
				   xp + an + bn - n, rn - (an + bn), cy);
	  ASSERT (an + bn == rn - 1 ||
		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
	  ASSERT (cy == (xp + an + bn - n)[0]);
	}
      else
	{
	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
	     DECR will affect _at most_ the lowest n limbs. */
	  MPN_DECR_U (rp, 2*n, cy);
	}
#undef a0
#undef a1
#undef b0
#undef b1
#undef xp
#undef sp1
    }
}
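The CRT recomposition used above, x = -xp*B^n + (B^n + 1)*[(xp + xm)/2 mod (B^n - 1)], can be sanity-checked on toy numbers: modulo M - 1 (where M = B^n ≡ 1) it reduces to -xp + xp + xm = xm, and modulo M + 1 (where M ≡ -1) it reduces to xp. A small self-contained demo with M = 2^8 standing in for B^n (hypothetical, not from the source; both printed values agree):

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  const int64_t M = 1 << 8;               /* stands in for B^n */
  uint64_t t = (uint64_t) 12345 * 54321;  /* the "a*b" to recover mod M^2 - 1 */
  int64_t xm = (int64_t) (t % (uint64_t) (M - 1));  /* a*b mod (B^n - 1) */
  int64_t xp = (int64_t) (t % (uint64_t) (M + 1));  /* a*b mod (B^n + 1) */
  int64_t s, x;

  s = xp + xm;
  if (s & 1)
    s += M - 1;               /* halving mod the odd number M - 1 */
  s = (s / 2) % (M - 1);

  x = ((M + 1) * s - xp * M) % (M * M - 1);
  if (x < 0)
    x += M * M - 1;

  printf ("%lld %lld\n", (long long) x,
          (long long) (t % (uint64_t) (M * M - 1)));
  return 0;
}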
Example #14
void my_udiv_qr_3by2 ( mp_limb_t* p_q, mp_limb_t* p_r1, mp_limb_t* p_r0, mp_limb_t* p_n2, mp_limb_t* p_n1, mp_limb_t* p_n0, mp_limb_t* p_d1, mp_limb_t* p_d0, mp_limb_t* p_dinv, mp_limb_t* p__q0, mp_limb_t* p__t1, mp_limb_t* p__t0, mp_limb_t* p__mask) {

/*
mp_limb_t q = *p_q;
mp_limb_t r0 = *p_r0;
mp_limb_t r1 = *p_r1;
mp_limb_t n0 = *p_n0;
mp_limb_t n1 = *p_n1;
mp_limb_t n2 = *p_n2;
mp_limb_t dinv = *p_dinv;
mp_limb_t d0 = *p_d0;
mp_limb_t d1 = *p_d1;
    mp_limb_t _q0, _t1, _t0, _mask;                                     
_q0 = *p__q0;
_t0 = *p__t0;
_t1 = *p__t1;
_mask = *p__mask;
*/
#define q (*p_q)
#define r0 (*p_r0)
#define r1 (*p_r1)
#define n0 (*p_n0)
#define n1 (*p_n1)
#define n2 (*p_n2)
#define dinv (*p_dinv)
#define d0 (*p_d0)
#define d1 (*p_d1)
#define _q0 (*p__q0)
#define _t0 (*p__t0)
#define _t1 (*p__t1)
#define _mask (*p__mask)

printf("n0 %08lx\n", n0);
printf("n1 %08lx\n", n1);
printf("n2 %08lx\n", n2);
printf("dinv %08lx\n", dinv);
printf("d0 %08lx\n", d0);
printf("d1 %08lx\n", d1);

    umul_ppmm ((q), _q0, (n2), (dinv));                                 
printf("q %08lx\n", q);
printf("_q0 %08lx\n", _q0);
    add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));                        
printf("q %08lx\n", q);
                                                                        
    /* Compute the two most significant limbs of n - q'd */             
    (r1) = (n1) - (d1) * (q);                                           
printf("r1 %08lx\n", r1);
    (r0) = (n0);                                                        
printf("r0 %08lx\n", r0);
    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));                    
    umul_ppmm (_t1, _t0, (d0), (q));                                    
printf("_t0 %08lx\n", _t0);
printf("_t1 %08lx\n", _t1);
    sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);                      
    (q)++;                                                              
                                                                        
    /* Conditionally adjust q and the remainders */                     
    _mask = - (mp_limb_t) ((r1) >= _q0);                                
printf("_mask %08lx\n", _mask);
    (q) += _mask;                                                       
printf("q %08lx\n", q);
    add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0));    
    if (UNLIKELY ((r1) >= (d1)))                                        
      {                                                                 
        if ((r1) > (d1) || (r0) >= (d0))                                
          {                                                             
            (q)++;                                                      
printf("q %08lx\n", q);
            sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));            
          }                                                             
      }                                                                 

#undef q
#undef r0
#undef r1
#undef n0
#undef n1
#undef n2
#undef dinv
#undef d0
#undef d1
#undef _q0
#undef _t1
#undef _t0
#undef _mask
}
Example #15
/* 
   Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is
   either correct or one too large. We require dp to be normalised and inv
   to be a precomputed inverse given by mpn_invert.
*/
mp_limb_t 
mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, 
                              mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);
   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }
   
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy + 1);

   /* 
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + n - 1, n + 1 }
	  N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <=  N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the latter case
	  the left hand bound is either an integer or < 2/B below one.
   */
    
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }
  
   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) 
   {
	   /* Special case, multiply out to get accurate quotient */
	   ret -= mpn_sub_1(qp, qp, dn, 1);
       if (UNLIKELY(ret == ~CNST_LIMB(0)))
          ret += mpn_add_1(qp, qp, dn, 1);
       /* ret is now guaranteed to be 0*/
       ASSERT(ret == 0);
       mpn_mul_n(tp, qp, dp, dn);
       mpn_sub_n(tp, np, tp, dn+1);
       while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)
	   {
		   ret += mpn_add_1(qp, qp, dn, 1);
		   tp[dn] -= mpn_sub_n(tp, tp, dp, dn);
	   }
       /* Not possible for ret == 2 as we have qp*dp <= np */
       ASSERT(ret + ret2 < 2);
   }

   TMP_FREE;

   return ret + ret2;
}
Example #16
/* 
   Computes the quotient and remainder of { np, 2*dn } by { dp, dn }.
   We require dp to be normalised and inv to be a precomputed inverse 
   of { dp, dn } given by mpn_invert.
*/
mp_limb_t 
mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, 
                           mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_size_t m, i;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }

   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy);

   /* 
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + n - 1, n + 1 }
	  N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the latter case
	  the left hand bound is either an integer or < 2/B below one.
   */
     
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }

   ret -= mpn_sub_1(qp, qp, dn, 1); 
   if (UNLIKELY(ret == ~CNST_LIMB(0))) 
      ret += mpn_add_1(qp, qp, dn, 1);
   /* ret is now guaranteed to be 0 or 1*/
   ASSERT(ret == 0);

   m = dn + 1;
   if ((dn <= MPN_FFT_MUL_N_MINSIZE) || (ret))
   {
      mpn_mul_n(tp, qp, dp, dn);
   } else
   {
      mp_limb_t cy, cy2;
      
      if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
         m = mpir_fft_adjust_limbs (m);
      cy = mpn_mulmod_Bexpp1_fft (tp, m, qp, dn, dp, dn);
      
      /* cy, {tp, m} = qp * dp mod (B^m+1) */ 
      cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);
      mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
          
      /* Make correction */    
      mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);
   }
   
   mpn_sub_n(np, np, tp, m);
   MPN_ZERO(np + m, 2*dn - m);
   while (np[dn] || mpn_cmp(np, dp, dn) >= 0)
   {
	   ret += mpn_add_1(qp, qp, dn, 1);
	   np[dn] -= mpn_sub_n(np, np, dp, dn);
   }
  
   /* Not possible for ret == 2 as we have qp*dp <= np */
   ASSERT(ret + ret2 < 2);

   TMP_FREE;

   return ret + ret2;
}
Example #17
File: mul.c Project: gnooth/xcl
int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
    int sign, inexact;
    mpfr_exp_t ax, ax2;
    mp_limb_t *tmp;
    mp_limb_t b1;
    mpfr_prec_t bq, cq;
    mp_size_t bn, cn, tn, k;
    MPFR_TMP_DECL (marker);

    MPFR_LOG_FUNC (("b[%#R]=%R c[%#R]=%R rnd=%d", b, b, c, c, rnd_mode),
                   ("a[%#R]=%R inexact=%d", a, a, inexact));

    /* deal with special cases */
    if (MPFR_ARE_SINGULAR (b, c))
    {
        if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
        {
            MPFR_SET_NAN (a);
            MPFR_RET_NAN;
        }
        sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
        if (MPFR_IS_INF (b))
        {
            if (!MPFR_IS_ZERO (c))
            {
                MPFR_SET_SIGN (a, sign);
                MPFR_SET_INF (a);
                MPFR_RET (0);
            }
            else
            {
                MPFR_SET_NAN (a);
                MPFR_RET_NAN;
            }
        }
        else if (MPFR_IS_INF (c))
        {
            if (!MPFR_IS_ZERO (b))
            {
                MPFR_SET_SIGN (a, sign);
                MPFR_SET_INF (a);
                MPFR_RET(0);
            }
            else
            {
                MPFR_SET_NAN (a);
                MPFR_RET_NAN;
            }
        }
        else
        {
            MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
            MPFR_SET_SIGN (a, sign);
            MPFR_SET_ZERO (a);
            MPFR_RET (0);
        }
    }
    sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));

    ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
    /* Note: the exponent of the exact result will be e = bx + cx + ec with
       ec in {-1,0,1} and the following assumes that e is representable. */

    /* FIXME: Useful since we do an exponent check after ?
     * It is useful iff the precision is big, there is an overflow
     * and we are doing further mults...*/
#ifdef HUGE
    if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
        return mpfr_overflow (a, rnd_mode, sign);
    if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
        return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
                               sign);
#endif

    bq = MPFR_PREC (b);
    cq = MPFR_PREC (c);

    MPFR_ASSERTD (bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */

    bn = (bq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of b */
    cn = (cq+GMP_NUMB_BITS-1)/GMP_NUMB_BITS; /* number of limbs of c */
    k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
    tn = (bq + cq + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
    MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */

    /* Check for no size_t overflow*/
    MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
    MPFR_TMP_MARK (marker);
    tmp = (mp_limb_t *) MPFR_TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB);

    /* multiplies two mantissa in temporary allocated space */
    if (MPFR_UNLIKELY (bn < cn))
    {
        mpfr_srcptr z = b;
        mp_size_t zn  = bn;
        b = c;
        bn = cn;
        c = z;
        cn = zn;
    }
    MPFR_ASSERTD (bn >= cn);
    if (MPFR_LIKELY (bn <= 2))
    {
        if (bn == 1)
        {
            /* 1 limb * 1 limb */
            umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
            b1 = tmp[1];
        }
        else if (MPFR_UNLIKELY (cn == 1))
        {
            /* 2 limbs * 1 limb */
            mp_limb_t t;
            umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
            umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
            b1 = tmp[2];
        }
        else
        {
            /* 2 limbs * 2 limbs */
            mp_limb_t t1, t2, t3;
            /* First 2 limbs * 1 limb */
            umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
            umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
            /* Second, the other 2 limbs * 1 limb product */
            umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
            umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
            add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
            /* Sum those two partial products */
            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
            tmp[3] += (tmp[2] < t1);
            b1 = tmp[3];
        }
        b1 >>= (GMP_NUMB_BITS - 1);
        tmp += k - tn;
        if (MPFR_UNLIKELY (b1 == 0))
            mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    }
    else
        /* Mulders' mulhigh. Disable if squaring, since it is not tuned for
           such a case */
        if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c))
Example #18
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
		   mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t B2;
  mp_limb_t u0, u2;
  mp_limb_t q0, q1;
  mp_limb_t p0, p1;
  mp_limb_t t;
  mp_size_t j;

  ASSERT (d & GMP_LIMB_HIGHBIT);
  ASSERT (n > 0);
  ASSERT (u1 < d);

  if (n == 1)
    {
      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
      return u1;
    }

  /* FIXME: Could be precomputed */
  B2 = -d*dinv;

  umul_ppmm (q1, q0, dinv, u1);
  umul_ppmm (p1, p0, B2, u1);
  q1 += u1;
  ASSERT (q1 >= u1);
  u0 = up[n-1];	/* Early read, to allow qp == up. */
  qp[n-1] = q1;

  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);

  /* FIXME: Keep q1 in a variable between iterations, to reduce number
     of memory accesses. */
  for (j = n-2; j-- > 0; )
    {
      mp_limb_t q2, cy;

      /* Additions for the q update:
       *	+-------+
       *        |u1 * v |
       *        +---+---+
       *        | u1|
       *    +---+---+
       *    | 1 | v |  (conditional on u2)
       *    +---+---+
       *        | 1 |  (conditional on u0 + u2 B2 carry)
       *        +---+
       * +      | q0|
       *   -+---+---+---+
       *    | q2| q1| q0|
       *    +---+---+---+
      */
      umul_ppmm (p1, t, u1, dinv);
      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
      q0 = t;

      umul_ppmm (p1, p0, u1, B2);
      ADDC_LIMB (cy, u0, u0, u2 & B2);
      u0 -= (-cy) & d;

      /* Final q update */
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
      qp[j+1] = q1;
      MPN_INCR_U (qp+j+2, n-j-2, q2);

      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
    }

  q1 = (u2 > 0);
  u1 -= (-q1) & d;

  t = (u1 >= d);
  q1 += t;
  u1 -= (-t) & d;

  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);

  MPN_INCR_U (qp+1, n-1, q1);

  qp[0] = q0;
  return u0;
}
Example #19
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, 
                 mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
   mp_size_t n = (((mp_size_t)1)<<depth);
   mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
   
   mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
   mp_size_t size = limbs + 1;
   mp_size_t i, j, ll;

   mp_limb_t * ptr;
   mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
   mp_limb_t c;
   TMP_DECL;

   TMP_MARK;
   ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
   for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size) 
   {
      ii[i] = ptr;
   }
   ii0 = ptr;
   t1 = ii0 + 2*n;
   t2 = t1 + size;
   s1 = t2 + size;
   r = s1 + size;
   tt = r + 2*n;
   
   if (i1 != i2)
   {
      jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
      for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size) 
      {
         jj[i] = ptr;
      }
      jj0 = ptr;
   } else
   {
      jj = ii;
      jj0 = ii0;
   }

   j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
   for ( ; j < 2*n; j++)
      mpn_zero(ii[j], limbs + 1);

   for (i = 0; i < 2*n; i++)
      ii0[i] = ii[i][0];
 
   mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
   for (j = 0; j < 2*n; j++)
      mpn_normmod_2expp1(ii[j], limbs);

   if (i1 != i2)
   {
      j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
      for ( ; j < 2*n; j++)
          mpn_zero(jj[j], limbs + 1);

      for (i = 0; i < 2*n; i++)
         jj0[i] = jj[i][0];

      mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
   }
      
   for (j = 0; j < 2*n; j++)
   {
      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
      c = 2*ii[j][limbs] + jj[j][limbs];

      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
   }
   
   mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);
   
   mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

   for (j = 0; j < 2*n; j++)
   {
      mp_limb_t t, cy2;
      
      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
      mpn_normmod_2expp1(ii[j], limbs);

      t = ii[j][limbs];
      ii[j][limbs] = r[j] - ii[j][0];
      cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
      add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
      if (cy2) r[j]++;
   }
   
   mpn_zero(r1, r_limbs + 1);
   mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);
   
   /* 
      as the negacyclic convolution has effectively done subtractions
      some of the coefficients will be negative, so need to subtract p
   */
   ll = 0;
   limb_add = bits1/GMP_LIMB_BITS;
   
   for (j = 0; j < 2*n - 2; j++)
   {   
      if (r[j]) 
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
      else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      {
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
         mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
      }

      ll += limb_add;
   }
   /* penultimate coefficient, top bit was already ignored */
   if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
   
   /* final coefficient wraps around */
   if (limb_add)
      r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
   c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
   mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
   mpn_normmod_2expp1(r1, r_limbs);
   
   TMP_FREE;
}
Example #20
mp_limb_t
mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs,
	    mp_ptr np, mp_size_t nsize,
	    mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t most_significant_q_limb = 0;

  switch (dsize)
    {
    case 0:
      /* We are asked to divide by zero, so go ahead and do it!  (To make
	 the compiler not remove this statement, return the value.)  */
      return 1 / dsize;

    case 1:
      {
	mp_size_t i;
	mp_limb_t n1;
	mp_limb_t d;

	d = dp[0];
	n1 = np[nsize - 1];

	if (n1 >= d)
	  {
	    n1 -= d;
	    most_significant_q_limb = 1;
	  }

	qp += qextra_limbs;
	for (i = nsize - 2; i >= 0; i--)
	  udiv_qrnnd (qp[i], n1, n1, np[i], d);
	qp -= qextra_limbs;

	for (i = qextra_limbs - 1; i >= 0; i--)
	  udiv_qrnnd (qp[i], n1, n1, 0, d);

	np[0] = n1;
      }
      break;

    case 2:
      {
	mp_size_t i;
	mp_limb_t n1, n0, n2;
	mp_limb_t d1, d0;

	np += nsize - 2;
	d1 = dp[1];
	d0 = dp[0];
	n1 = np[1];
	n0 = np[0];

	if (n1 >= d1 && (n1 > d1 || n0 >= d0))
	  {
	    sub_ddmmss (n1, n0, n1, n0, d1, d0);
	    most_significant_q_limb = 1;
	  }

	for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
	  {
	    mp_limb_t q;
	    mp_limb_t r;

	    if (i >= qextra_limbs)
	      np--;
	    else
	      np[0] = 0;

	    if (n1 == d1)
	      {
		/* Q should be either 111..111 or 111..110.  Need special
		   treatment of this rare case as normal division would
		   give overflow.  */
		q = ~(mp_limb_t) 0;

		r = n0 + d1;
		if (r < d1)	/* Carry in the addition? */
		  {
		    add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
		    qp[i] = q;
		    continue;
		  }
		n1 = d0 - (d0 != 0);
		n0 = -d0;
	      }
	    else
	      {
		udiv_qrnnd (q, r, n1, n0, d1);
		umul_ppmm (n1, n0, d0, q);
	      }

	    n2 = np[0];
	  q_test:
	    if (n1 > r || (n1 == r && n0 > n2))
	      {
		/* The estimated Q was too large.  */
		q--;

		sub_ddmmss (n1, n0, n1, n0, 0, d0);
		r += d1;
		if (r >= d1)	/* If not carry, test Q again.  */
		  goto q_test;
	      }

	    qp[i] = q;
	    sub_ddmmss (n1, n0, r, n2, n1, n0);
	  }
	np[1] = n1;
	np[0] = n0;
      }
      break;

    default:
      {
	mp_size_t i;
	mp_limb_t dX, d1, n0;

	np += nsize - dsize;
	dX = dp[dsize - 1];
	d1 = dp[dsize - 2];
	n0 = np[dsize - 1];

	if (n0 >= dX)
	  {
	    if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0)
	      {
		mpn_sub_n (np, np, dp, dsize);
		n0 = np[dsize - 1];
		most_significant_q_limb = 1;
	      }
	  }

	for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
	  {
	    mp_limb_t q;
	    mp_limb_t n1, n2;
	    mp_limb_t cy_limb;

	    if (i >= qextra_limbs)
	      {
		np--;
		n2 = np[dsize];
	      }
	    else
	      {
		n2 = np[dsize - 1];
		MPN_COPY_DECR (np + 1, np, dsize);
		np[0] = 0;
	      }

	    if (n0 == dX)
	      /* This might over-estimate q, but it's probably not worth
		 the extra code here to find out.  */
	      q = ~(mp_limb_t) 0;
	    else
	      {
		mp_limb_t r;

		udiv_qrnnd (q, r, n0, np[dsize - 1], dX);
		umul_ppmm (n1, n0, d1, q);

		while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
		  {
		    q--;
		    r += dX;
		    if (r < dX)	/* I.e. "carry in previous addition?"  */
		      break;
		    n1 -= n0 < d1;
		    n0 -= d1;
		  }
	      }

	    /* Possible optimization: We already have (q * n0) and (1 * n1)
	       after the calculation of q.  Taking advantage of that, we
	       could make this loop make two iterations less.  */

	    cy_limb = mpn_submul_1 (np, dp, dsize, q);

	    if (n2 != cy_limb)
	      {
		mpn_add_n (np, np, dp, dsize);
		q--;
	      }

	    qp[i] = q;
	    n0 = np[dsize - 1];
	  }
      }
    }

  return most_significant_q_limb;
}
Example #21
mp_limb_t
mpn_sb_div_q (mp_ptr qp,
		 mp_ptr np, mp_size_t nn,
		 mp_srcptr dp, mp_size_t dn,
		 mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t qn, i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0, d11, d01;
  mp_limb_t cy, cy1;
  mp_limb_t q;
  mp_limb_t flag;

  mp_size_t dn_orig = dn, qn_orig;
  mp_srcptr dp_orig = dp;
  mp_ptr np_orig = np;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  np += nn;

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  if (dn <= SB_DIVAPPR_Q_SMALL_THRESHOLD)
     {
   qn_orig = qn;

   /* Reduce until dn - 2 >= qn */
   for (qn--, np--; qn > dn - 2; qn--)
     {
       /* fetch next word */
       cy = np[0];

       np--;
       mpir_divapprox32_preinv2(q, cy, np[0], dinv);
      
	    /* np -= dp*q1 */
       cy -= mpn_submul_1(np - dn + 1, dp, dn, q);

       /* correct if remainder is too large */
       if (UNLIKELY(cy || np[0] >= dp[dn - 1]))
         {
       if (cy || mpn_cmp(np - dn + 1, dp, dn) >= 0)
       {
          q++;
          cy -= mpn_sub_n(np - dn + 1, np - dn + 1, dp, dn);
       }
       }

       qp[qn] = q;
     }
   
   qn++;
   dp = dp + dn - qn - 1; /* make dp length qn + 1 */
   
   flag = ~CNST_LIMB(0);

   for ( ; qn > 0; qn--)
     {
       /* fetch next word */
       cy = np[0];
 
       np--;
       /* rare case where truncation ruins normalisation */
       if (cy > dp[qn] || (cy == dp[qn] && mpn_cmp(np - qn + 1, dp, qn) >= 0))
         {
       __div_helper(qp, np - qn, dp, qn);
       flag = 0;
       break;
         }
       
       mpir_divapprox32_preinv2(q, cy, np[0], dinv);
         
       /* np -= dp*q */
       cy -= mpn_submul_1(np - qn, dp, qn + 1, q);

       /* correct if remainder is too large */
       if (UNLIKELY(cy || np[0] >= dp[qn]))
         {
       if (cy || mpn_cmp(np - qn, dp, qn + 1) >= 0)
         {
       q++;
       cy -= mpn_sub_n(np - qn, np - qn, dp, qn + 1);
         }
         }
       
       qp[qn - 1] = q;
       dp++;
     }

     np--;
     n1 = np[1];
     qn = qn_orig;
     } 
  else
     {
  qp += qn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  d01 = d0 + 1;
  d11 = d1 + (d01 < d0);
  
  np -= 2;

  n1 = np[1];

  for (i = qn - (dn + 2); i >= 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
	{
	  q = GMP_NUMB_MASK;
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  cy = mpn_submul_1 (np - dn, dp, dn, q);

	  cy1 = n0 < cy;
	  n0 = (n0 - cy) & GMP_NUMB_MASK;
	  cy = n1 < cy1;
	  n1 -= cy1;
	  np[0] = n0;

	  if (UNLIKELY (cy != 0))
	    {
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	      q--;
	    }
	}

      *--qp = q;
    }

  flag = ~CNST_LIMB(0);

  if (dn >= 0)
    {
      for (i = dn; i > 0; i--)
	{
	  np--;
	  if (UNLIKELY (n1 >= (d1 & flag)))
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);

	      if (UNLIKELY (n1 != cy))
		{
		  if (n1 < (cy & flag))
		    {
		      q--;
		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
		    }
		  else
		    flag = 0;
		}
	      n1 = np[1];
	    }
	  else
	    {
	      mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	      cy = mpn_submul_1 (np - dn, dp, dn, q);

	      cy1 = n0 < cy;
	      n0 = (n0 - cy) & GMP_NUMB_MASK;
	      cy = n1 < cy1;
	      n1 -= cy1;
	      np[0] = n0;

	      if (UNLIKELY (cy != 0))
		{
		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
		  q--;
		}
	    }

	  *--qp = q;

	  /* Truncate operands.  */
	  dn--;
	  dp++;
	}

      np--;
      if (UNLIKELY (n1 >= (d1 & flag)))
	{
	  q = GMP_NUMB_MASK;
	  cy = mpn_submul_1 (np, dp, 2, q);

	  if (UNLIKELY (n1 != cy))
	    {
	      if (n1 < (cy & flag))
		{
		  q--;
		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
		}
	      else
		flag = 0;
	    }
	  n1 = np[1];
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  np[0] = n0;
	  np[1] = n1;
	}

      *--qp = q;
    }
  ASSERT_ALWAYS (np[1] == n1);
  }

  np += 2;

  dn = dn_orig;
  if (UNLIKELY (n1 < (dn & flag)))
    {
      mp_limb_t q, x;

      /* The quotient may be too large if the remainder is small.  Recompute
	 for above ignored operand parts, until the remainder spills.

	 FIXME: The quality of this code isn't the same as the code above.
	 1. We don't compute things in an optimal order, high-to-low, in order
	    to terminate as quickly as possible.
	 2. We mess with pointers and sizes, adding and subtracting and
	    adjusting to get things right.  It surely could be streamlined.
	 3. The only termination criteria are that we determine that the
	    quotient needs to be adjusted, or that we have recomputed
	    everything.  We should stop when the remainder is so large
	    that no additional subtracting could make it spill.
	 4. If nothing else, we should not do two loops of submul_1 over the
	    data, instead handle both the triangularization and chopping at
	    once.  */

      x = n1;

      if (dn > 2)
	{
	  /* Compensate for triangularization.  */
	  mp_limb_t y;

	  dp = dp_orig;
	  if (qn + 1 < dn)
	    {
	      dp += dn - (qn + 1);
	      dn = qn + 1;
	    }

	  y = np[-2];

	  for (i = dn - 3; i >= 0; i--)
	    {
	      q = qp[i];
	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);

	      if (y < cy)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      ASSERT_ALWAYS (cy == 0);
		      return qh - cy;
		    }
		  x--;
		}
	      y -= cy;
	    }
	  np[-2] = y;
	}

      dn = dn_orig;
      if (qn + 1 < dn)
	{
	  /* Compensate for ignored dividend and divisor tails.  */

	  dp = dp_orig;
	  np = np_orig;

	  if (qh != 0)
	    {
	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      if (qn != 0)
			cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh - cy;
		    }
		  x--;
		}
	    }

	  if (qn == 0)
	    return qh;

	  for (i = dn - qn - 2; i >= 0; i--)
	    {
	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh;
		    }
		  x--;
		}
	    }
	}
    }

  return qh;
}
Example #22
mp_limb_t
mpn_div_qr_1n_pi2 (mp_ptr qp,
		   mp_srcptr up, mp_size_t un,
		   struct precomp_div_1_pi2 *pd)
{
  mp_limb_t most_significant_q_limb;
  mp_size_t i;
  mp_limb_t r, u2, u1, u0;
  mp_limb_t d0, di1, di0;
  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
  mp_limb_t cnd;

  ASSERT (un >= 2);
  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
  ASSERT_MPN (up, un);

#define q3 q3a
#define q2 q2b
#define q1 q1b

  up += un - 3;
  r = up[2];
  d0 = pd->d;

  most_significant_q_limb = (r >= d0);
  r -= d0 & -most_significant_q_limb;

  qp += un - 3;
  qp[2] = most_significant_q_limb;

  di1 = pd->dip[1];
  di0 = pd->dip[0];

  for (i = un - 3; i >= 0; i -= 2)
    {
      u2 = r;
      u1 = up[1];
      u0 = up[0];

      /* Dividend in {r,u1,u0} */

      umul_ppmm (q1d,q0d, u1, di0);
      umul_ppmm (q2b,q1b, u1, di1);
      q2b++;				/* cannot spill */
      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);

      umul_ppmm (q2c,q1c, u2,  di0);
      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
      umul_ppmm (q3a,q2a, u2, di1);

      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);

      q3 += r;

      r = u0 - q2 * d0;

      cnd = (r >= q1);
      r += d0 & -cnd;
      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);

      if (UNLIKELY (r >= d0))
	{
	  r -= d0;
	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
	}

      qp[0] = q2;
      qp[1] = q3;

      up -= 2;
      qp -= 2;
    }

  if ((un & 1) == 0)
    {
      u2 = r;
      u1 = up[1];

      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
      qp[1] = q3;
    }

  return r;

#undef q3
#undef q2
#undef q1
}