Example #1
void
my__gmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
	ASSERT_ALWAYS (qxn == 0);

	ASSERT (nn >= 0);
	ASSERT (dn >= 0);
	ASSERT (dn == 0 || dp[dn - 1] != 0);
	ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
	ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));

	int adjust;
	gmp_pi1_t dinv;
	TMP_DECL;
	TMP_MARK;
								 /* conservative tests for quotient size */
	adjust = np[nn - 1] >= dp[dn - 1];
	mp_ptr n2p, d2p;
	mp_limb_t cy;
	int cnt;

	qp[nn - dn] = 0;			 /* zero high quotient limb */
	count_leading_zeros (cnt, dp[dn - 1]);
	cnt -= GMP_NAIL_BITS;
	d2p = TMP_ALLOC_LIMBS (dn);
	mpn_lshift (d2p, dp, dn, cnt);

	for (int i = 0; i < dn; i++)
		printf("d2p[%d] = %lx\n", i, (unsigned long) d2p[i]);


	n2p = TMP_ALLOC_LIMBS (nn + 1);
	cy = mpn_lshift (n2p, np, nn, cnt);
	for (int i = 0; i < nn; i++)
		printf("n2p[%d] = %lx\n", i, (unsigned long) n2p[i]);
	n2p[nn] = cy;
	nn += adjust;

        printf("d2p[dn-1] = %08lx\nd2p[dn-2] = %08lx\n", d2p[dn-1], d2p[dn-2]);
	invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
        printf("dinv %08lx\n", dinv.inv32);
	my_mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
	for (int i = 0; i < nn - dn + 1; i++)	/* quotient has nn - dn + 1 limbs */
		printf("qp[%d] = %lx\n", i, (unsigned long) qp[i]);

	mpn_rshift (rp, n2p, dn, cnt);
	TMP_FREE;
	return;

}
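For comparison, the stock GMP entry point can be driven like this (mpn_tdiv_qr is in GMP's public mpn layer; qxn must be 0; the expected limb values in the comments assume 64-bit limbs without nails):

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mp_limb_t np[2] = { 0, 1 };              /* N = B, where B = 2^64 */
    mp_limb_t dp[1] = { 3 };
    mp_limb_t qp[2], rp[1];

    mpn_tdiv_qr(qp, rp, 0, np, 2, dp, 1);    /* {qp, nn-dn+1} and {rp, dn} */
    printf("q = %lu + %lu*B, r = %lu\n",
           (unsigned long) qp[0], (unsigned long) qp[1],
           (unsigned long) rp[0]);           /* 2^64 = 3*6148914691236517205 + 1 */
    return 0;
}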
Example #2
int
mpfr_urandomb (mpfr_ptr rop, gmp_randstate_t rstate)
{
    mpfr_limb_ptr rp;
    mpfr_prec_t nbits;
    mp_size_t nlimbs;
    mp_size_t k; /* number of high zero limbs */
    mpfr_exp_t exp;
    int cnt;

    rp = MPFR_MANT (rop);
    nbits = MPFR_PREC (rop);
    nlimbs = MPFR_LIMB_SIZE (rop);
    MPFR_SET_POS (rop);
    cnt = nlimbs * GMP_NUMB_BITS - nbits;

    /* Uniform non-normalized significand */
    /* generate exactly nbits so that the random generator stays in the same
       state, independent of the machine word size GMP_NUMB_BITS */
    mpfr_rand_raw (rp, rstate, nbits);
    if (MPFR_LIKELY (cnt != 0)) /* this will put the low bits to zero */
        mpn_lshift (rp, rp, nlimbs, cnt);

    /* Count the null significant limbs and remaining limbs */
    exp = 0;
    k = 0;
    while (nlimbs != 0 && rp[nlimbs - 1] == 0)
    {
        k ++;
        nlimbs --;
        exp -= GMP_NUMB_BITS;
    }

    if (MPFR_LIKELY (nlimbs != 0)) /* otherwise value is zero */
    {
        count_leading_zeros (cnt, rp[nlimbs - 1]);
        /* Normalization */
        if (mpfr_set_exp (rop, exp - cnt))
        {
            /* If the exponent is not in the current exponent range, we
               choose to return a NaN as this is probably a user error.
               Indeed this can happen only if the exponent range has been
               reduced to a very small interval and/or the precision is
               huge (very unlikely). */
            MPFR_SET_NAN (rop);
            __gmpfr_flags |= MPFR_FLAGS_NAN; /* Can't use MPFR_RET_NAN */
            return 1;
        }
        if (cnt != 0)
            mpn_lshift (rp + k, rp, nlimbs, cnt);
        if (k != 0)
            MPN_ZERO (rp, k);
    }
    else
        MPFR_SET_ZERO (rop);

    return 0;
}
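A minimal sketch of how this entry point is typically called from user code (standard MPFR/GMP public API; the seed and precision are arbitrary picks for the example):

#include <gmp.h>
#include <mpfr.h>

int main(void)
{
    gmp_randstate_t state;
    mpfr_t x;

    gmp_randinit_default(state);
    gmp_randseed_ui(state, 42);           /* fixed seed, reproducible output */
    mpfr_init2(x, 113);                   /* 113-bit significand */

    mpfr_urandomb(x, state);              /* uniform in [0, 1) */
    mpfr_printf("x = %.30Rg\n", x);

    mpfr_clear(x);
    gmp_randclear(state);
    return 0;
}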
Example #3
void
mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
{
  mp_ptr     lp, l1p, f1p;
  mp_size_t  size;
  mp_limb_t  c;
  TMP_DECL;

  ASSERT (ln != lnsub1);

  /* handle small n quickly, and hide the special case for L[-1]=-1 */
  if (n <= FIB_TABLE_LUCNUM_LIMIT)
    {
      mp_limb_t  f  = FIB_TABLE (n);
      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);

      /* L[n] = F[n] + 2F[n-1] */
      PTR(ln)[0] = f + 2*f1;
      SIZ(ln) = 1;

      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);
      SIZ(lnsub1) = (n == 0 ? -1 : 1);

      return;
    }

  TMP_MARK;
  size = MPN_FIB2_SIZE (n);
  f1p = TMP_ALLOC_LIMBS (size);

  MPZ_REALLOC (ln,     size+1);
  MPZ_REALLOC (lnsub1, size+1);
  lp  = PTR(ln);
  l1p = PTR(lnsub1);

  size = mpn_fib2_ui (l1p, f1p, n);

  /* L[n] = F[n] + 2F[n-1] */
#if HAVE_NATIVE_mpn_addlsh1_n
  c = mpn_addlsh1_n (lp, l1p, f1p, size);
#else
  c = mpn_lshift (lp, f1p, size, 1);
  c += mpn_add_n (lp, lp, l1p, size);
#endif
  lp[size] = c;
  SIZ(ln) = size + (c != 0);

  /* L[n-1] = 2F[n] - F[n-1] */
  c = mpn_lshift (l1p, l1p, size, 1);
  c -= mpn_sub_n (l1p, l1p, f1p, size);
  ASSERT ((mp_limb_signed_t) c >= 0);
  l1p[size] = c;
  SIZ(lnsub1) = size + (c != 0);

  TMP_FREE;
}
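mpz_lucnum2_ui is part of GMP's public mpz interface; a short usage sketch (the expected values follow from L(9) = 76 and L(10) = 123):

#include <gmp.h>

int main(void)
{
    mpz_t ln, lnsub1;
    mpz_inits(ln, lnsub1, NULL);

    mpz_lucnum2_ui(ln, lnsub1, 10);       /* ln = L(10), lnsub1 = L(9) */
    gmp_printf("L(10) = %Zd, L(9) = %Zd\n", ln, lnsub1);   /* 123, 76 */

    mpz_clears(ln, lnsub1, NULL);
    return 0;
}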
Example #4
/* Needs n+1 limbs of temporary storage. */
int
mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
{
  mp_limb_t cy;
  int neg;

  ASSERT (x3n > 0);
  ASSERT (x3n <= n);

  /* (x0 + 4*x2) +/- (2*x1 + 8*x3) */
#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
#if HAVE_NATIVE_mpn_addlsh2_n
  xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);

  cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);
#else /* HAVE_NATIVE_mpn_addlsh_n */
  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);

  cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);
#endif
  if (x3n < n)
    cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);
  tp[n] = cy;
#else
  cy = mpn_lshift (tp, xp + 2*n, n, 2);
  xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);

  tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);
  if (x3n < n)
    tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);
  else
    tp[n] += mpn_add_n (tp, xp + n, tp, n);
#endif
  mpn_lshift (tp, tp, n+1, 1);

  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;

#if HAVE_NATIVE_mpn_sumdiff_n
  if (neg)
    mpn_sumdiff_n (xp2, xm2, tp, xp2, n + 1);
  else
    mpn_sumdiff_n (xp2, xm2, xp2, tp, n + 1);
#else
  if (neg)
    mpn_sub_n (xm2, tp, xp2, n + 1);
  else
    mpn_sub_n (xm2, xp2, tp, n + 1);

  mpn_add_n (xp2, xp2, tp, n + 1);
#endif

  ASSERT (xp2[n] < 15);
  ASSERT (xm2[n] < 10);

  return neg;
}
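The even/odd split the routine computes can be sanity-checked with plain integers: for x(y) = x0 + x1*y + x2*y^2 + x3*y^3, the values x(+2) and x(-2) are the sum and difference of (x0 + 4*x2) and (2*x1 + 8*x3). A tiny self-contained check with made-up coefficients:

#include <assert.h>

int main(void)
{
    long x0 = 5, x1 = 7, x2 = 11, x3 = 3;
    long even = x0 + 4*x2;                    /* even-power part at y = 2 */
    long odd  = 2*x1 + 8*x3;                  /* odd-power part at y = 2 */

    assert(even + odd == x0 + 2*x1 + 4*x2 + 8*x3);    /* x(+2) */
    assert(even - odd == x0 - 2*x1 + 4*x2 - 8*x3);    /* x(-2) */
    return 0;
}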
Example #5
void
mpfr_random2 (mpfr_ptr x, mp_size_t size, mp_exp_t exp)
{
  mp_size_t xn;
  unsigned long cnt;
  mp_ptr xp = MPFR_MANT(x);
  mp_limb_t yp[1];
  mp_size_t prec = (MPFR_PREC(x) - 1)/BITS_PER_MP_LIMB;

  MPFR_CLEAR_FLAGS(x);
  xn = ABS (size);
  if (xn != 0)
    {
      if (xn > prec + 1)
	xn = prec + 1;

      mpn_random2 (xp, xn);
    }

  if (exp != 0) {
    /* use mpn_random instead of random since that function is not
       available on all platforms (for example HPUX, DEC OSF, ...) */
    mpn_random (yp, 1);
    exp = (mp_exp_t) yp[0] % (2 * exp) - exp;
  }

  count_leading_zeros(cnt, xp[xn - 1]); 
  if (cnt) mpn_lshift(xp, xp, xn, cnt); 
  MPFR_EXP(x) = exp-cnt; 
  cnt = xn*BITS_PER_MP_LIMB - prec; 
  /* cnt is the number of non significant bits in the low limb */
  xp[0] &= ~((MP_LIMB_T_ONE << cnt) - MP_LIMB_T_ONE);
}
Example #6
static void fp_double(element_ptr c, element_ptr a) {
	eptr ad = (eptr)a->data, cd = (eptr)c->data;
	if (!ad->flag) {
		cd->flag = 0;
	}
	else {
		fptr p = (fptr)c->field->data;
		const size_t t = p->limbs;
		if (mpn_lshift(cd->d, ad->d, t, 1)) {
			cd->flag = 2;
			// Again, assumes result is not zero.
			mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
		}
		else {
			int i = mpn_cmp(cd->d, p->primelimbs, t);
			if (!i) {
				cd->flag = 0;
			}
			else {
				cd->flag = 2;
				if (i > 0) {
					mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
				}
			}
		}
	}
}
Example #7
int
mpfr_urandomb (mpfr_ptr rop, gmp_randstate_t rstate)
{
  mp_ptr rp;
  mp_prec_t nbits;
  mp_size_t nlimbs;
  mp_size_t k; /* number of high zero limbs */
  mp_exp_t exp;
  int cnt;

  MPFR_CLEAR_FLAGS (rop);

  rp = MPFR_MANT (rop);
  nbits = MPFR_PREC (rop);
  nlimbs = MPFR_LIMB_SIZE (rop);
  MPFR_SET_POS (rop);

  /* Uniform non-normalized significand */
  _gmp_rand (rp, rstate, nlimbs * BITS_PER_MP_LIMB);

  /* If nbits isn't a multiple of BITS_PER_MP_LIMB, mask the low bits */
  cnt = nlimbs * BITS_PER_MP_LIMB - nbits;
  if (MPFR_LIKELY (cnt != 0))
    rp[0] &= ~MPFR_LIMB_MASK (cnt);

  /* Count the null significant limbs and remaining limbs */
  exp = 0;
  k = 0;
  while (nlimbs != 0 && rp[nlimbs - 1] == 0)
    {
      k ++;
      nlimbs --;
      exp -= BITS_PER_MP_LIMB;
    }

  if (MPFR_LIKELY (nlimbs != 0)) /* otherwise value is zero */
    {
      count_leading_zeros (cnt, rp[nlimbs - 1]);
      /* Normalization */
      if (mpfr_set_exp (rop, exp - cnt))
        {
          /* If the exponent is not in the current exponent range, we
             choose to return a NaN as this is probably a user error.
             Indeed this can happen only if the exponent range has been
             reduced to a very small interval and/or the precision is
             huge (very unlikely). */
          MPFR_SET_NAN (rop);
          __gmpfr_flags |= MPFR_FLAGS_NAN; /* Can't use MPFR_RET_NAN */
          return 1;
        }
      if (cnt != 0)
        mpn_lshift (rp + k, rp, nlimbs, cnt);
      if (k != 0)
        MPN_ZERO (rp, k);
    }
  else
    MPFR_SET_ZERO (rop);

  return 0;
}
Example #8
/*
 * Set f to z, choosing the smallest precision for f
 * so that z = f * (2^BPML)^(*zs) * 2^(RetVal)
 */
static int
set_z (mpfr_ptr f, mpz_srcptr z, mp_size_t *zs)
{
  mp_limb_t *p;
  mp_size_t s;
  int c;
  mp_prec_t pf;

  MPFR_ASSERTD (mpz_sgn (z) != 0);

  /* Remove useless trailing zero limbs */
  for (p = PTR (z), s = *zs = ABS (SIZ (z)) ; *p == 0; p++, s--)
    MPFR_ASSERTD (s >= 0);

  /* Get working precision */
  count_leading_zeros (c, p[s-1]);
  pf = s * BITS_PER_MP_LIMB - c;
  if (pf < MPFR_PREC_MIN)
    pf = MPFR_PREC_MIN;
  mpfr_init2 (f, pf);

  /* Copy Mantissa */
  if (MPFR_LIKELY (c))
    mpn_lshift (MPFR_MANT (f), p, s, c);
  else
    MPN_COPY (MPFR_MANT (f), p, s);

  MPFR_SET_SIGN (f, mpz_sgn (z));
  MPFR_SET_EXP (f, 0);

  return -c;
}
Example #9
/* Put in  rp[n..2n-1] an approximation of the n high limbs
   of {np, n}^2. The error is less than n ulps of rp[n]. */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t k;

  MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
  k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
    : (n+4)/2; /* ensures that k >= (n+3)/2 */
  MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  if (k < 0)
    /* we can't use mpn_sqr_basecase here, since it requires
       n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
       is not exported by GMP */
    mpn_sqr_n (rp, np, n);
  else if (k == 0)
    mpfr_mulhigh_n_basecase (rp, np, np, n);
  else
    {
      mp_size_t l = n - k;
      mp_limb_t cy;

      mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
      mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
      /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
      cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
      cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
      mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
    }
}
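A note on the last branch (my notation, not from the source): split u = uh*β^l + ul, where β = 2^GMP_NUMB_BITS and uh holds the top k limbs. Then

  u^2 = uh^2 * β^(2l)  +  2 * uh * ul * β^l  +  ul^2

mpn_sqr_n computes uh^2 exactly, mpfr_mulhigh_n approximates the high half of the cross product, the mpn_lshift/mpn_add_n pair doubles and accumulates it, and ul^2 only affects limbs below the returned window, contributing to the stated n-ulp error bound.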
Example #10
/* set f to the integer z multiplied by 2^e */
int
mpfr_set_z_2exp (mpfr_ptr f, mpz_srcptr z, mpfr_exp_t e, mpfr_rnd_t rnd_mode)
{
    mp_size_t fn, zn, dif, en;
    int k, sign_z, inex;
    mp_limb_t *fp, *zp;
    mpfr_exp_t exp;

    sign_z = mpz_sgn (z);
    if (MPFR_UNLIKELY (sign_z == 0)) /* ignore the exponent for 0 */
    {
        MPFR_SET_ZERO(f);
        MPFR_SET_POS(f);
        MPFR_RET(0);
    }
    MPFR_ASSERTD (sign_z == MPFR_SIGN_POS || sign_z == MPFR_SIGN_NEG);

    zn = ABS(SIZ(z)); /* limb size of z */
    /* compute en = floor(e/GMP_NUMB_BITS) */
    en = (e >= 0) ? e / GMP_NUMB_BITS : (e + 1) / GMP_NUMB_BITS - 1;
    MPFR_ASSERTD (zn >= 1);
    if (MPFR_UNLIKELY (zn + en > MPFR_EMAX_MAX / GMP_NUMB_BITS + 1))
        return mpfr_overflow (f, rnd_mode, sign_z);
    /* because zn + en >= MPFR_EMAX_MAX / GMP_NUMB_BITS + 2
       implies (zn + en) * GMP_NUMB_BITS >= MPFR_EMAX_MAX + GMP_NUMB_BITS + 1
       and exp = zn * GMP_NUMB_BITS + e - k
               >= (zn + en) * GMP_NUMB_BITS - k > MPFR_EMAX_MAX */

    fp = MPFR_MANT (f);
    fn = MPFR_LIMB_SIZE (f);
    dif = zn - fn;
    zp = PTR(z);
    count_leading_zeros (k, zp[zn-1]);

    /* now zn + en <= MPFR_EMAX_MAX / GMP_NUMB_BITS + 1
       thus (zn + en) * GMP_NUMB_BITS <= MPFR_EMAX_MAX + GMP_NUMB_BITS
       and exp = zn * GMP_NUMB_BITS + e - k
               <= (zn + en) * GMP_NUMB_BITS - k + GMP_NUMB_BITS - 1
               <= MPFR_EMAX_MAX + 2 * GMP_NUMB_BITS - 1 */
    exp = (mpfr_prec_t) zn * GMP_NUMB_BITS + e - k;
    /* The exponent will be exp or exp + 1 (due to rounding) */
    if (MPFR_UNLIKELY (exp > __gmpfr_emax))
        return mpfr_overflow (f, rnd_mode, sign_z);
    if (MPFR_UNLIKELY (exp + 1 < __gmpfr_emin))
        return mpfr_underflow (f, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
                               sign_z);

    if (MPFR_LIKELY (dif >= 0))
    {
        mp_limb_t rb, sb, ulp;
        int sh;

        /* number has to be truncated */
        if (MPFR_LIKELY (k != 0))
        {
            mpn_lshift (fp, &zp[dif], fn, k);
            if (MPFR_LIKELY (dif > 0))
                fp[0] |= zp[dif - 1] >> (GMP_NUMB_BITS - k);
        }
Example #11
static void fp_double(element_ptr r, element_ptr a) {
  fp_field_data_ptr p = r->field->data;
  const size_t t = p->limbs;
  if (mpn_lshift(r->data, a->data, t, 1)
      || mpn_cmp(r->data, p->primelimbs, t) >= 0) {
    mpn_sub_n(r->data, r->data, p->primelimbs, t);
  }
}
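A tiny standalone check of the shift-then-conditionally-subtract pattern both fp_double variants use, on a single-limb modulus (hypothetical test harness, not PBC code):

#include <assert.h>
#include <gmp.h>

int main(void)
{
    mp_limb_t p[1] = { 13 };                /* modulus */
    mp_limb_t a[1] = { 9 };                 /* input, already reduced: a < p */
    mp_limb_t r[1];

    if (mpn_lshift(r, a, 1, 1)              /* r = 2*a; carry out means the
                                               true value overflowed one limb */
        || mpn_cmp(r, p, 1) >= 0)
        mpn_sub_n(r, r, p, 1);              /* one subtraction suffices since
                                               a < p implies 2*a < 2*p */

    assert(r[0] == 5);                      /* (2*9) mod 13 = 5 */
    return 0;
}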
Example #12
void
mpfr_urandomb (mpfr_ptr rop, gmp_randstate_t rstate)
{
  mp_ptr rp;
  mp_size_t nlimbs;
  mp_exp_t exp;
  unsigned long cnt, nbits;

  MPFR_CLEAR_FLAGS(rop);

  rp = MPFR_MANT(rop);
  nbits = MPFR_PREC(rop);
  nlimbs = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;

  _gmp_rand (rp, rstate, nbits);

  /* If nbits isn't a multiple of BITS_PER_MP_LIMB, shift up.  */
  if (nlimbs != 0)
    {
      if (nbits % BITS_PER_MP_LIMB != 0)
	mpn_lshift (rp, rp, nlimbs,
		    BITS_PER_MP_LIMB - nbits % BITS_PER_MP_LIMB);
    }

  exp = 0;
  while (nlimbs != 0 && rp[nlimbs - 1] == 0)
    {
      nlimbs--;
      exp--;
    }

  if (nlimbs != 0) /* otherwise value is zero */
    {
      count_leading_zeros (cnt, rp[nlimbs - 1]);
      if (cnt) mpn_lshift (rp, rp, nlimbs, cnt); 
      exp -= cnt;

      cnt = nlimbs*BITS_PER_MP_LIMB - nbits; 
      /* cnt is the number of non significant bits in the low limb */
      rp[0] &= ~((MP_LIMB_T_ONE << cnt) - 1);
    } 

  MPFR_EXP (rop) = exp;
}
Example #13
static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1(dst, src, n, CNST_LIMB(1) << (s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift(ws, src, n, s);
  return __cy + mpn_add_n(dst, dst, ws, n);
#endif
}
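A small driver for the fallback path (assuming the helper above is in scope; the values are arbitrary):

#include <assert.h>
#include <gmp.h>

int main(void)
{
    mp_limb_t dst[2] = { 10, 0 };
    mp_limb_t src[2] = { 3, 0 };
    mp_limb_t ws[2];                        /* scratch, as the helper requires */

    mp_limb_t cy = DO_mpn_addlsh_n(dst, src, 2, 4, ws);  /* dst += src << 4 */
    assert(cy == 0 && dst[0] == 10 + (3 << 4) && dst[1] == 0);
    return 0;
}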
Example #14
/* truncates, returns inexact */
int
_arf_get_integer_mpn(mp_ptr y, mp_srcptr x, mp_size_t xn, slong exp)
{
    slong bot_exp = exp - xn * FLINT_BITS;

    if (bot_exp >= 0)
    {
        mp_size_t bot_limbs;
        mp_bitcnt_t bot_bits;

        bot_limbs = bot_exp / FLINT_BITS;
        bot_bits = bot_exp % FLINT_BITS;

        flint_mpn_zero(y, bot_limbs);

        if (bot_bits == 0)
            flint_mpn_copyi(y + bot_limbs, x, xn);
        else
            y[bot_limbs + xn] = mpn_lshift(y + bot_limbs, x, xn, bot_bits);

        /* exact */
        return 0;
    }
    else if (exp <= 0)
    {
        /* inexact */
        return 1;
    }
    else
    {
        mp_size_t top_limbs;
        mp_bitcnt_t top_bits;
        mp_limb_t cy;

        top_limbs = exp / FLINT_BITS;
        top_bits = exp % FLINT_BITS;

        if (top_bits == 0)
        {
            flint_mpn_copyi(y, x + xn - top_limbs, top_limbs);
            /* inexact */
            return 1;
        }
        else
        {
            /* can be inexact */
            cy = mpn_rshift(y, x + xn - top_limbs - 1,
                            top_limbs + 1, FLINT_BITS - top_bits);

            return (cy != 0) || (top_limbs + 1 != xn);
        }
    }
}
Example #15
void tc4_lshift(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn, mp_size_t bits)
{
   if (xn == 0)
      *rn = 0;
   else
   {
      mp_size_t xu = ABS(xn);
      mp_limb_t msl = mpn_lshift(rp, xp, xu, bits);
      if (msl)
      {
         rp[xu] = msl;
         *rn = (xn >= 0 ? xn + 1 : xn - 1);
      }
      else
         *rn = xn;
   }
}
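tc4_lshift keeps the usual signed-size convention: the sign of xn is the sign of the number and ABS(xn) its limb count. A hypothetical driver showing the carry-out growing a negative operand, valid for any limb width on a no-nails build:

#include <assert.h>
#include <gmp.h>

int main(void)
{
    mp_limb_t xp[2] = { 0, (mp_limb_t) 1 << (GMP_NUMB_BITS - 1) };
    mp_limb_t rp[3];
    mp_size_t rn;

    tc4_lshift(rp, &rn, xp, -2, 3);   /* negative 2-limb input, shift by 3 bits */
    assert(rn == -3);                 /* top bits carried into a third limb */
    assert(rp[2] == 4 && rp[1] == 0 && rp[0] == 0);
    return 0;
}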
Example #16
/* set f to the integer z */
int 
mpfr_set_z (mpfr_ptr f, mpz_srcptr z, mp_rnd_t rnd_mode)
{
  mp_size_t fn, zn, dif;
  int k, sign_z, inex;
  mp_limb_t *fp, *zp;
  mp_exp_t exp;

  MPFR_CLEAR_FLAGS (f); /* z cannot be NaN nor Inf */

  sign_z = mpz_cmp_ui (z, 0);

  if (sign_z == 0)
    {
      MPFR_SET_ZERO(f);
      MPFR_SET_POS(f);
      MPFR_RET(0);
    }

  fp = MPFR_MANT(f);
  fn = 1 + (MPFR_PREC(f) - 1) / BITS_PER_MP_LIMB;
  zn = ABS(SIZ(z));
  dif = zn - fn;
  zp = PTR(z);
  count_leading_zeros(k, zp[zn-1]);

  exp = (mp_prec_t) zn * BITS_PER_MP_LIMB - k;
  /* The exponent will be exp or exp + 1 (due to rounding) */
  if (exp > __mpfr_emax)
    return mpfr_set_overflow(f, rnd_mode, sign_z);
  if (exp + 1 < __mpfr_emin)
    return mpfr_set_underflow(f, rnd_mode, sign_z);

  if (MPFR_SIGN(f) * sign_z < 0)
    MPFR_CHANGE_SIGN(f);

  if (dif >= 0)
    {
      mp_limb_t cc;
      int sh;

      /* number has to be truncated */
      if (k != 0)
        {
          mpn_lshift(fp, zp + dif, fn, k);
          if (dif != 0)
            fp[0] += zp[dif - 1] >> (BITS_PER_MP_LIMB - k);
        }
Example #17
/* Define our own squaring function, which uses mpn_sqr_basecase for its
   allowed sizes, but its own code for larger sizes.  */
static void
mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
    {
      mpn_sqr_basecase (rp, up, n);
      return;
    }

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }
  if (n > 1)
    {
      mp_limb_t cy;

      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
	{
	  mp_limb_t cy;
	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
	  tp[n + i - 2] = cy;
	}
      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

      {
	mp_limb_t cy;
#if HAVE_NATIVE_mpn_addlsh1_n
	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
	rp[2 * n - 1] += cy;
      }
    }
}
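The structure above (one mpn_mul_1/mpn_addmul_1 pass for the cross products, MPN_SQR_DIAGONAL for the squares, then a doubling via addlsh1 or lshift+add) is the schoolbook identity with the cross terms factored out. Scaled down to two 8-bit "limbs":

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t B = 256;                        /* toy limb base */
    uint64_t a0 = 200, a1 = 123;
    uint64_t u = a1 * B + a0;

    uint64_t diagonal = a1*a1*B*B + a0*a0;   /* cf. MPN_SQR_DIAGONAL */
    uint64_t cross    = a0*a1*B;             /* cf. the mul_1/addmul_1 pass */

    assert(u*u == diagonal + 2*cross);       /* doubling = addlsh1 / lshift+add */
    return 0;
}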
Example #18
void
mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
{
  mp_size_t usize = u->_mp_size;
  mp_size_t abs_usize = ABS (usize);
  mp_size_t wsize;
  mp_size_t limb_cnt;
  mp_ptr wp;
  mp_limb_t wlimb;

  if (usize == 0)
    {
      w->_mp_size = 0;
      return;
    }

  limb_cnt = cnt / GMP_NUMB_BITS;
  wsize = abs_usize + limb_cnt + 1;
  if (w->_mp_alloc < wsize)
    _mpz_realloc (w, wsize);

  wp = w->_mp_d;
  wsize = abs_usize + limb_cnt;

  cnt %= GMP_NUMB_BITS;
  if (cnt != 0)
    {
      wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt);
      if (wlimb != 0)
	{
	  wp[wsize] = wlimb;
	  wsize++;
	}
    }
  else
    {
      MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize);
    }

  /* Zero all whole limbs at low end.  Do it here and not before calling
     mpn_lshift, not to lose for U == W.  */
  MPN_ZERO (wp, limb_cnt);

  w->_mp_size = usize >= 0 ? wsize : -wsize;
}
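The public-API view of the same operation (documented GMP integer functions):

#include <gmp.h>

int main(void)
{
    mpz_t w, u;
    mpz_init(w);
    mpz_init_set_ui(u, 5);

    mpz_mul_2exp(w, u, 100);                 /* w = 5 * 2^100 */
    gmp_printf("%Zd\n", w);                  /* 6338253001141147007483516026880 */

    mpz_clear(w);
    mpz_clear(u);
    return 0;
}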
Example #19
/* Perform left-shift operation on MPN
 *
 * pre-conditions:
 *  - 0 < count
 *  - rn = sn + ceil(count / GMP_NUMB_BITS)
 *  - sn > 0
 *
 * return value: most-significant limb stored in {rp,rn} result
 */
mp_limb_t
integer_gmp_mpn_lshift (mp_limb_t rp[], const mp_limb_t sp[],
                        const mp_size_t sn, const mp_bitcnt_t count)
{
  const mp_size_t    limb_shift = count / GMP_NUMB_BITS;
  const unsigned int bit_shift  = count % GMP_NUMB_BITS;
  const mp_size_t    rn0        = sn + limb_shift;

  memset(rp, 0, limb_shift*sizeof(mp_limb_t));
  if (bit_shift) {
    const mp_limb_t msl = mpn_lshift(&rp[limb_shift], sp, sn, bit_shift);
    rp[rn0] = msl;
    return msl;
  } else {
    memcpy(&rp[limb_shift], sp, sn*sizeof(mp_limb_t));
    return rp[rn0-1];
  }
}
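A sketch exercising the helper above with a shift that spans a limb boundary (assumes 64-bit limbs with no nails; the expected limb values change otherwise):

#include <assert.h>
#include <gmp.h>

int main(void)
{
    const mp_limb_t sp[2] = { 1, 1 };        /* value 2^64 + 1 */
    mp_limb_t rp[4];                         /* sn + ceil(68/64) = 4 limbs */

    integer_gmp_mpn_lshift(rp, sp, 2, 68);   /* limb_shift = 1, bit_shift = 4 */
    assert(rp[0] == 0);                      /* zeroed low limb */
    assert(rp[1] == 16 && rp[2] == 16);      /* sp << 4, one limb up */
    assert(rp[3] == 0);                      /* msl: nothing shifted out */
    return 0;
}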
Example #20
/* FIXME: Remove.  */
unsigned long int
lc_test (mp_ptr rp, gmp_randstate_t s, const int evenbits)
{
  unsigned long int rn, nbits;
  int f;

  nbits = s->_mp_algdata._mp_lc->_mp_m2exp / 2;
  rn = nbits / GMP_NUMB_BITS + (nbits % GMP_NUMB_BITS != 0);
  MPN_ZERO (rp, rn);

  for (f = 0; f < nbits; f++)
    {
      mpn_lshift (rp, rp, rn, 1);
      if (f % 2 == ! evenbits)
	rp[0] += 1;
    }

  return nbits;
}
Example #21
File: fmpz.c Project: hperl/flint
void fmpz_mul_2exp(fmpz_t output, fmpz_t x, unsigned long exp)
{
   unsigned long limbs = (exp >> FLINT_LG_BITS_PER_LIMB);
   unsigned long bits = (exp & (FLINT_BITS - 1));
   mp_limb_t msl = 0L;
   
   if (x[0] == 0) 
   {
      output[0] = 0L;
      return;
   }
   
   if (bits) 
   {
      msl = mpn_lshift(output + limbs + 1, x + 1, FLINT_ABS(x[0]), bits);
      if (msl) output[limbs + FLINT_ABS(x[0]) + 1] = msl; 
   } else F_mpn_copy(output + limbs + 1, x + 1, FLINT_ABS(x[0]));
   if (limbs) F_mpn_clear(output + 1, limbs);
   if ((long) x[0] >= 0L) output[0] = x[0] + limbs + (msl != 0L);
   else output[0] = x[0] - limbs - (msl != 0L);
}
Example #22
/* returns 0 if result exact, non-zero otherwise */
int
mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mp_rnd_t rnd_mode)
{
  long int xn, yn, dif, sh, i;
  mp_limb_t *xp, *yp, *tmp, c, d;
  mp_exp_t exp;
  int inexact, middle = 1;
  TMP_DECL(marker);

  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ))
    {
      if (MPFR_IS_NAN(x))
	{
	  MPFR_SET_NAN(y);
	  MPFR_RET_NAN;
	}
      else if (MPFR_IS_INF(x))
	{
	  MPFR_SET_INF(y);
	  MPFR_SET_SAME_SIGN(y, x);
	  MPFR_RET(0);
	}
      else
	{
          MPFR_ASSERTD(MPFR_IS_ZERO(x));
	  if (u == 0)/* 0/0 is NaN */
	    {
	      MPFR_SET_NAN(y);
	      MPFR_RET_NAN;
	    }
	  else
	    {
	      MPFR_SET_ZERO(y);
	      MPFR_RET(0);
	    }
	}
    }

  if (MPFR_UNLIKELY(u == 0))
    {
      /* x/0 is Inf */
      MPFR_SET_INF(y);
      MPFR_SET_SAME_SIGN(y, x);
      MPFR_RET(0);
    }

  MPFR_CLEAR_FLAGS(y);

  MPFR_SET_SAME_SIGN(y, x);

  TMP_MARK(marker);
  xn = MPFR_LIMB_SIZE(x);
  yn = MPFR_LIMB_SIZE(y);

  xp = MPFR_MANT(x);
  yp = MPFR_MANT(y);
  exp = MPFR_GET_EXP (x);

  dif = yn + 1 - xn;

  /* we need to store yn+1 = xn + dif limbs of the quotient */
  /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */
  tmp = (mp_limb_t*) TMP_ALLOC((yn + 1) * BYTES_PER_MP_LIMB);

  c = (mp_limb_t) u;
  MPFR_ASSERTN(u == c);
  if (dif >= 0)
    c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */
  else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */
    c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c);

  inexact = (c != 0);

  /* First pass in estimating next bit of the quotient, in case of RNDN    *
   * In case we just have the right number of bits (postpone this ?),      *
   * we need to check whether the remainder is more or less than half      *
   * the divisor. The test must be performed with a subtraction, so as     *
   * to prevent carries.                                                   */

  if (rnd_mode == GMP_RNDN)
    {
      if (c < (mp_limb_t) u - c) /* We have u > c */
	middle = -1;
      else if (c > (mp_limb_t) u - c)
	middle = 1;
      else
	middle = 0; /* exactly in the middle */
    }

  /* If we believe that we are right in the middle or exact, we should check
     that we did not neglect any word of x (division large / 1 -> small). */

  for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++)
    if (xp[i])
      inexact = middle = 1; /* larger than middle */

  /*
     If the high limb of the result is 0 (xp[xn-1] < u), remove it.
     Otherwise, compute the left shift to be performed to normalize.
     In the latter case, we discard some low bits computed. They
     contain information useful for the rounding, hence the updating
     of middle and inexact.
  */

  if (tmp[yn] == 0)
    {
      MPN_COPY(yp, tmp, yn);
      exp -= BITS_PER_MP_LIMB;
      sh = 0;
    }
  else
    {
      count_leading_zeros (sh, tmp[yn]);

      /* shift left to normalize */
      if (sh)
        {
          mp_limb_t w = tmp[0] << sh;

          mpn_lshift (yp, tmp + 1, yn, sh);
          yp[0] += tmp[0] >> (BITS_PER_MP_LIMB - sh);

          if (w > (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1)))
            { middle = 1; }
          else if (w < (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1)))
            { middle = -1; }
          else
            { middle = (c != 0); }

          inexact = inexact || (w != 0);
          exp -= sh;
        }
      else
        { /* this happens only if u == 1 and xp[xn-1] >=
             1<<(BITS_PER_MP_LIMB-1). It might be better to handle the
             u == 1 case separately ?
          */

          MPN_COPY (yp, tmp + 1, yn);
        }
    }
Example #23
void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  int vm1_neg, vmh_neg;
  mp_limb_t cy;
  mp_ptr gp, hp;
  mp_ptr as1, asm1, as2, ash, asmh;
  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
  enum toom4_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  as1  = TMP_SALLOC_LIMBS (n + 1);
  asm1 = TMP_SALLOC_LIMBS (n + 1);
  as2  = TMP_SALLOC_LIMBS (n + 1);
  ash  = TMP_SALLOC_LIMBS (n + 1);
  asmh = TMP_SALLOC_LIMBS (n + 1);

  bs1  = TMP_SALLOC_LIMBS (n + 1);
  bsm1 = TMP_SALLOC_LIMBS (n + 1);
  bs2  = TMP_SALLOC_LIMBS (n + 1);
  bsh  = TMP_SALLOC_LIMBS (n + 1);
  bsmh = TMP_SALLOC_LIMBS (n + 1);

  gp = pp;
  hp = pp + n + 1;

  /* Compute as1 and asm1.  */
  gp[n]  = mpn_add_n (gp, a0, a2, n);
  gp[n] += mpn_add   (gp, gp, n, a4, s);
  hp[n]  = mpn_add_n (hp, a1, a3, n);
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#else
  mpn_add_n (as1, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_sub_n (asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#endif

  /* Compute as2.  */
#if !HAVE_NATIVE_mpn_addlsh_n
  ash[n] = mpn_lshift (ash, a2, n, 2);			/*        4a2       */
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a3, a4, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
  as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy  = mpn_lshift (as2, a4, s, 1);
  cy += mpn_add_n (as2, a3, as2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 4 * cy + mpn_lshift (as2, as2, n, 2);
  cy += mpn_add_n (as2, a1, as2, n);
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  as2[n] = cy + mpn_add_n (as2, a0, as2, n);
  mpn_add_n (as2, ash, as2, n + 1);
#endif

  /* Compute ash and asmh.  */
#if HAVE_NATIVE_mpn_addlsh_n
  cy  = mpn_addlsh_n (gp, a2, a0, n, 2);		/* 4a0  +  a2       */
  cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2);	/* 16a0 + 4a2 +  a4 */ /* FIXME s */
  gp[n] = cy;
  cy  = mpn_addlsh_n (hp, a3, a1, n, 2);		/*  4a1 +  a3       */
  cy = 2 * cy + mpn_lshift (hp, hp, n, 1);		/*  8a1 + 2a3       */
  hp[n] = cy;
#else
  gp[n] = mpn_lshift (gp, a0, n, 4);			/* 16a0             */
  mpn_add (gp, gp, n + 1, a4, s);			/* 16a0 +        a4 */
  mpn_add_n (gp, ash, gp, n+1);				/* 16a0 + 4a2 +  a4 */
  cy  = mpn_lshift (hp, a1, n, 3);			/*  8a1             */
  cy += mpn_lshift (ash, a3, n, 1);			/*        2a3       */
  cy += mpn_add_n (hp, ash, hp, n);			/*  8a1 + 2a3       */
  hp[n] = cy;
#endif
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#else
  mpn_add_n (ash, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_sub_n (asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#endif

  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_addsub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
Example #24
int
mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx,cx;
  mpfr_uexp_t d;
  mpfr_prec_t p, sh, cnt;
  mp_size_t n;
  mp_limb_t *ap, *bp, *cp;
  mp_limb_t limb;
  int inexact;
  mp_limb_t bcp,bcp1; /* Cp and C'p+1 */
  mp_limb_t bbcp = (mp_limb_t) -1, bbcp1 = (mp_limb_t) -1; /* Cp+1 and C'p+2,
    gcc claims that they might be used uninitialized. We fill them with invalid
    values, which should produce a failure if so. See README.dev file. */

  MPFR_TMP_DECL(marker);

  MPFR_TMP_MARK(marker);

  MPFR_ASSERTD(MPFR_PREC(a) == MPFR_PREC(b) && MPFR_PREC(b) == MPFR_PREC(c));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(b));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(c));

  /* Read prec and num of limbs */
  p = MPFR_PREC (b);
  n = MPFR_PREC2LIMBS (p);

  /* Fast cmp of |b| and |c|*/
  bx = MPFR_GET_EXP (b);
  cx = MPFR_GET_EXP (c);
  if (MPFR_UNLIKELY(bx == cx))
    {
      mp_size_t k = n - 1;
      /* Check mantissas since the exponents are equal */
      bp = MPFR_MANT(b);
      cp = MPFR_MANT(c);
      while (k>=0 && MPFR_UNLIKELY(bp[k] == cp[k]))
        k--;
      if (MPFR_UNLIKELY(k < 0))
        /* b == c ! */
        {
          /* Return exact number 0 */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET(0);
        }
      else if (bp[k] > cp[k])
        goto BGreater;
      else
        {
          MPFR_ASSERTD(bp[k]<cp[k]);
          goto CGreater;
        }
    }
  else if (MPFR_UNLIKELY(bx < cx))
    {
      /* Swap b and c and set sign */
      mpfr_srcptr t;
      mpfr_exp_t tx;
    CGreater:
      MPFR_SET_OPPOSITE_SIGN(a,b);
      t  = b;  b  = c;  c  = t;
      tx = bx; bx = cx; cx = tx;
    }
  else
    {
      /* b > c */
    BGreater:
      MPFR_SET_SAME_SIGN(a,b);
    }

  /* Now b > c */
  MPFR_ASSERTD(bx >= cx);
  d = (mpfr_uexp_t) bx - cx;
  DEBUG (printf ("New with diff=%lu\n", (unsigned long) d));

  if (MPFR_UNLIKELY(d <= 1))
    {
      if (MPFR_LIKELY(d < 1))
        {
          /* <-- b -->
             <-- c --> : exact sub */
          ap = MPFR_MANT(a);
          mpn_sub_n (ap, MPFR_MANT(b), MPFR_MANT(c), n);
          /* Normalize */
        ExactNormalize:
          limb = ap[n-1];
          if (MPFR_LIKELY(limb))
            {
              /* First limb is not zero. */
              count_leading_zeros(cnt, limb);
              /* cnt could be == 0 <= SubD1Lose */
              if (MPFR_LIKELY(cnt))
                {
                  mpn_lshift(ap, ap, n, cnt); /* Normalize number */
                  bx -= cnt; /* Update final expo */
                }
              /* Last limb should be ok */
              MPFR_ASSERTD(!(ap[0] & MPFR_LIMB_MASK((unsigned int) (-p)
                                                    % GMP_NUMB_BITS)));
            }
          else
            {
              /* First limb is zero */
              mp_size_t k = n-1, len;
              /* Find the first limb not equal to zero.
                 FIXME: it is assumed to exist (since |b| > |c| and same prec) */
              do
                {
                  MPFR_ASSERTD( k > 0 );
                  limb = ap[--k];
                }
              while (limb == 0);
              MPFR_ASSERTD(limb != 0);
              count_leading_zeros(cnt, limb);
              k++;
              len = n - k; /* Number of last limb */
              MPFR_ASSERTD(k >= 0);
              if (MPFR_LIKELY(cnt))
                mpn_lshift(ap+len, ap, k, cnt); /* Normalize the High Limb*/
              else
                {
                  /* Must use DECR since src and dest may overlap & dest>=src*/
                  MPN_COPY_DECR(ap+len, ap, k);
                }
              MPN_ZERO(ap, len); /* Zeroing the last limbs */
              bx -= cnt + len*GMP_NUMB_BITS; /* Update Expo */
              /* Last limb should be ok */
              MPFR_ASSERTD(!(ap[len]&MPFR_LIMB_MASK((unsigned int) (-p)
                                                    % GMP_NUMB_BITS)));
            }
          /* Check expo underflow */
          if (MPFR_UNLIKELY(bx < __gmpfr_emin))
            {
              MPFR_TMP_FREE(marker);
              /* inexact=0 */
              DEBUG( printf("(D==0 Underflow)\n") );
              if (rnd_mode == MPFR_RNDN &&
                  (bx < __gmpfr_emin - 1 ||
                   (/*inexact >= 0 &&*/ mpfr_powerof2_raw (a))))
                rnd_mode = MPFR_RNDZ;
              return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
            }
          MPFR_SET_EXP (a, bx);
          /* No rounding is necessary since the result is exact */
          MPFR_ASSERTD(ap[n-1] > ~ap[n-1]);
          MPFR_TMP_FREE(marker);
          return 0;
        }
      else /* if (d == 1) */
        {
          /* | <-- b -->
             |  <-- c --> */
          mp_limb_t c0, mask;
          mp_size_t k;
          MPFR_UNSIGNED_MINUS_MODULO(sh, p);
          /* If we lose at least one bit, compute 2*b-c (Exact)
           * else compute b-c/2 */
          bp = MPFR_MANT(b);
          cp = MPFR_MANT(c);
          k = n-1;
          limb = bp[k] - cp[k]/2;
          if (limb > MPFR_LIMB_HIGHBIT)
            {
              /* We can't lose precision: compute b-c/2 */
              /* Shift c in the allocated temporary block */
            SubD1NoLose:
              c0 = cp[0] & (MPFR_LIMB_ONE<<sh);
              cp = MPFR_TMP_LIMBS_ALLOC (n);
              mpn_rshift(cp, MPFR_MANT(c), n, 1);
              if (MPFR_LIKELY(c0 == 0))
                {
                  /* Result is exact: no need of rounding! */
                  ap = MPFR_MANT(a);
                  mpn_sub_n (ap, bp, cp, n);
                  MPFR_SET_EXP(a, bx); /* No expo overflow! */
                  /* No truncate or normalize is needed */
                  MPFR_ASSERTD(ap[n-1] > ~ap[n-1]);
                  /* No rounding is necessary since the result is exact */
                  MPFR_TMP_FREE(marker);
                  return 0;
                }
              ap = MPFR_MANT(a);
              mask = ~MPFR_LIMB_MASK(sh);
              cp[0] &= mask; /* Delete last bit of c */
              mpn_sub_n (ap, bp, cp, n);
              MPFR_SET_EXP(a, bx);                 /* No expo overflow! */
              MPFR_ASSERTD( !(ap[0] & ~mask) );    /* Check last bits */
              /* No normalize is needed */
              MPFR_ASSERTD(ap[n-1] > ~ap[n-1]);
              /* Rounding is necessary since c0 = 1*/
              /* Cp =-1 and C'p+1=0 */
              bcp = 1; bcp1 = 0;
              if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
                {
                  /* Even Rule apply: Check Ap-1 */
                  if (MPFR_LIKELY( (ap[0] & (MPFR_LIMB_ONE<<sh)) == 0) )
                    goto truncate;
                  else
                    goto sub_one_ulp;
                }
              MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
              if (rnd_mode == MPFR_RNDZ)
                goto sub_one_ulp;
              else
                goto truncate;
            }
          else if (MPFR_LIKELY(limb < MPFR_LIMB_HIGHBIT))
            {
              /* We lose at least one bit of prec */
              /* Compute 2*b-c (exact) */
              /* Shift b in the allocated temporary block */
            SubD1Lose:
              bp = MPFR_TMP_LIMBS_ALLOC (n);
              mpn_lshift (bp, MPFR_MANT(b), n, 1);
              ap = MPFR_MANT(a);
              mpn_sub_n (ap, bp, cp, n);
              bx--;
              goto ExactNormalize;
            }
          else
            {
              /* Case: limb = 100000000000 */
              /* Check while b[k] == c'[k] (C' is C shifted by 1) */
              /* If b[k]<c'[k] => We lose at least one bit*/
              /* If b[k]>c'[k] => We don't lose any bit */
              /* If k==-1 => We don't lose any bit
                 AND the result is 100000000000 0000000000 00000000000 */
              mp_limb_t carry;
              do {
                carry = cp[k]&MPFR_LIMB_ONE;
                k--;
              } while (k>=0 &&
                       bp[k]==(carry=cp[k]/2+(carry<<(GMP_NUMB_BITS-1))));
              if (MPFR_UNLIKELY(k<0))
                {
                  /*If carry then (sh==0 and Virtual c'[-1] > Virtual b[-1]) */
                  if (MPFR_UNLIKELY(carry)) /* carry = cp[0]&MPFR_LIMB_ONE */
                    {
                      /* FIXME: Can be faster? */
                      MPFR_ASSERTD(sh == 0);
                      goto SubD1Lose;
                    }
                  /* Result is a power of 2 */
                  ap = MPFR_MANT (a);
                  MPN_ZERO (ap, n);
                  ap[n-1] = MPFR_LIMB_HIGHBIT;
                  MPFR_SET_EXP (a, bx); /* No expo overflow! */
                  /* No Normalize is needed*/
                  /* No Rounding is needed */
                  MPFR_TMP_FREE (marker);
                  return 0;
                }
              /* carry = cp[k]/2+(cp[k-1]&1)<<(GMP_NUMB_BITS-1) = c'[k]*/
              else if (bp[k] > carry)
                goto SubD1NoLose;
              else
                {
                  MPFR_ASSERTD(bp[k]<carry);
                  goto SubD1Lose;
                }
            }
        }
    }
  else if (MPFR_UNLIKELY(d >= p))
    {
      ap = MPFR_MANT(a);
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      /* We can't set A before since we use cp for rounding... */
      /* Perform rounding: check if a=b or a=b-ulp(b) */
      if (MPFR_UNLIKELY(d == p))
        {
          /* cp == -1 and c'p+1 = ? */
          bcp  = 1;
          /* We need Cp+1 later for a very improbable case. */
          bbcp = (MPFR_MANT(c)[n-1] & (MPFR_LIMB_ONE<<(GMP_NUMB_BITS-2)));
          /* We also need C'p+1 for an even more improbable case... */
          if (MPFR_LIKELY( bbcp ))
            bcp1 = 1;
          else
            {
              cp = MPFR_MANT(c);
              if (MPFR_UNLIKELY(cp[n-1] == MPFR_LIMB_HIGHBIT))
                {
                  mp_size_t k = n-1;
                  do {
                    k--;
                  } while (k>=0 && cp[k]==0);
                  bcp1 = (k>=0);
                }
              else
                bcp1 = 1;
            }
          DEBUG( printf("(D=P) Cp=-1 Cp+1=%d C'p+1=%d \n", bbcp!=0, bcp1!=0) );
          bp = MPFR_MANT (b);

          /* Even if src and dest overlap, it is ok using MPN_COPY */
          if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
            {
              if (MPFR_UNLIKELY( bcp && bcp1==0 ))
                /* Cp=-1 and C'p+1=0: Even rule Apply! */
                /* Check Ap-1 = Bp-1 */
                if ((bp[0] & (MPFR_LIMB_ONE<<sh)) == 0)
                  {
                    MPN_COPY(ap, bp, n);
                    goto truncate;
                  }
              MPN_COPY(ap, bp, n);
              goto sub_one_ulp;
            }
          MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
          if (rnd_mode == MPFR_RNDZ)
            {
              MPN_COPY(ap, bp, n);
              goto sub_one_ulp;
            }
          else
            {
              MPN_COPY(ap, bp, n);
              goto truncate;
            }
        }
      else
        {
          /* Cp=0, Cp+1=-1 if d==p+1, C'p+1=-1 */
          bcp = 0; bbcp = (d==p+1); bcp1 = 1;
          DEBUG( printf("(D>P) Cp=%d Cp+1=%d C'p+1=%d\n", bcp!=0,bbcp!=0,bcp1!=0) );
          /* Need to compute C'p+2 if d==p+1 and if rnd_mode=NEAREST
             (Because of a very improbable case) */
          if (MPFR_UNLIKELY(d==p+1 && rnd_mode==MPFR_RNDN))
            {
              cp = MPFR_MANT(c);
              if (MPFR_UNLIKELY(cp[n-1] == MPFR_LIMB_HIGHBIT))
                {
                  mp_size_t k = n-1;
                  do {
                    k--;
                  } while (k>=0 && cp[k]==0);
                  bbcp1 = (k>=0);
                }
              else
                bbcp1 = 1;
              DEBUG( printf("(D>P) C'p+2=%d\n", bbcp1!=0) );
            }
          /* Copy mantissa B in A */
          MPN_COPY(ap, MPFR_MANT(b), n);
          /* Round */
          if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
            goto truncate;
          MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
          if (rnd_mode == MPFR_RNDZ)
            goto sub_one_ulp;
          else /* rnd_mode = AWAY */
            goto truncate;
        }
    }
  else
    {
      mpfr_uexp_t dm;
      mp_size_t m;
      mp_limb_t mask;

      /* General case: 2 <= d < p */
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      cp = MPFR_TMP_LIMBS_ALLOC (n);

      /* Shift c in temporary allocated place */
      dm = d % GMP_NUMB_BITS;
      m = d / GMP_NUMB_BITS;
      if (MPFR_UNLIKELY(dm == 0))
        {
          /* dm = 0 and m > 0: Just copy */
          MPFR_ASSERTD(m!=0);
          MPN_COPY(cp, MPFR_MANT(c)+m, n-m);
          MPN_ZERO(cp+n-m, m);
        }
      else if (MPFR_LIKELY(m == 0))
        {
          /* dm >=2 and m == 0: just shift */
          MPFR_ASSERTD(dm >= 2);
          mpn_rshift(cp, MPFR_MANT(c), n, dm);
        }
      else
        {
          /* dm > 0 and m > 0: shift and zero  */
          mpn_rshift(cp, MPFR_MANT(c)+m, n-m, dm);
          MPN_ZERO(cp+n-m, m);
        }

      DEBUG( mpfr_print_mant_binary("Before", MPFR_MANT(c), p) );
      DEBUG( mpfr_print_mant_binary("B=    ", MPFR_MANT(b), p) );
      DEBUG( mpfr_print_mant_binary("After ", cp, p) );

      /* Compute bcp=Cp and bcp1=C'p+1 */
      if (MPFR_LIKELY(sh))
        {
          /* Try to compute them from C' rather than C (FIXME: Faster?) */
          bcp = (cp[0] & (MPFR_LIMB_ONE<<(sh-1))) ;
          if (MPFR_LIKELY( cp[0] & MPFR_LIMB_MASK(sh-1) ))
            bcp1 = 1;
          else
            {
              /* We can't compute C'p+1 from C'. Compute it from C */
              /* Start from bit x=p-d+sh in mantissa C
                 (+sh since we have already looked sh bits in C'!) */
              mpfr_prec_t x = p-d+sh-1;
              if (MPFR_LIKELY(x>p))
                /* We have already looked at all the bits of c, so C'p+1 = 0 */
                bcp1 = 0;
              else
                {
                  mp_limb_t *tp = MPFR_MANT(c);
                  mp_size_t kx = n-1 - (x / GMP_NUMB_BITS);
                  mpfr_prec_t sx = GMP_NUMB_BITS-1-(x%GMP_NUMB_BITS);
                  DEBUG (printf ("(First) x=%lu Kx=%ld Sx=%lu\n",
                                 (unsigned long) x, (long) kx,
                                 (unsigned long) sx));
                  /* Looks at the last bits of limb kx (if sx=0 does nothing)*/
                  if (tp[kx] & MPFR_LIMB_MASK(sx))
                    bcp1 = 1;
                  else
                    {
                      /*kx += (sx==0);*/
                      /*If sx==0, tp[kx] hasn't been checked*/
                      do {
                        kx--;
                      } while (kx>=0 && tp[kx]==0);
                      bcp1 = (kx >= 0);
                    }
                }
            }
        }
      else
        {
          /* Compute Cp and C'p+1 from C with sh=0 */
          mp_limb_t *tp = MPFR_MANT(c);
          /* Start from bit x=p-d in mantissa C */
          mpfr_prec_t  x = p-d;
          mp_size_t   kx = n-1 - (x / GMP_NUMB_BITS);
          mpfr_prec_t sx = GMP_NUMB_BITS-1-(x%GMP_NUMB_BITS);
          MPFR_ASSERTD(p >= d);
          bcp = (tp[kx] & (MPFR_LIMB_ONE<<sx));
          /* Looks at the last bits of limb kx (If sx=0, does nothing)*/
          if (tp[kx] & MPFR_LIMB_MASK(sx))
            bcp1 = 1;
          else
            {
              /*kx += (sx==0);*/ /*If sx==0, tp[kx] hasn't been checked*/
              do {
                kx--;
              } while (kx>=0 && tp[kx]==0);
              bcp1 = (kx>=0);
            }
        }
      DEBUG( printf("sh=%lu Cp=%d C'p+1=%d\n", sh, bcp!=0, bcp1!=0) );

      /* Check if we can lose a bit, and if so compute Cp+1 and C'p+2 */
      bp = MPFR_MANT(b);
      if (MPFR_UNLIKELY((bp[n-1]-cp[n-1]) <= MPFR_LIMB_HIGHBIT))
        {
          /* We can lose a bit so we precompute Cp+1 and C'p+2 */
          /* Test for trivial case: since C'p+1=0, Cp+1=0 and C'p+2 =0 */
          if (MPFR_LIKELY(bcp1 == 0))
            {
              bbcp = 0;
              bbcp1 = 0;
            }
          else /* bcp1 != 0 */
            {
              /* We can lose a bit:
                 compute Cp+1 and C'p+2 from mantissa C */
              mp_limb_t *tp = MPFR_MANT(c);
              /* Start from bit x=(p+1)-d in mantissa C */
              mpfr_prec_t x  = p+1-d;
              mp_size_t kx = n-1 - (x/GMP_NUMB_BITS);
              mpfr_prec_t sx = GMP_NUMB_BITS-1-(x%GMP_NUMB_BITS);
              MPFR_ASSERTD(p > d);
              DEBUG (printf ("(pre) x=%lu Kx=%ld Sx=%lu\n",
                             (unsigned long) x, (long) kx,
                             (unsigned long) sx));
              bbcp = (tp[kx] & (MPFR_LIMB_ONE<<sx)) ;
              /* Looks at the last bits of limb kx (If sx=0, does nothing)*/
              /* If Cp+1=0, since C'p+1!=0, C'p+2=1 ! */
              if (MPFR_LIKELY(bbcp==0 || (tp[kx]&MPFR_LIMB_MASK(sx))))
                bbcp1 = 1;
              else
                {
                  /*kx += (sx==0);*/ /*If sx==0, tp[kx] hasn't been checked*/
                  do {
                    kx--;
                  } while (kx>=0 && tp[kx]==0);
                  bbcp1 = (kx>=0);
                  DEBUG (printf ("(Pre) Scan done for %ld\n", (long) kx));
                }
            } /*End of Bcp1 != 0*/
          DEBUG( printf("(Pre) Cp+1=%d C'p+2=%d\n", bbcp!=0, bbcp1!=0) );
        } /* End of "can lose a bit" */

      /* Clean shifted C' */
      mask = ~MPFR_LIMB_MASK (sh);
      cp[0] &= mask;

      /* Subtract the mantissa c from b in a */
      ap = MPFR_MANT(a);
      mpn_sub_n (ap, bp, cp, n);
      DEBUG( mpfr_print_mant_binary("Sub=  ", ap, p) );

     /* Normalize: we lose at max one bit*/
      if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
        {
          /* High bit is not set and we have to fix it! */
          /* Ap >= 010000xxx001 */
          mpn_lshift(ap, ap, n, 1);
          /* Ap >= 100000xxx010 */
          if (MPFR_UNLIKELY(bcp!=0)) /* Check if Cp = -1 */
            /* Since Cp == -1, we have to subtract one more */
            {
              mpn_sub_1(ap, ap, n, MPFR_LIMB_ONE<<sh);
              MPFR_ASSERTD(MPFR_LIMB_MSB(ap[n-1]) != 0);
            }
          /* Ap >= 10000xxx001 */
          /* Final exponent -1 since we have shifted the mantissa */
          bx--;
          /* Update bcp and bcp1 */
          MPFR_ASSERTN(bbcp != (mp_limb_t) -1);
          MPFR_ASSERTN(bbcp1 != (mp_limb_t) -1);
          bcp  = bbcp;
          bcp1 = bbcp1;
          /* We don't have a valid Cp+1 anymore!
             But since Ap >= 100000xxx001, the final sub can't unnormalize! */
        }
      MPFR_ASSERTD( !(ap[0] & ~mask) );

      /* Rounding */
      if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
        {
          if (MPFR_LIKELY(bcp==0))
            goto truncate;
          else if ((bcp1) || ((ap[0] & (MPFR_LIMB_ONE<<sh)) != 0))
            goto sub_one_ulp;
          else
            goto truncate;
        }

      /* Update rounding mode */
      MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
      if (rnd_mode == MPFR_RNDZ && (MPFR_LIKELY(bcp || bcp1)))
        goto sub_one_ulp;
      goto truncate;
    }
  MPFR_RET_NEVER_GO_HERE ();

  /* Sub one ulp to the result */
 sub_one_ulp:
  mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
  /* Result should be smaller than exact value: inexact=-1 */
  inexact = -1;
  /* Check normalisation */
  if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
    {
      /* ap was a power of 2, and we lose a bit */
      /* Now it is 0111111111111111111[00000 */
      mpn_lshift(ap, ap, n, 1);
      bx--;
      /* And the lost bit x depends on Cp+1, and Cp */
      /* Compute Cp+1 if it isn't already computed (i.e. d==1) */
      /* FIXME: Is this case possible? */
      if (MPFR_UNLIKELY(d == 1))
        bbcp = 0;
      DEBUG( printf("(SubOneUlp)Cp=%d, Cp+1=%d C'p+1=%d\n", bcp!=0,bbcp!=0,bcp1!=0));
      /* Compute the last bit (Since we have shifted the mantissa)
         we need one more bit!*/
      MPFR_ASSERTN(bbcp != (mp_limb_t) -1);
      if ( (rnd_mode == MPFR_RNDZ && bcp==0)
           || (rnd_mode==MPFR_RNDN && bbcp==0)
           || (bcp && bcp1==0) ) /*Exact result*/
        {
          ap[0] |= MPFR_LIMB_ONE<<sh;
          if (rnd_mode == MPFR_RNDN)
            inexact = 1;
          DEBUG( printf("(SubOneUlp) Last bit set\n") );
        }
      /* Result could be exact if C'p+1 = 0 and rnd == Zero
         since we have had one more bit to the result */
      /* Fixme: rnd_mode == MPFR_RNDZ needed ? */
      if (bcp1==0 && rnd_mode==MPFR_RNDZ)
        {
          DEBUG( printf("(SubOneUlp) Exact result\n") );
          inexact = 0;
        }
    }

  goto end_of_sub;

 truncate:
  /* Check if the result is an exact power of 2: 100000000000
     in which cases, we could have to do sub_one_ulp due to some nasty reasons:
     If Result is a Power of 2:
      + If rnd = AWAY,
      |  If Cp=-1 and C'p+1 = 0, SubOneUlp and the result is EXACT.
         If Cp=-1 and C'p+1 =-1, SubOneUlp and the result is above.
         Otherwise truncate
      + If rnd = NEAREST,
         If Cp= 0 and Cp+1  =-1 and C'p+2=-1, SubOneUlp and the result is above
         If cp=-1 and C'p+1 = 0, SubOneUlp and the result is exact.
         Otherwise truncate.
      X bit should always be set if SubOneUlp*/
  if (MPFR_UNLIKELY(ap[n-1] == MPFR_LIMB_HIGHBIT))
    {
      mp_size_t k = n-1;
      do {
        k--;
      } while (k>=0 && ap[k]==0);
      if (MPFR_UNLIKELY(k<0))
        {
          /* It is a power of 2! */
          /* Compute Cp+1 if it isn't already computed (i.e. d==1) */
          /* FIXME: Is this case possible? */
          if (d == 1)
            bbcp=0;
          DEBUG( printf("(Truncate) Cp=%d, Cp+1=%d C'p+1=%d C'p+2=%d\n", \
                 bcp!=0, bbcp!=0, bcp1!=0, bbcp1!=0) );
          MPFR_ASSERTN(bbcp != (mp_limb_t) -1);
          MPFR_ASSERTN((rnd_mode != MPFR_RNDN) || (bcp != 0) || (bbcp == 0) || (bbcp1 != (mp_limb_t) -1));
          if (((rnd_mode != MPFR_RNDZ) && bcp)
              ||
              ((rnd_mode == MPFR_RNDN) && (bcp == 0) && (bbcp) && (bbcp1)))
            {
              DEBUG( printf("(Truncate) Do sub\n") );
              mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
              mpn_lshift(ap, ap, n, 1);
              ap[0] |= MPFR_LIMB_ONE<<sh;
              bx--;
              /* FIXME: Explain why it works (or why not)... */
              inexact = (bcp1 == 0) ? 0 : (rnd_mode==MPFR_RNDN) ? -1 : 1;
              goto end_of_sub;
            }
        }
    }

  /* Computation of the inexact flag. */
  inexact = MPFR_LIKELY(bcp || bcp1) ? 1 : 0;

 end_of_sub:
  /* Update Expo */
  /* FIXME: Is this test really useful?
      If d==0      : Exact case. This is never called.
      if 1 < d < p : bx=MPFR_EXP(b) or MPFR_EXP(b)-1 > MPFR_EXP(c) > emin
      if d == 1    : bx=MPFR_EXP(b). If we could lose any bits, the exact
                     normalisation is called.
      if d >=  p   : bx=MPFR_EXP(b) >= MPFR_EXP(c) + p > emin
     After SubOneUlp, we could have one bit less.
      if 1 < d < p : bx >= MPFR_EXP(b)-2 >= MPFR_EXP(c) > emin
      if d == 1    : bx >= MPFR_EXP(b)-1 = MPFR_EXP(c) > emin.
      if d >=  p   : bx >= MPFR_EXP(b)-1 > emin since p>=2.
  */
  MPFR_ASSERTD( bx >= __gmpfr_emin);
  /*
    if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      DEBUG( printf("(Final Underflow)\n") );
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (inexact >= 0 && mpfr_powerof2_raw (a))))
        rnd_mode = MPFR_RNDZ;
      MPFR_TMP_FREE(marker);
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }
  */
  MPFR_SET_EXP (a, bx);

  MPFR_TMP_FREE(marker);
  MPFR_RET (inexact * MPFR_INT_SIGN (a));
}
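mpfr_sub1sp is internal, but it is the path mpfr_sub takes when a, b and c share one precision; a public-API sketch that lands in this code (precision and operands are arbitrary picks):

#include <mpfr.h>

int main(void)
{
    mpfr_t a, b, c;
    mpfr_inits2(53, a, b, c, (mpfr_ptr) 0);

    mpfr_set_d(b, 1.0, MPFR_RNDN);
    mpfr_set_d(c, 0x1p-60, MPFR_RNDN);       /* exponent gap d = 60 >= p = 53 */
    int inex = mpfr_sub(a, b, c, MPFR_RNDN); /* exercises the d >= p branch */

    mpfr_printf("a = %Ra, ternary = %d\n", a, inex);
    mpfr_clears(a, b, c, (mpfr_ptr) 0);
    return 0;
}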
Example #25
/*
   ret + (xp, n) = (yp, n)*(zp, n) mod (2^b + 1)
   needs (tp, 2n) temp space, everything reduced mod (2^b + 1)
   inputs, outputs are fully reduced
  
   N.B: 2n is not the same as 2b rounded up to nearest limb!
*/
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
			                                         mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;

  TMP_DECL;

  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b;

  ASSERT(b > 0);
  ASSERT(n > 0);
  ASSERT_MPN(yp, n);
  ASSERT_MPN(zp, n);
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n))
  {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;
      mp_limb_t ret;

      TMP_MARK;

      tx = TMP_BALLOC_LIMBS(3*n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;

      MPN_COPY(ty, yp, n);
      MPN_COPY(tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      while ((((mp_limb_t)1)<<depth) < b) depth++;
   
      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
      depth1 = depth/2 - off;
   
      w1 = b/(((mp_limb_t)1)<<(2*depth1));

      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);

      MPN_COPY(xp, tx, n);
      ret = tx[n];
      
      TMP_FREE;

	   return ret;
  }
#endif

  if (yp == zp)
     mpn_sqr(tp, yp, n);
  else
     mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      c = mpn_sub_n (xp, tp, tp + n, n);

      return mpn_add_1 (xp, xp, n, c);
    }

  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;

  return c;
}
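The subtraction at the end implements the usual reduction for moduli of the form 2^b + 1: since 2^b ≡ -1, a double-length product t = tl + th*2^b is congruent to tl - th. A scaled-down check (b = 16, plain integers, my own harness):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    const uint64_t b = 16, m = ((uint64_t) 1 << 16) + 1;   /* m = 2^b + 1 */
    uint64_t t  = 0x12345678u;               /* "double-length" product */
    uint64_t tl = t & 0xffffu, th = t >> b;  /* low and high halves */

    int64_t r = (int64_t) tl - (int64_t) th; /* cf. mpn_sub_n above */
    if (r < 0)
        r += m;                              /* cf. the mpn_add_1 fix-up */

    assert((uint64_t) r == t % m);           /* because 2^b = -1 (mod m) */
    return 0;
}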
Example #26
void
mpn_toom3_sqr_n (mp_ptr c, mp_srcptr a, mp_size_t n, mp_ptr t)
{
  mp_size_t k, k1, kk1, r, twok, twor;
  mp_limb_t cy, saved, vinf0, cinf0;
  mp_ptr trec;
  int sa;
  mp_ptr c1, c2, c3, c4;

  ASSERT(GMP_NUMB_BITS >= 6);
  ASSERT(n >= 17); /* so that r != 0 and 5k+3 <= 2n */

  /* the algorithm is the same as mpn_mul_n_tc3, with b=a */

  k = (n + 2) / 3; /* ceil(n/3) */
  twok = 2 * k;
  k1 = k + 1;
  kk1 = k + k1;
  r = n - twok;   /* last chunk */
  twor = 2 * r;

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;

  trec = t + 4 * k + 3; /* trec = v2 + (2k+2) */

  cy = mpn_add_n (c, a, a + twok, r);
  if (r < k)
    __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);
  c3[2] = (c1[0] = cy) + mpn_add_n (c2 + 2, c, a + k, k);

#define v2 (t+2*k+1)
#define vinf (t+4*k+2)

  TOOM3_SQR_REC (t, c2 + 2, k1, trec);

  sa = (c[k] != 0) ? 1 : mpn_cmp (c, a + k, k);
  c[k] = (sa >= 0) ? c[k] - mpn_sub_n (c, c, a + k, k)
    : mpn_sub_n (c, a + k, c, k);

  TOOM3_SQR_REC (c2, c, k1, trec);

#ifdef HAVE_NATIVE_mpn_addlsh1_n
  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
  if (r < k)
    __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
#else
  c[r] = mpn_lshift (c, a + twok, r, 1);
  if (r < k)
    MPN_ZERO(c + r + 1, k - r);
  c1[0] += mpn_add_n (c, c, a + k, k);
  mpn_lshift (c, c, k1, 1);
  c1[0] += mpn_add_n (c, c, a, k);
#endif

  TOOM3_SQR_REC (v2, c, k1, trec);

  TOOM3_SQR_REC (c, a, k, trec);

#ifdef HAVE_NATIVE_mpn_addlsh1_n
  mpn_addlsh1_n (v2, v2, c2, kk1);
#else
  mpn_lshift (t + 4 * k + 2, c2, kk1, 1);
  mpn_add_n (v2, v2, t + 4 * k + 2, kk1);
#endif

  saved = c4[0];
  TOOM3_SQR_REC (c4, a + twok, r, trec);
  cinf0 = mpn_add_n (vinf, c4, c, twor);
  vinf0 = c4[0];
  c4[0] = saved;

  toom3_interpolate (c, t, v2, c2, vinf, k, r, 1, vinf0, cinf0, vinf + twor);

#undef v2
#undef vinf
}
Example #27
void
mpn_toom3_mul_n (mp_ptr c, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr t)
{
  mp_size_t k, k1, kk1, r, twok, twor;
  mp_limb_t cy, cc, saved, vinf0, cinf0;
  mp_ptr trec;
  int sa, sb;
  mp_ptr c1, c2, c3, c4, c5;

  ASSERT(GMP_NUMB_BITS >= 6);
  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */

  /*
  The algorithm is the following:

  0. k = ceil(n/3), r = n - 2k, B = 2^(GMP_NUMB_BITS), t = B^k
  1. split a and b in three parts each a0, a1, a2 and b0, b1, b2
     with a0, a1, b0, b1 of k limbs, and a2, b2 of r limbs
  2. v0   <- a0*b0
     v1   <- (a0+a1+a2)*(b0+b1+b2)
     v2   <- (a0+2*a1+4*a2)*(b0+2*b1+4*b2)
     vm1  <- (a0-a1+a2)*(b0-b1+b2)
     vinf <- a2*b2
     t1   <- (3*v0+2*vm1+v2)/6-2*vinf
     t2   <- (v1+vm1)/2
  3. result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where
     c0   <- v0
     c1   <- v1 - t1
     c2   <- t2 - v0 - vinf
     c3   <- t1 - t2
     c4   <- vinf
  */

  k = (n + 2) / 3; /* ceil(n/3) */
  twok = 2 * k;
  k1 = k + 1;
  kk1 = k + k1;
  r = n - twok;   /* last chunk */
  twor = 2 * r;

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;

  trec = t + 4 * k + 3; /* trec = v2 + (2k+2) */

  /* put a0+a2 in {c, k+1}, and b0+b2 in {c+k+1, k+1};
     put a0+a1+a2 in {c+2k+2, k+1} and b0+b1+b2 in {c+3k+3,k+1}
     [requires 4k+4 <= 2n, i.e. n >= 8] */
  cy = mpn_add_n (c, a, a + twok, r);
  cc = mpn_add_n (c1 + 1, b, b + twok, r);
  if (r < k)
    {
      __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);
      __GMPN_ADD_1 (cc, c1 + 1 + r, b + r, k - r, cc);
    }
  c3[2] = (c1[0] = cy) + mpn_add_n (c2 + 2, c, a + k, k);
  c4[3] = (c2[1] = cc) + mpn_add_n (c3 + 3, c1 + 1, b + k, k);

#define v2 (t+2*k+1)
#define vinf (t+4*k+2)

  /* compute v1 := (a0+a1+a2)*(b0+b1+b2) in {t, 2k+1};
     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */
  TOOM3_MUL_REC (t, c2 + 2, c3 + 3, k1, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
					v1
  */

  /* put |a0-a1+a2| in {c, k+1} and |b0-b1+b2| in {c+4k+2,k+1} */
  /* sa = sign(a0-a1+a2) */
  sa = (c[k] != 0) ? 1 : mpn_cmp (c, a + k, k);
  c[k] = (sa >= 0) ? c[k] - mpn_sub_n (c, c, a + k, k)
		   : mpn_sub_n (c, a + k, c, k);
  /* b0+b2 is in {c+k+1, k+1} now */
  sb = (c2[1] != 0) ? 1 : mpn_cmp (c1 + 1, b + k, k);
  c5[2] = (sb >= 0) ? c2[1] - mpn_sub_n (c4 + 2, c1 + 1, b + k, k)
		    : mpn_sub_n (c4 + 2, b + k, c1 + 1, k);
  sa *= sb; /* sign of vm1 */

  /* compute vm1 := (a0-a1+a2)*(b0-b1+b2) in {c+2k, 2k+1};
     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */
  TOOM3_MUL_REC (c2, c, c4 + 2, k1, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
		vm1                      v1
  */

  /* compute a0+2a1+4a2 in {c, k+1} and b0+2b1+4b2 in {c+4k+2, k+1}
     [requires 5k+3 <= 2n, i.e. n >= 17] */
#ifdef HAVE_NATIVE_mpn_addlsh1_n
  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
  c5[2] = mpn_addlsh1_n (c4 + 2, b + k, b + twok, r);
  if (r < k)
    {
      __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
      __GMPN_ADD_1 (c5[2], c4 + 2 + r, b + k + r, k - r, c5[2]);
    }
  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
  c5[2] = 2 * c5[2] + mpn_addlsh1_n (c4 + 2, b, c4 + 2, k);
#else
  c[r] = mpn_lshift (c, a + twok, r, 1);
  c4[r + 2] = mpn_lshift (c4 + 2, b + twok, r, 1);
  if (r < k)
    {
      MPN_ZERO(c + r + 1, k - r);
      MPN_ZERO(c4 + r + 3, k - r);
    }
  c1[0] += mpn_add_n (c, c, a + k, k);
  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b + k, k);
  mpn_lshift (c, c, k1, 1);
  mpn_lshift (c4 + 2, c4 + 2, k1, 1);
  c1[0] += mpn_add_n (c, c, a, k);
  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b, k);
#endif

  /* compute v2 := (a0+2a1+4a2)*(b0+2b1+4b2) in {t+2k+1, 2k+1}
     v2 < 49*B^(2k) so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */
  TOOM3_MUL_REC (v2, c, c4 + 2, k1, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
		vm1                      v1         v2
  */

  /* compute v0 := a0*b0 in {c, 2k} */
  TOOM3_MUL_REC (c, a, b, k, trec);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       vm1                      v1         v2
  */

  /* now compute (3v0+2vm1+v2)/6 = [v0 + (2vm1+v2)/3]/2
     v2 <- v2+2vm1 = 3*(a0*b0+2*a0*b2+2*a1*b1+2*a1*b2+2*a2*b0+2*a2*b1+6*a2*b2),
     thus 0 <= v2 < 51*B^(2k) < 2^6*B^(2k)
     Uses temporary space {t+4k+2,2k+1}, requires T(n) >= 6k+3.
  */
  if (sa >= 0)
    {
#ifdef HAVE_NATIVE_mpn_addlsh1_n
      mpn_addlsh1_n (v2, v2, c2, kk1);
#else
      /* we can use vinf=t+4k+2 as workspace since it is not full yet */
      mpn_lshift (vinf, c2, kk1, 1);
      mpn_add_n (v2, v2, vinf, kk1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_sublsh1_n
      mpn_sublsh1_n (v2, v2, c2, kk1);
#else
      /* we can use vinf=t+4k+2 as workspace since it is not full yet */
      mpn_lshift (vinf, c2, kk1, 1);
      mpn_sub_n (v2, v2, vinf, kk1);
#endif
    }

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       vm1                      v1       v2+2vm1             */

  /* compute vinf := a2*b2 in {t+4k+2, 2r}: first put it in {c4, 2r},
     then copy it in {t+4k+2,2r} */
  saved = c4[0];
  TOOM3_MUL_REC (c4, a + twok, b + twok, r, trec);
  cinf0 = mpn_add_n (vinf, c4, c, twor); /* {v0,2r} + {vinf,2r} */
  vinf0 = c4[0];
  c4[0] = saved;

  toom3_interpolate (c, t, v2, c2, vinf, k, r, sa, vinf0, cinf0, vinf + twor);

#undef v2
#undef vinf
}
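
The boxed algorithm description at the top of mpn_toom3_mul_n can be sanity-checked with single-word coefficients. This standalone sketch (plain C, not MPIR code; the names v0..vinf, t1, t2 follow the comment) verifies that the five point evaluations and the t1/t2 formulas reproduce the schoolbook product coefficients, and that the divisions by 6 and 2 are exact:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    /* a = a0 + a1*t + a2*t^2, b likewise (t = B^k in the real code) */
    long a0 = 5, a1 = 7, a2 = 3;
    long b0 = 2, b1 = 6, b2 = 4;

    long v0   = a0 * b0;
    long v1   = (a0 + a1 + a2) * (b0 + b1 + b2);
    long v2   = (a0 + 2*a1 + 4*a2) * (b0 + 2*b1 + 4*b2);
    long vm1  = (a0 - a1 + a2) * (b0 - b1 + b2);
    long vinf = a2 * b2;

    long t1 = (3*v0 + 2*vm1 + v2) / 6 - 2*vinf;  /* exact division */
    long t2 = (v1 + vm1) / 2;                    /* exact division */

    /* coefficients of the product c = a*b, as in step 3 of the comment */
    assert(v0        == a0*b0);                   /* c0 */
    assert(v1 - t1   == a0*b1 + a1*b0);           /* c1 */
    assert(t2 - v0 - vinf == a0*b2 + a1*b1 + a2*b0); /* c2 */
    assert(t1 - t2   == a1*b2 + a2*b1);           /* c3 */
    assert(vinf      == a2*b2);                   /* c4 */
    printf("Toom-3 interpolation identities hold\n");
    return 0;
}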
Example #28
/* put in {c, 2n} where n = 2k+r the value of {v0,2k} (already in place)
   + B^k * [{v1, 2k+1} - {t1, 2k+1}]
   + B^(2k) * [{t2, 2k+1} - {v0+vinf, 2k}]
   + B^(3k) * [{t1, 2k+1} - {t2, 2k+1}]
   + B^(4k) * {vinf,2r} (high 2r-1 limbs already in place)
   where {t1, 2k+1} = (3*{v0,2k}+2*sa*{vm1,2k+1}+{v2,2k+1})/6-2*{vinf,2r}
	 {t2, 2k+1} = ({v1, 2k+1} + sa * {vm1, 2k+1})/2
   (sa is the sign of {vm1, 2k+1}).

   {vinf, 2r} stores the content of {v0, 2r} + {vinf, 2r}, with carry in cinf0.
   vinf0 is the low limb of vinf.

   ws is temporary space, and should have at least 2r limbs.

   Think about:

   The evaluated point a-b+c stands a good chance of having a zero carry
   limb, a+b+c would have a 1/4 chance, and 4*a+2*b+c a 1/8 chance, roughly.
   Perhaps this could be tested and stripped.  Doing so before recursing
   would be better than stripping at the start of mpn_toom3_mul_n/sqr_n,
   since then the recursion could be based on the new size.  Although in
   truth the kara vs toom3 crossover is never so exact that one limb either
   way makes a difference.

   A small value like 1 or 2 for the carry could perhaps also be handled
   with an add_n or addlsh1_n.  Would that be faster than an extra limb on a
   (recursed) multiply/square?
*/
static void
toom3_interpolate (mp_ptr c, mp_srcptr v1, mp_ptr v2, mp_ptr vm1,
		   mp_ptr vinf, mp_size_t k, mp_size_t r, int sa,
		   mp_limb_t vinf0, mp_limb_t cinf0, mp_ptr ws)
{
  mp_limb_t cy, saved;
  unsigned long twok = k + k;
  unsigned long kk1 = twok + 1;
  unsigned long twor = r + r;
  mp_ptr c1, c2, c3, c4, c5;
  mp_limb_t cout; /* final carry, should be zero at the end */

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;

#define v0 (c)
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0      |vm1|       hi(vinf)       v1       v2+2vm1      vinf
							      +lo(v0) */

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
#ifdef HAVE_NATIVE_mpn_rsh1add_n
  mpn_rsh1add_n (v2, v2, v0, twok); /* v2 <- (lo(v2)+v0) / 2, exact */
  cy = v2[twok] & 1; /* add high limb of v2 divided by 2 */
  v2[twok] >>= 1;
  MPN_INCR_U (v2 + twok - 1, 2, cy << (GMP_NUMB_BITS - 1));
#else
  v2[twok] += mpn_add_n (v2, v2, v0, twok);
  mpn_rshift (v2, v2, kk1, 1);
#endif

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0      |vm1|      hi(vinf)       v1    (3v0+2vm1+v2)    vinf
						    /6         +lo(v0) */

  /* vm1 <- t2 := (v1 + sa*vm1) / 2
     t2 = a0*b0+a0*b2+a1*b1+a2*b0+a2*b2 >= 0
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
     and the division by two is exact */
  if (sa >= 0)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      mpn_add_n (vm1, vm1, v1, kk1);
      mpn_rshift (vm1, vm1, kk1, 1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      mpn_sub_n (vm1, v1, vm1, kk1);
      mpn_rshift (vm1, vm1, kk1, 1);
#endif
    }

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       t2        hi(vinf)       v1         t1       vinf+lo(v0) */

  /* subtract 2*vinf to v2,
     result is t1 := a0*b0+a0*b2+a1*b1+a1*b2+a2*b0+a2*b1+a2*b2 >= 0 */
  saved = c4[0];
  c4[0] = vinf0;
#ifdef HAVE_NATIVE_mpn_sublsh1_n
  cy = mpn_sublsh1_n (v2, v2, c4, twor);
#else
  cy = mpn_lshift (ws, c4, twor, 1);
  cy += mpn_sub_n (v2, v2, ws, twor);
#endif
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
  c4[0] = saved;

  /* subtract {t2, 2k+1} in {c+3k, 2k+1} i.e. in {t2+k, 2k+1}:
     by chunks of k limbs from right to left to avoid overlap */
#define t2 (vm1)
  /* a borrow may occur in one of the 2 following __GMPN_SUB_1 calls, but since
     the final result is nonnegative, it will be compensated later on */
  __GMPN_SUB_1 (cout, c5, c5, twor - k, t2[twok]);
  cy = mpn_sub_n (c4, c4, t2 + k, k);
  __GMPN_SUB_1 (cout, c5, c5, twor - k, cy);
  cy = mpn_sub_n (c3, c3, t2, k);
  __GMPN_SUB_1 (cout, c4, c4, twor, cy);

  /* don't forget to add vinf0 in {c+4k, ...} */
  __GMPN_ADD_1 (cout, c4, c4, twor, vinf0);

  /* c  c+k c+2k c+3k c+4k+1   t  t+2k+1 t+4k+2
     v0     t2        hi(vinf) v1 t1     vinf
		 -t2                    +lo(v0)
  */

  /* c  c+k c+2k c+3k c+4k     t  t+2k+1 t+4k+2
     v0     t2        vinf     v1 t1     vinf
		 -t2                    +lo(v0)
  */

  /* subtract v0+vinf in {c+2k, ...} */
  cy = cinf0 + mpn_sub_n (c2, c2, vinf, twor);
  if (twor < twok)
    {
      __GMPN_SUB_1 (cy, c2 + twor, c2 + twor, twok - twor, cy);
      cy += mpn_sub_n (c2 + twor, c2 + twor, v0 + twor, twok - twor);
    }
  __GMPN_SUB_1 (cout, c4, c4, twor, cy); /* 2n-4k = 2r */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t2          vinf      v1  t1      vinf
	      -v0   -t2                        +lo(v0)
	      -vinf                                    */

  /* subtract t1 in {c+k, ...} */
  cy = mpn_sub_n (c1, c1, v2, kk1);
  __GMPN_SUB_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1)=k+2r-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t2          vinf      v1  t1      vinf
	 -t1  -v0   -t2
	      -vinf                                    */

  /* add t1 in {c+3k, ...} */
  cy = mpn_add_n (c3, c3, v2, kk1);
  __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0       t2    t1    vinf      v1  t1      vinf
	 -t1  -v0   -t2
	      -vinf                                    */

  /* add v1 in {c+k, ...} */
  cy = mpn_add_n (c1, c1, v1, kk1);
  __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */

  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
     v0  v1   t2    t1    vinf      v1  t1      vinf
	 -t1  -v0   -t2
	      -vinf                                    */
#undef v0
#undef t2
}
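
The mpn_rsh1add_n branches above compute the exact average (x + y) >> 1 without losing the carry out of the top limb, which the plain add-then-rshift fallback would drop. A one-limb sketch of the carry-preserving form, assuming 64-bit limbs (standalone, not GMP code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t x = UINT64_C(0xfffffffffffffffe);  /* x + y overflows 64 bits */
    uint64_t y = 2;
    uint64_t sum = x + y;                       /* low limb of x + y */
    uint64_t carry = sum < x;                   /* carry out of the add */
    /* right-shift the 65-bit value (carry:sum) by one bit */
    uint64_t half = (sum >> 1) | (carry << 63);
    assert(half == UINT64_C(1) << 63);          /* (2^64 + 0) / 2 = 2^63 */
    return 0;
}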
Example #29
/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */
int
mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
		      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
		      mp_ptr tp)
{
  unsigned i;
  int neg;
#ifdef HAVE_NATIVE_mpn_addlsh_n
  mp_limb_t cy;
#endif

  ASSERT (k >= 3);
  ASSERT (shift*k < GMP_NUMB_BITS);

  ASSERT (hn > 0);
  ASSERT (hn <= n);

  /* The degree k is also the number of full-size coefficients, so
   * that last coefficient, of size hn, starts at xp + k*n. */

#ifdef HAVE_NATIVE_mpn_addlsh_n
  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
  for (i = 4; i < k; i += 2)
    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);

  tp[n] = mpn_lshift (tp, xp+n, n, shift);
  for (i = 3; i < k; i+= 2)
    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);

  if (k & 1)
    {
      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
      MPN_INCR_U (tp + hn, n+1 - hn, cy);
    }
  else
    {
      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
    }

#else /* !HAVE_NATIVE_mpn_addlsh_n */
  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
  xp2[n] += mpn_add_n (xp2, xp, tp, n);
  for (i = 4; i < k; i += 2)
    {
      xp2[n] += mpn_lshift (tp, xp + ((mp_size_t) i)*n, n, i*shift);
      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
    }

  tp[n] = mpn_lshift (tp, xp+n, n, shift);
  for (i = 3; i < k; i+= 2)
    {
      tp[n] += mpn_lshift (xm2, xp + ((mp_size_t) i)*n, n, i*shift);
      tp[n] += mpn_add_n (tp, tp, xm2, n);
    }

  xm2[hn] = mpn_lshift (xm2, xp + ((mp_size_t) k)*n, hn, k*shift);
  if (k & 1)
    mpn_add (tp, tp, n+1, xm2, hn+1);
  else
    mpn_add (xp2, xp2, n+1, xm2, hn+1);
#endif /* !HAVE_NATIVE_mpn_addlsh_n */

  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;

#ifdef HAVE_NATIVE_mpn_add_n_sub_n
  if (neg)
    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
  else
    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
  if (neg)
    mpn_sub_n (xm2, tp, xp2, n + 1);
  else
    mpn_sub_n (xm2, xp2, tp, n + 1);

  mpn_add_n (xp2, xp2, tp, n + 1);
#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */

  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
	  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
	  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));

  return neg;
}
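
mpn_toom_eval_pm2exp accumulates the even-index terms in xp2 and the odd-index terms in tp, then takes their sum and difference to obtain the values at +2^shift and -2^shift, returning the sign of the latter separately. The same even/odd scheme with single-word coefficients (a standalone sketch, not GMP code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* polynomial x0 + x1*Y + x2*Y^2 + x3*Y^3 evaluated at Y = +/- 2^shift */
    const unsigned shift = 3;
    int64_t x[4] = {5, 7, 3, 2};

    int64_t even = x[0] + (x[2] << (2 * shift));            /* even part */
    int64_t odd  = (x[1] << shift) + (x[3] << (3 * shift)); /* odd part  */

    int64_t xp2 = even + odd;   /* value at +2^shift */
    int64_t xm2 = even - odd;   /* value at -2^shift */

    int64_t Y = 1 << shift;     /* direct check */
    assert(xp2 == x[0] + x[1]*Y + x[2]*Y*Y + x[3]*Y*Y*Y);
    assert(xm2 == x[0] - x[1]*Y + x[2]*Y*Y - x[3]*Y*Y*Y);
    return 0;
}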
Example #30
void
mpn_toom4_mul_n (mp_ptr rp, mp_srcptr up,
		          mp_srcptr vp, mp_size_t n)
{
  mp_size_t ind;
  mp_limb_t cy, cy2, r30, r31;
  mp_ptr tp;
  mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1;
  TMP_DECL;

  sn = (n + 3) / 4;

  h1 = n - 3*sn;
  
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)
#define b3 (vp + 3*sn)

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   TMP_MARK;

   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))
#define u6 (tp + 4*t4 + 4*(sn+1))

   u6[sn] = mpn_add(u6, a1, sn, a3, h1);
   u5[sn] = mpn_add_n(u5, a2, a0, sn);
   mpn_add_n(u3, u5, u6, sn + 1);
   n4 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u4, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u4, u6, u5, sn + 1);
      n4 = -n4;
   }

   u6[sn] = mpn_add(u6, b1, sn, b3, h1);
   u5[sn] = mpn_add_n(u5, b2, b0, sn);
   mpn_add_n(r2, u5, u6, sn + 1);
   n5 = sn + 1;
   if (mpn_cmp(u5, u6, sn + 1) >= 0)
      mpn_sub_n(u5, u5, u6, sn + 1);
   else
   {  
      mpn_sub_n(u5, u6, u5, sn + 1);
      n5 = -n5;
   }
 
   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, a2, sn, 1);
   MPN_COPY(r2, a3, h1);
   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);
   cy = mpn_addmul_1(r2, a1, h1, 4);
#endif
   if (sn > h1) 
   {
      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u5, r1, r2, sn + 1);
   n6 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(u6, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(u6, r2, r1, sn + 1);
      n6 = -n6;
   }
 
#if HAVE_NATIVE_mpn_addlsh_n
   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);
   mpn_lshift(r1, r1, sn + 1, 1);
   cy = mpn_addlsh_n(r2, b3, b1, h1, 2);
#else
   r1[sn] = mpn_lshift(r1, b2, sn, 1);
   MPN_COPY(r2, b3, h1);
   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);
   cy = mpn_addmul_1(r2, b1, h1, 4);
#endif
   if (sn > h1) 
   {
      cy2 = mpn_lshift(r2 + h1, b1 + h1, sn - h1, 2);
      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);
   }
   r2[sn] = cy;
   mpn_add_n(u2, r1, r2, sn + 1);
   n8 = sn + 1;
   if (mpn_cmp(r1, r2, sn + 1) >= 0)
      mpn_sub_n(r2, r1, r2, sn + 1);
   else
   {  
      mpn_sub_n(r2, r2, r1, sn + 1);
      n8 = -n8;
   }
    
   r30 = r3[0];
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */
   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */
   r3[1] = r31;

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(u2, a2, a3, h1);
   if (sn > h1)
      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy); 
   u2[sn] = cy;
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);     
   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);     
#else
   MPN_COPY(u2, a0, sn);
   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);
   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);
   cy = mpn_addmul_1(u2, a3, h1, 8);
   if (sn > h1) cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);
   u2[sn] += cy;
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
   cy = mpn_addlsh1_n(r1, b2, b3, h1);
   if (sn > h1)
      cy = mpn_add_1(r1 + h1, b2 + h1, sn - h1, cy); 
   r1[sn] = cy;
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);     
   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);     
#else
   MPN_COPY(r1, b0, sn);
   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);
   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);
   cy = mpn_addmul_1(r1, b3, h1, 8);
   if (sn > h1) cy = mpn_add_1(r1 + h1, r1 + h1, sn - h1, cy);
   r1[sn] += cy;
#endif
   
   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */
   
   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h1); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */

   TC4_DENORM(r1, n1, t4 - 1);

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != 2*n) 
   {
      MPN_ZERO(rp + rpn, 2*n - rpn);
   }

   TMP_FREE;
}
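
The u5/u6 block near the top of mpn_toom4_mul_n is the even/odd trick at the points +1 and -1: u5 = a0 + a2 and u6 = a1 + a3 give the value at +1 as u5 + u6 and the value at -1 as +/-(u5 - u6), with the sign carried separately in n4 (and n5 for the b side) so the recursive multiplies see nonnegative operands. A single-word sketch of that pattern (standalone, not MPIR code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* a = a0 + a1*t + a2*t^2 + a3*t^3, evaluated at t = 1 and t = -1 */
    int64_t a[4] = {9, 4, 6, 2};

    int64_t u5 = a[0] + a[2];                     /* even-index sum */
    int64_t u6 = a[1] + a[3];                     /* odd-index sum  */

    int64_t at1  = u5 + u6;                       /* value at +1 */
    int64_t diff = u5 >= u6 ? u5 - u6 : u6 - u5;  /* |value at -1| */
    int sign = u5 >= u6 ? 1 : -1;                 /* sign kept apart, as in n4 */

    assert(at1 == a[0] + a[1] + a[2] + a[3]);
    assert(sign * diff == a[0] - a[1] + a[2] - a[3]);
    return 0;
}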