Example #1
static mp_limb_t
mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  mp_limb_t qhl, cc;
  mp_size_t n2 = n/2;

  if (n % 2 != 0)
    {
      mp_ptr qp1 = qp + 1;
      qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch);
      qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2,
			mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch));

      cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]);
      cc = mpn_sub_1 (np + n, np + n, 1, cc);
      if (qhl != 0)
	cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
      while (cc != 0)
	{
	  qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);
	  cc -= mpn_add_n (np + 1, np + 1, dp, n);
	}
      qhl += mpn_add_1 (qp1, qp1, n - 1,
			mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
    }
  else
    {
      qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch);
      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
			mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch));
    }
  return qhl;
}
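
An editorial note on the idiom above (not part of the original source): every divide-and-conquer division on this page uses the same correction loop. After subtracting q*D from the partial remainder, a leftover borrow (cc != 0) means the quotient estimate was one or more too large, so the quotient is decremented and D added back until the borrow clears:

   /* generic shape of the adjustment loop, with the names used above */
   while (cc != 0)
     {
       qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);  /* quotient -= 1 */
       cc  -= mpn_add_n (np + 1, np + 1, dp, n);           /* remainder += D */
     }

Each iteration cancels one borrow, so in practice the loop body rarely runs more than once or twice.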
Example #2
static void
gst_mpz_sub_ui (gst_mpz *dif, const gst_mpz *min, mp_limb_t sub)
{
    mp_srcptr minp;
    mp_ptr difp;
    mp_size_t minsize, difsize;
    mp_size_t abs_minsize;

    minsize = min->size;
    abs_minsize = ABS (minsize);

    /* If there is not enough space for DIF (and a possible carry), grow it.  */
    difsize = abs_minsize + 1;
    if (dif->alloc < difsize)
        gst_mpz_realloc (dif, difsize);

    /* These must be set after the realloc (MIN may be the same as DIF).  */
    minp = min->d;
    difp = dif->d;

    if (sub == 0)
    {
        MPN_COPY (difp, minp, abs_minsize);
        dif->size = minsize;
        return;
    }
    if (abs_minsize == 0)
    {
        difp[0] = sub;
        dif->size = -1;
        return;
    }

    if (minsize < 0)
    {
        difsize = mpn_add_1 (difp, minp, abs_minsize, sub);
        if (difsize != 0)
            difp[abs_minsize] = 1;
        difsize = -(difsize + abs_minsize);
    }
    else
    {
        /* The signs are different.  Need an exact comparison to determine
           which operand to subtract from which.  */
        if (abs_minsize == 1 && minp[0] < sub)
            difsize = -(abs_minsize
                        + mpn_sub_1 (difp, &sub, 1, *minp));
        else
            difsize = (abs_minsize
                       + mpn_sub_1 (difp, minp, abs_minsize, sub));
    }

    dif->size = difsize;
}
Example #3
void fft_adjust_sqrt2(mp_limb_t * r, mp_limb_t * i1, 
            mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_limb_t * temp)
{
   mp_bitcnt_t wn = limbs*FLINT_BITS;
   mp_limb_t cy;
   mp_size_t j = i/2, k = w/2;
   mp_size_t y;
   mp_bitcnt_t b1;
   int negate = 0;

   b1 = j + wn/4 + i*k;
   if (b1 >= wn) 
   {
      negate = 1;
      b1 -= wn;
   }
   y  = b1/FLINT_BITS;
   b1 = b1%FLINT_BITS;
 
   /* multiply by 2^{j + wn/4 + i*k} */
   if (y)
   {
      mpn_copyi(temp + y, i1, limbs - y);
      cy = mpn_neg_n(temp, i1 + limbs - y, y);
      temp[limbs] = 0;
      mpn_addmod_2expp1_1(temp + y, limbs - y, -i1[limbs]);
      mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); 
      mpn_mul_2expmod_2expp1(r, temp, limbs, b1);
   } else
      mpn_mul_2expmod_2expp1(r, i1, limbs, b1);

   /* multiply by 2^{wn/2} */
   y = limbs/2;
   cy = 0;

   mpn_copyi(temp + y, r, limbs - y);
   temp[limbs] = 0;
   if (y) cy = mpn_neg_n(temp, r + limbs - y, y);
   mpn_addmod_2expp1_1(temp + y, limbs - y, -r[limbs]);
   mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); 
   
   /* shift by an additional half limb (rare) */
   if (limbs & 1) 
       mpn_mul_2expmod_2expp1(temp, temp, limbs, FLINT_BITS/2);

   /* subtract */
   if (negate)
      mpn_sub_n(r, r, temp, limbs + 1);
   else
      mpn_sub_n(r, temp, r, limbs + 1);
}
Example #4
mp_limb_t
mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
		    gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_size_t lo, hi;
  mp_limb_t cy, qh, ql;

  lo = n >> 1;			/* floor(n/2) */
  hi = n - lo;			/* ceil(n/2) */

  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
  else
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);

  mpn_mul (tp, qp + lo, hi, dp, lo);

  cy = mpn_sub_n (np + lo, np + lo, tp, n);
  if (qh != 0)
    cy += mpn_sub_n (np + n, np + n, dp, lo);

  while (cy != 0)
    {
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
    }

  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
  else
    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);

  mpn_mul (tp, dp, hi, qp, lo);

  cy = mpn_sub_n (np, np, tp, n);
  if (ql != 0)
    cy += mpn_sub_n (np + lo, np + lo, dp, hi);

  while (cy != 0)
    {
      mpn_sub_1 (qp, qp, lo, 1);
      cy -= mpn_add_n (np, np, dp, n);
    }

  return qh;
}
Example #5
void
mpz_combit (mpz_ptr d, unsigned long int bit_index)
{
  mp_size_t dsize = ABSIZ(d);
  mp_ptr dp = LIMBS(d);
  
  mp_size_t limb_index = bit_index / GMP_NUMB_BITS;
  mp_limb_t bit = ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));

  if (limb_index >= dsize)
    {
      MPZ_REALLOC(d, limb_index + 1);
      dp = LIMBS(d);
      
      MPN_ZERO(dp + dsize, limb_index + 1 - dsize);
      dsize = limb_index + 1;
    }
    
  if (SIZ(d) >= 0)
    {
      dp[limb_index] ^= bit;
      MPN_NORMALIZE (dp, dsize);
      SIZ(d) = dsize;
    }
  else
    {
      mp_limb_t x = -dp[limb_index];
      mp_size_t i;

      /* non-zero limb below us means ones-complement */
      for (i = limb_index-1; i >= 0; i--)
        if (dp[i] != 0)
          {
            x--;  /* change twos comp to ones comp */
            break;
          }

      if (x & bit)
	{
          mp_limb_t  c;

	  /* Clearing the bit increases the magnitude. We might need a carry. */
	  MPZ_REALLOC(d, dsize + 1);
	  dp = LIMBS(d);

          __GMPN_ADD_1 (c, dp+limb_index, dp+limb_index,
                        dsize - limb_index, bit);
          dp[dsize] = c;
          dsize += c;
        }
      else
	/* Setting the bit decreases the magnitude */
	mpn_sub_1(dp+limb_index, dp+limb_index, dsize - limb_index, bit);

      MPN_NORMALIZE (dp, dsize);
      SIZ(d) = -dsize;
    }
}
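
A usage sketch for the entry point above (my example, not from the source): mpz_combit toggles one bit of d, treating negative values as two's complement with infinite sign extension.

   #include <gmp.h>

   static void
   combit_demo (void)
   {
     mpz_t d;
     mpz_init_set_si (d, -1);  /* ...1111 in the two's complement view */
     mpz_combit (d, 3);        /* bit 3 was set, so it is cleared: d becomes -9 */
     mpz_clear (d);
   }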
Example #6
static void fp_set_si(element_ptr e, signed long int op) {
  const fp_field_data_ptr p = e->field->data;
  const size_t t = p->limbs;
  mp_limb_t *d = e->data;
  if (op < 0) {
    mpn_sub_1(d, p->primelimbs, t, -op);
  } else {
    d[0] = op;
    memset(&d[1], 0, sizeof(mp_limb_t) * (t - 1));
  }
}
Example #7
static void
bench_modinv_powm (void *p)
{
  struct ecc_ctx *ctx = (struct ecc_ctx *) p;
  const struct ecc_curve *ecc = ctx->ecc;
  mp_size_t size = ecc->p.size;
  
  mpn_sub_1 (ctx->rp + size, ecc->p.m, size, 2);
  mpn_sec_powm (ctx->rp, ctx->ap, size,
		ctx->rp + size, ecc->p.bit_size,
		ecc->p.m, size, ctx->tp);
}
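
The benchmark inverts via Fermat's little theorem: for prime p, a^(p-2) ≡ a^(-1) (mod p), and the mpn_sub_1 call builds the exponent p - 2 in place. A minimal sketch of the same idea through the public mpz API (hypothetical helper name; assumes p is prime and does not divide a):

   #include <gmp.h>

   static void
   mod_inverse_fermat (mpz_t r, const mpz_t a, const mpz_t p)
   {
     mpz_t e;
     mpz_init (e);
     mpz_sub_ui (e, p, 2);   /* e = p - 2 */
     mpz_powm (r, a, e, p);  /* r = a^e mod p = a^(-1) mod p */
     mpz_clear (e);
   }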
Example #8
/* computes x + y * 2^shift (optionally negated) */
slong
_fmpr_add_mpn(fmpr_t z,
        mp_srcptr xman, mp_size_t xn, int xsign, const fmpz_t xexp,
        mp_srcptr yman, mp_size_t yn, int ysign, const fmpz_t yexp,
        slong shift, slong prec, fmpr_rnd_t rnd)
{
    slong tn, zn, alloc, ret, shift_bits, shift_limbs;
    int negative;
    mp_limb_t tmp_stack[ADD_STACK_ALLOC];
    mp_limb_t cy;
    mp_ptr tmp, tmp2;

    shift_limbs = shift / FLINT_BITS;
    shift_bits = shift % FLINT_BITS;

    /* x does not overlap with y or the result -- outcome is
       equivalent to adding/subtracting a small number to/from y
       and rounding */
    if (shift > xn * FLINT_BITS &&
        prec != FMPR_PREC_EXACT &&
        xn * FLINT_BITS + prec - (FLINT_BITS * (yn - 1)) < shift)
    {
        zn = (prec + FLINT_BITS - 1) / FLINT_BITS;
        zn = FLINT_MAX(zn, yn) + 2;
        shift_limbs = zn - yn;
        alloc = zn;

        ADD_TMP_ALLOC

        flint_mpn_zero(tmp, shift_limbs);
        flint_mpn_copyi(tmp + shift_limbs, yman, yn);

        if (xsign == ysign)
        {
            tmp[0] = 1;
        }
        else
        {
            mpn_sub_1(tmp, tmp, zn, 1);
            while (tmp[zn-1] == 0)
                zn--;
        }

        ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn, ysign, prec, rnd);
        shift -= shift_limbs * FLINT_BITS;
        fmpz_add_si_inline(fmpr_expref(z), yexp, shift);

        ADD_TMP_FREE

        return ret;
    }
Example #9
File: fmpz.c  Project: hperl/flint
void fmpz_sub_ui(fmpz_t output, const fmpz_t input, const unsigned long x)
{
   unsigned long carry;
   
   if (x)
   {
      if (!input[0])
      {
         output[1] = x;
         output[0] = -1L;
      } else if ((long) input[0] < 0)
      {
         carry = mpn_add_1(output + 1, input + 1, ABS(input[0]), x); 
         output[0] = input[0];
         if (carry)
         {
            output[ABS(output[0])+1] = carry;
            output[0]--;
         }
      } else if ((long) input[0] > 1L)
      {
         mpn_sub_1(output + 1, input + 1, input[0], x); 
         output[0] = input[0];
         NORM(output);
      } else
      {
         if (x <= input[1]) 
         {
            output[1] = input[1] - x;
            if (!output[1]) output[0] = 0;
            else output[0] = 1L;
         } else
         {
            output[1] = x - input[1];
            output[0] = -1L;
         } 
      }
   } else
   {
      fmpz_set(output, input);
   }
}
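
Both this function and fmpz_sub_ui_inplace in Example #13 assume the legacy FLINT fmpz layout. The following summary is inferred from the code above, not taken from documentation:

   /* Inferred layout of this legacy fmpz_t:
        f[0]    signed limb count: 0 means zero, negative means f < 0
        f[1..]  magnitude limbs, least significant first
      Hence ABS(f[0]) is the length passed to mpn_add_1/mpn_sub_1, and
      NORM strips high zero limbs and fixes up f[0] afterwards.  */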
Example #10
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
		    mp_srcptr dip, mp_ptr tp)
{
  mp_size_t lo, hi;
  mp_limb_t cy, qh, ql;

  lo = n >> 1;			/* floor(n/2) */
  hi = n - lo;			/* ceil(n/2) */

  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
    qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dip);
  else
    qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dip, tp);

  mpn_mul (tp, qp + lo, hi, dp, lo);

  cy = mpn_sub_n (np + lo, np + lo, tp, n);
  if (qh != 0)
    cy += mpn_sub_n (np + n, np + n, dp, lo);

  while (cy != 0)
    {
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
    }

  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
    ql = mpn_sb_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dip);
  else
    ql = mpn_dc_divappr_q_n (qp, np + hi, dp + hi, lo, dip, tp);

  if (UNLIKELY (ql != 0))
    {
      mp_size_t i;
      for (i = 0; i < lo; i++)
	qp[i] = GMP_NUMB_MASK;
    }

  return qh;
}
Example #11
void mpir_fft_adjust(mp_ptr r, mp_ptr i1, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)
{
   mp_bitcnt_t b1;
   mp_limb_t cy;
   mp_size_t x;

   b1 = i*w;
   x  = b1/GMP_LIMB_BITS;
   b1 = b1%GMP_LIMB_BITS;

   if (x)
   {
      mpn_copyi(r + x, i1, limbs - x);
      r[limbs] = 0;
      cy = mpn_neg_n(r, i1 + limbs - x, x);
      mpn_addmod_2expp1_1(r + x, limbs - x, -i1[limbs]);
      mpn_sub_1(r + x, r + x, limbs - x + 1, cy); 
      mpn_mul_2expmod_2expp1(r, r, limbs, b1);
   } else
      mpn_mul_2expmod_2expp1(r, i1, limbs, b1);
}
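
The limb-shift branch above exploits B^limbs ≡ -1 (mod B^limbs + 1): limbs shifted out at the top re-enter at the bottom negated, which is what the mpn_neg_n/mpn_sub_1 pair implements. A toy single-word model of the same arithmetic (my sketch; assumes n + s is smaller than the width of unsigned long):

   /* multiply v by 2^s modulo 2^n + 1, using 2^n ≡ -1 */
   static unsigned long
   toy_mul_2exp_mod_2expp1 (unsigned long v, unsigned s, unsigned n)
   {
     unsigned long m  = (1UL << n) + 1;
     unsigned long lo = (v << s) & ((1UL << n) - 1);  /* bits below 2^n */
     unsigned long hi = (v << s) >> n;                /* shifted-out part */
     return (lo + m - hi % m) % m;                    /* lo - hi (mod m) */
   }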
Example #12
static mp_limb_t
mpn_dc_div_3_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  mp_size_t twon = n + n;
  mp_limb_t qhl, cc;

  if (n < DIV_DC_THRESHOLD)
    qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
  else
    qhl = mpn_dc_div_2_by_1 (qp, np + n, dp + n, n, scratch);

  mpn_mul_n (scratch, qp, dp, n);
  cc = mpn_sub_n (np, np, scratch, twon);

  if (qhl != 0)
    cc += mpn_sub_n (np + n, np + n, dp, n);
  while (cc != 0)
    {
      qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
      cc -= mpn_add_n (np, np, dp, twon);
    }
  return qhl;
}
Example #13
File: fmpz.c  Project: hperl/flint
void fmpz_sub_ui_inplace(fmpz_t output, const unsigned long x)
{
   unsigned long carry;
   
   if (x)
   {
      if (!output[0])
      {
         output[1] = x;
         output[0] = -1L;
      } else if ((long) output[0] < 0)
      {
         carry = mpn_add_1(output + 1, output + 1, ABS(output[0]), x); 
         if (carry)
         {
            output[ABS(output[0])+1] = carry;
            output[0]--;
         }
      } else if ((long) output[0] > 1L)
      {
         mpn_sub_1(output + 1, output + 1, output[0], x); 
         NORM(output);
      } else
      {
         if (x <= output[1]) 
         {
            output[1] -= x;
            if (!output[1]) output[0] = 0;
         } else
         {
            output[1] = x - output[1];
            output[0] = -1L;
         } 
      }
   }
}
Example #14
mp_limb_t
mpn_dc_div_qr (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6);		/* to adhere to mpn_sb_div_qr's limits */
  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sb_div_qr's limits */
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn > dn)
    {
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      /* Perform the typically smaller block first.  */
      if (qn == 1)
	{
	  mp_limb_t q, n2, n1, n0, d1, d0, d11, d01;

	  /* Handle qh up front, for simplicity. */
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
	  if (qh)
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

	  /* A single iteration of schoolbook: One 3/2 division,
	     followed by the bignum update and adjustment. */
	  n2 = np[0];
	  n1 = np[-1];
	  n0 = np[-2];
	  d1 = dp[-1];
	  d0 = dp[-2];
	  d01 = d0 + 1;
	  d11 = d1 + (d01 < d0);

	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

	  if (UNLIKELY (n2 == d1) && n1 == d0)
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
	      ASSERT (cy == n2);
	    }
	  else
	    {
	      mpir_divrem32_preinv2 (q, n1, n0, n2, n1, n0, d11, d01, d1, d0, dinv);

	      if (dn > 2)
		{
		  mp_limb_t cy, cy1;
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

		  cy1 = n0 < cy;
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
		  cy = n1 < cy1;
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
		  np[-2] = n0;

		  if (UNLIKELY (cy != 0))
		    {
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
		      qh -= (q == 0);
		      q = (q - 1) & GMP_NUMB_MASK;
		    }
		}
	      else
		np[-2] = n0;

	      np[-1] = n1;
	    }
	  qp[0] = q;
	}
      else
	{
      /* Do a 2qn / qn division */
	  if (qn == 2)
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	    qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
	  else
	    qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

	  if (qn != dn)
	    {
	      if (qn > dn - qn)
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	      else
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	      if (qh != 0)
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	      while (cy != 0)
		{
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
		}
	    }
	}

      qn = nn - dn - qn;
      do
	{
	  qp -= dn;
	  np -= dn;
	  ASSERT_NOCARRY(mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp));
	  qn -= dn;
	}
      while (qn > 0);
    }
  else
    {
      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);
      else
	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

      if (qn != dn)
	{
	  if (qn > dn - qn)
	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	  else
	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	  if (qh != 0)
	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	  while (cy != 0)
	    {
	      qh -= mpn_sub_1 (qp, qp, qn, 1);
	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
	    }
	}
    }

  TMP_FREE;
  return qh;
}
Example #15
/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if one of the operand
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
 * combine results and obtain a natural number when one knows in
 * advance that the final value is less than (B^rn-1).
 * Moreover it should not be a problem if mulmod_bnm1 is used to
 * compute the full product with an+bn <= rn, because this condition
 * implies (B^an-1)(B^bn-1) < (B^rn-1) .
 *
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
 *
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
 */
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
{
  ASSERT (0 < bn);
  ASSERT (bn <= an);
  ASSERT (an <= rn);

  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (bn < rn))
	{
	  if (UNLIKELY (an + bn <= rn))
	    {
	      mpn_mul (rp, ap, an, bp, bn);
	    }
	  else
	    {
	      mp_limb_t cy;
	      mpn_mul (tp, ap, an, bp, bn);
	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
	      MPN_INCR_U (rp, rn, cy);
	    }
	}
      else
	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      /* We need at least an + bn >= n, to be able to fit one of the
	 recursive products at rp. Requiring strict inequality makes
	 the code slightly simpler. If desired, we could avoid this
	 restriction by initially halving rn as long as rn is even and
	 an + bn <= rn/2. */

      ASSERT (an + bn > n);

      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
	 and crt together as

	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */

#define a0 ap
#define a1 (ap + n)
#define b0 bp
#define b1 (bp + n)

#define xp  tp	/* 2n + 2 */
      /* am1  maybe in {xp, n} */
      /* bm1  maybe in {xp + n, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */
      /* bp1  maybe in {sp1 + n + 1, n + 1} */

      {
	mp_srcptr am1, bm1;
	mp_size_t anm, bnm;
	mp_ptr so;

	bm1 = b0;
	bnm = bn;
	if (LIKELY (an > n))
	  {
	    am1 = xp;
	    cy = mpn_add (xp, a0, n, a1, an - n);
	    MPN_INCR_U (xp, n, cy);
	    anm = n;
	    so = xp + n;
	    if (LIKELY (bn > n))
	      {
		bm1 = so;
		cy = mpn_add (so, b0, n, b1, bn - n);
		MPN_INCR_U (so, n, cy);
		bnm = n;
		so += n;
	      }
	  }
	else
	  {
	    so = xp;
	    am1 = a0;
	    anm = an;
	  }

	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
      }

      {
	int       k;
	mp_srcptr ap1, bp1;
	mp_size_t anp, bnp;

	bp1 = b0;
	bnp = bn;
	if (LIKELY (an > n)) {
	  ap1 = sp1;
	  cy = mpn_sub (sp1, a0, n, a1, an - n);
	  sp1[n] = 0;
	  MPN_INCR_U (sp1, n + 1, cy);
	  anp = n + ap1[n];
	  if (LIKELY (bn > n)) {
	    bp1 = sp1 + n + 1;
	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
	    sp1[2*n+1] = 0;
	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
	    bnp = n + bp1[n];
	  }
	} else {
	  ap1 = a0;
	  anp = an;
	}

	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
	  k=0;
	else
	  {
	    int mask;
	    k = mpn_fft_best_k (n, 0);
	    mask = (1<<k) - 1;
	    while (n & mask) {k--; mask >>=1;};
	  }
	if (k >= FFT_FIRST_K)
	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
	else if (UNLIKELY (bp1 == b0))
	  {
	    ASSERT (anp + bnp <= 2*n+1);
	    ASSERT (anp + bnp > n);
	    ASSERT (anp >= bnp);
	    mpn_mul (xp, ap1, anp, bp1, bnp);
	    anp = anp + bnp - n;
	    ASSERT (anp <= n || xp[2*n]==0);
	    anp-= anp > n;
	    cy = mpn_sub (xp, xp, n, xp + n, anp);
	    xp[n] = 0;
	    MPN_INCR_U (xp, n+1, cy);
	  }
	else
	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
      }

      /* Here the CRT recomposition begins.

	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
	 Division by 2 is a bitwise rotation.

	 Assumes xp normalised mod (B^n+1).

	 The residue class [0] is represented by [B^n-1]; except when
	 both input are ZERO.
      */

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
	 overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0]&1);
      mpn_rshift(rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* Next increment can not overflow, read the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U(rp, n, cy);

      /* Compute the highest half:
	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
       */
      if (UNLIKELY (an + bn < rn))
	{
	  /* Note that in this case, the only way the result can equal
	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
	     then the output of both the recursive calls and this CRT
	     reconstruction is zero, not B^{rn} - 1. Which is good,
	     since the latter representation doesn't fit in the output
	     area.*/
	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);

	  /* FIXME: This subtraction of the high parts is not really
	     necessary, we do it to get the carry out, and for sanity
	     checking. */
	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
				   xp + an + bn - n, rn - (an + bn), cy);
	  ASSERT (an + bn == rn - 1 ||
		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
	  ASSERT (cy == (xp + an + bn - n)[0]);
	}
      else
	{
	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
	     DECR will affect _at most_ the lowest n limbs. */
	  MPN_DECR_U (rp, 2*n, cy);
	}
#undef a0
#undef a1
#undef b0
#undef b1
#undef xp
#undef sp1
    }
}
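
The wraparound contract in the header comment is easiest to see in the base case: since B^rn ≡ 1 (mod B^rn - 1), a full double-length product is reduced by adding its high half onto its low half. A standalone sketch using only public mpn calls (my illustration, not the library's code; rp and xp assumed non-overlapping):

   /* rp{n} <- xp{2n} mod (B^n - 1), by folding with B^n ≡ 1 */
   static void
   fold_bnm1 (mp_ptr rp, mp_srcptr xp, mp_size_t n)
   {
     mp_limb_t cy = mpn_add_n (rp, xp, xp + n, n);  /* low half + high half */
     cy = mpn_add_1 (rp, rp, n, cy);                /* wrap the carry around */
     mpn_add_1 (rp, rp, n, cy);                     /* rare second wrap */
   }

As in the comment above, the class [0] may come out represented as B^n - 1 (e.g. folding (B^n - 1)^2 yields exactly B^n - 1).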
Example #16
void
mpf_ui_sub (mpf_ptr r, unsigned long int u, mpf_srcptr v)
{
#if 1
  __mpf_struct uu;
  mp_limb_t ul;

  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }

  ul = u;
  uu._mp_size = 1;
  uu._mp_d = &ul;
  uu._mp_exp = 1;
  mpf_sub (r, &uu, v);

#else
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t uexp;
  mp_size_t ediff;
  int negate;
  mp_limb_t ulimb;
  TMP_DECL;

  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }
  if (vsize == 0)
    {
      mpf_set_ui (r, u);
      return;
    }

  /* If signs of U and V are different, perform addition.  */
  if (vsize < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_add_ui (r, &v_negated, u);
      return;
    }

  /* Signs are now known to be the same.  */
  ASSERT (vsize > 0);
  ulimb = u;
  /* Make U be the operand with the largest exponent.  */
  negate = 1 < v->_mp_exp;
  prec = r->_mp_prec + negate;
  rp = r->_mp_d;
  if (negate)
    {
      usize = vsize;
      vsize = 1;
      up = v->_mp_d;
      vp = &ulimb;
      uexp = v->_mp_exp;
      ediff = uexp - 1;

      /* If U extends beyond PREC, ignore the part that does.  */
      if (usize > prec)
	{
	  up += usize - prec;
	  usize = prec;
	}
      ASSERT (ediff > 0);
    }
  else
    {
      vp = v->_mp_d;
      ediff = 1 - v->_mp_exp;
      /* Ignore leading limbs in U and V that are equal.  Doing
	 this helps increase the precision of the result.  */
      if (ediff == 0 && ulimb == vp[vsize - 1])
	{
	  usize = 0;
	  vsize--;
	  uexp = 0;
	  /* Note that V might now have leading zero limbs.
	     In that case we have to adjust uexp.  */
	  for (;;)
	    {
	      if (vsize == 0) {
		rsize = 0;
		uexp = 0;
		goto done;
	      }
	      if ( vp[vsize - 1] != 0)
		break;
	      vsize--, uexp--;
	    }
	}
      else
	{
	  usize = 1;
	  uexp = 1;
	  up = &ulimb;
	}
      ASSERT (usize <= prec);
    }

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
	MPN_COPY (rp, up, usize);
      rsize = usize;
    }
  else
    {
      /* If V extends beyond PREC, ignore the part that does.
	 Note that this leaves vsize positive; it cannot become
	 zero or negative here.  */
      if (vsize + ediff > prec)
	{
	  vp += vsize + ediff - prec;
	  vsize = prec - ediff;
	}

      /* Locate the least significant non-zero limb in (the needed
	 parts of) U and V, to simplify the code below.  */
      ASSERT (vsize > 0);
      for (;;)
	{
	  if (vp[0] != 0)
	    break;
	  vp++, vsize--;
	  if (vsize == 0)
	    {
	      MPN_COPY (rp, up, usize);
	      rsize = usize;
	      goto done;
	    }
	}
      for (;;)
	{
	  if (usize == 0)
	    {
	      MPN_COPY (rp, vp, vsize);
	      rsize = vsize;
	      negate ^= 1;
	      goto done;
	    }
	  if (up[0] != 0)
	    break;
	  up++, usize--;
	}

      ASSERT (usize > 0 && vsize > 0);
      TMP_MARK;

      tp = TMP_ALLOC_LIMBS (prec);

      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff)
	{
	  /* U and V partially overlaps.  */
	  if (ediff == 0)
	    {
	      ASSERT (usize == 1 && vsize >= 1 && ulimb == *up); /* usize is 1>ediff, vsize >= 1 */
	      if (1 < vsize)
		{
		  /* u        */
		  /* vvvvvvv  */
		  rsize = vsize;
		  vsize -= 1;
		  /* mpn_cmp (up, vp + vsize - usize, usize) > 0 */
		  if (ulimb > vp[vsize])
		    {
		      tp[vsize] = ulimb - vp[vsize] - 1;
		      ASSERT_CARRY (mpn_neg (tp, vp, vsize));
		    }
		  else
		    {
		      /* vvvvvvv  */  /* Swap U and V. */
		      /* u        */
		      MPN_COPY (tp, vp, vsize);
		      tp[vsize] = vp[vsize] - ulimb;
		      negate = 1;
		    }
		}
	      else /* vsize == usize == 1 */
		{
		  /* u     */
		  /* v     */
		  rsize = 1;
		  negate = ulimb < vp[0];
		  tp[0] = negate ? vp[0] - ulimb: ulimb - vp[0];
		}
	    }
	  else
	    {
	      ASSERT (vsize + ediff <= usize);
	      ASSERT (vsize == 1 && usize >= 2 && ulimb == *vp);
		{
		  /* uuuu     */
		  /*   v      */
		  mp_size_t size;
		  size = usize - ediff - 1;
		  MPN_COPY (tp, up, size);
		  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up + size, usize - size, ulimb));
		  rsize = usize;
		}
		/* Other cases are not possible */
		/* uuuu     */
		/*   vvvvv  */
	    }
	}
      else
	{
	  /* uuuu     */
	  /*      vv  */
	  mp_size_t size, i;
	  ASSERT_CARRY (mpn_neg (tp, vp, vsize));
	  rsize = vsize + ediff;
	  size = rsize - usize;
	  for (i = vsize; i < size; i++)
	    tp[i] = GMP_NUMB_MAX;
	  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up, usize, CNST_LIMB (1)));
	}

      /* Full normalize.  Optimize later.  */
      while (rsize != 0 && tp[rsize - 1] == 0)
	{
	  rsize--;
	  uexp--;
	}
      MPN_COPY (rp, tp, rsize);
      TMP_FREE;
    }

 done:
  r->_mp_size = negate ? -rsize : rsize;
  r->_mp_exp = uexp;
#endif
}
Example #17
/* 
   Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is
   either correct or one too large. We require dp to be normalised and inv
   to be a precomputed inverse given by mpn_invert.
*/
mp_limb_t 
mpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, 
                              mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);
   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }
   
   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy + 1);

   /*
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }; then
      DX < B^{2*dn} <= D(X+1).  Thus, with N' = { np + dn - 1, dn + 1 },
      N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      and so
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B.
      There is either one integer in this range, or two. However, in the latter
      case the left hand bound is either an integer or less than 2/B below one.
   */
    
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }
  
   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) 
   {
	   /* Special case, multiply out to get accurate quotient */
	   ret -= mpn_sub_1(qp, qp, dn, 1);
       if (UNLIKELY(ret == ~CNST_LIMB(0)))
          ret += mpn_add_1(qp, qp, dn, 1);
       /* ret is now guaranteed to be 0 */
       ASSERT(ret == 0);
       mpn_mul_n(tp, qp, dp, dn);
       mpn_sub_n(tp, np, tp, dn+1);
       while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)
	   {
		   ret += mpn_add_1(qp, qp, dn, 1);
		   tp[dn] -= mpn_sub_n(tp, tp, dp, dn);
	   }
       /* Not possible for ret == 2 as we have qp*dp <= np */
       ASSERT(ret + ret2 < 2);
   }

   TMP_FREE;

   return ret + ret2;
}
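
The estimate step above can be read in isolation: with X = B^dn + inv, the candidate quotient is floor(N'·X / B^(dn+1)), i.e. one mpn_mul plus one aligned addition. A stripped-down sketch of just that computation (my paraphrase of the lines above; it omits the low-limb carry handling via add_ssaaaa and the +1 rounding adjustment):

   /* qp{dn} ~ N' * (B^dn + inv) / B^(dn+1), with N' = { np + dn - 1, dn + 1 } */
   static mp_limb_t
   estimate_quotient (mp_ptr qp, mp_srcptr np, mp_srcptr inv,
                      mp_size_t dn, mp_ptr tp)   /* tp: 2*dn + 1 limbs */
   {
     mpn_mul (tp, np + dn - 1, dn + 1, inv, dn);       /* N' * inv */
     return mpn_add_n (qp, tp + dn + 1, np + dn, dn);  /* + N' * B^dn, top limbs */
   }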
Example #18
void
_gst_mpz_com (gst_mpz *dst, const gst_mpz *src)
{
    mp_size_t size = src->size;
    mp_srcptr src_ptr;
    mp_ptr dst_ptr;

    if (size >= 0)
    {
        /* As with infinite precision: one's complement, then two's
           complement.  Using the identity -x = ~x + 1, i.e. ~x = -(x + 1),
           we simply compute the magnitude x + 1 and store it with a
           negative size.  */

        if (dst->alloc < size + 1)
            gst_mpz_realloc (dst, size + 1);

        src_ptr = src->d;
        dst_ptr = dst->d;

        if (size == 0)
        {
            /* Special case, as mpn_add wants the first arg's size >= the
               second arg's size.  */
            dst_ptr[0] = 1;
            dst->size = -1;
            return;
        }

        {
            int cy;

            cy = mpn_add_1 (dst_ptr, src_ptr, size, 1);
            if (cy)
            {
                dst_ptr[size] = cy;
                size++;
            }
        }

        /* Store a negative size, to indicate ones-extension.  */
        dst->size = -size;
    }
    else
    {
        /* As with infinite precision: two's complement, then one's
           complement.  Using the identity -x = ~(x - 1), for a negative
           operand we simply compute the magnitude x - 1 and store it
           with a positive size.  */
        size = -size;

        if (dst->alloc < size)
            gst_mpz_realloc (dst, size);

        src_ptr = src->d;
        dst_ptr = dst->d;

        size += mpn_sub_1 (dst_ptr, src_ptr, size, 1);

        /* Store a positive size, to indicate zero-extension.  */
        dst->size = size;
    }
}
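
Both comment blocks rest on one single-limb identity; a quick sanity check in plain C (my example; assumes a two's complement machine):

   #include <assert.h>

   static void
   complement_identity_demo (void)
   {
     long x = 42;
     assert (~x == -(x + 1));   /* com of x >= 0: magnitude x + 1, negative */
     assert (~(-x) == x - 1);   /* com of -x:     magnitude x - 1, positive */
   }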
Example #19
mp_limb_t
mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
{
  mp_size_t qn;
  mp_limb_t qh, cy, qsave;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  ASSERT (dn >= 6);
  ASSERT (nn > dn);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);

  qn = nn - dn;
  qp += qn;
  np += nn;
  dp += dn;

  if (qn >= dn)
    {
      qn++;			/* pretend we'll need an extra limb */
      /* Reduce qn mod dn without division, optimizing small operations.  */
      do
	qn -= dn;
      while (qn > dn);

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      tp = TMP_SALLOC_LIMBS (dn);

      /* Perform the typically smaller block first.  */
      if (qn == 1)
	{
	  mp_limb_t q, n2, n1, n0, d1, d0;

	  /* Handle qh up front, for simplicity. */
	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
	  if (qh)
	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));

	  /* A single iteration of schoolbook: One 3/2 division,
	     followed by the bignum update and adjustment. */
	  n2 = np[0];
	  n1 = np[-1];
	  n0 = np[-2];
	  d1 = dp[-1];
	  d0 = dp[-2];

	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));

	  if (UNLIKELY (n2 == d1) && n1 == d0)
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
	      ASSERT (cy == n2);
	    }
	  else
	    {
	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);

	      if (dn > 2)
		{
		  mp_limb_t cy, cy1;
		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);

		  cy1 = n0 < cy;
		  n0 = (n0 - cy) & GMP_NUMB_MASK;
		  cy = n1 < cy1;
		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
		  np[-2] = n0;

		  if (UNLIKELY (cy != 0))
		    {
		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
		      qh -= (q == 0);
		      q = (q - 1) & GMP_NUMB_MASK;
		    }
		}
	      else
		np[-2] = n0;

	      np[-1] = n1;
	    }
	  qp[0] = q;
	}
      else
	{
	  if (qn == 2)
	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
	  else
	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);

	  if (qn != dn)
	    {
	      if (qn > dn - qn)
		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
	      else
		mpn_mul (tp, dp - dn, dn - qn, qp, qn);

	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
	      if (qh != 0)
		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);

	      while (cy != 0)
		{
		  qh -= mpn_sub_1 (qp, qp, qn, 1);
		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
		}
	    }
	}
      qn = nn - dn - qn + 1;
      while (qn > dn)
	{
	  qp -= dn;
	  np -= dn;
	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
	  qn -= dn;
	}

      /* Since we pretended we'd need an extra quotient limb before, we now
	 have made sure the code above left just dn-1=qn quotient limbs to
	 develop.  Develop that plus a guard limb. */
      qn--;
      qp -= qn;
      np -= dn;
      qsave = qp[qn];
      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
      MPN_COPY_INCR (qp, qp + 1, qn);
      qp[qn] = qsave;
    }
  else    /* (qn < dn) */
    {
      mp_ptr q2p;
#if 0				/* not possible since we demand nn > dn */
      if (qn == 0)
	{
	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
	  if (qh)
	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
	  TMP_FREE;
	  return qh;
	}
#endif

      qp -= qn;			/* point at low limb of next quotient block */
      np -= qn;			/* point in the middle of partial remainder */

      q2p = TMP_SALLOC_LIMBS (qn + 1);
      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or rely on
	 callers not to be silly?  */
      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
	{
	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
				    dp - (qn + 1), qn + 1, dinv->inv32);
	}
      else
	{
	  /* It is tempting to use qp for recursive scratch and put quotient in
	     tp, but the recursive scratch needs one limb too many.  */
	  tp = TMP_SALLOC_LIMBS (qn + 1);
	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
	}
      MPN_COPY (qp, q2p + 1, qn);
    }

  TMP_FREE;
  return qh;
}
Example #20
dig_t bn_sub1_low(dig_t *c, const dig_t *a, dig_t digit, int size) {
	return mpn_sub_1(c, a, size, digit);
}
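
The one-line wrapper just forwards a digit vector to GMP's mpn layer. For reference, a minimal demonstration of the borrow semantics it returns (my example):

   #include <stdio.h>
   #include <gmp.h>

   int
   main (void)
   {
     mp_limb_t a[3] = { 0, 0, 1 };               /* a = B^2, little-endian limbs */
     mp_limb_t r[3];
     mp_limb_t borrow = mpn_sub_1 (r, a, 3, 1);  /* r = a - 1 */
     /* r = { B-1, B-1, 0 } and borrow = 0; the return value is nonzero
        only if the subtraction underflows the whole n-limb number */
     printf ("borrow = %lu, low limb = 0x%lx\n",
             (unsigned long) borrow, (unsigned long) r[0]);
     return 0;
   }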
Example #21
void tc4_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y)
{
  mp_size_t  sign, wu, xu, ws, new_wn, min_size, dsize;
  mp_limb_t  cy;

  /* w unaffected if x==0 or y==0 */
  if (xn == 0 || y == 0)
    return;

  sign = xn;
  xu = ABS (xn);

  ws = *wn;
  if (*wn == 0)
  {
      /* nothing to add to, just set x*y, "sign" gives the sign */
      cy = mpn_mul_1 (wp, xp, xu, y);
      if (cy)
		{
			wp[xu] = cy;
         xu = xu + 1;
		} 
      *wn = (sign >= 0 ? xu : -xu);
      return;
  }
  
  sign ^= *wn;
  wu = ABS (*wn);

  new_wn = MAX (wu, xu);
  min_size = MIN (wu, xu);

  if (sign >= 0)
  {
      /* addmul of absolute values */

      cy = mpn_addmul_1 (wp, xp, min_size, y);
      
      dsize = xu - wu;
#if HAVE_NATIVE_mpn_mul_1c
      if (dsize > 0)
        cy = mpn_mul_1c (wp + min_size, xp + min_size, dsize, y, cy);
      else if (dsize < 0)
      {
          dsize = -dsize;
          cy = mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);
      }
#else
      if (dsize != 0)
      {
          mp_limb_t cy2;
          if (dsize > 0)
            cy2 = mpn_mul_1 (wp + min_size, xp + min_size, dsize, y);
          else
          {
              dsize = -dsize;
              cy2 = 0;
          }
          cy = cy2 + mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);
      }
#endif

      if (cy)
		{
			wp[dsize + min_size] = cy;
         new_wn ++;
		}
   } else
   {
      /* submul of absolute values */

      cy = mpn_submul_1 (wp, xp, min_size, y);
      if (wu >= xu)
      {
          /* if w bigger than x, then propagate borrow through it */
          if (wu != xu)
            cy = mpn_sub_1 (wp + xu, wp + xu, wu - xu, cy);

          if (cy != 0)
          {
              /* Borrow out of w, take twos complement negative to get
                 absolute value, flip sign of w.  */
              wp[new_wn] = ~-cy;  /* extra limb is 0-cy */
              mpn_not (wp, new_wn);
              new_wn++;
              MPN_INCR_U (wp, new_wn, CNST_LIMB(1));
              ws = -*wn;
          }
      } else /* wu < xu */
      {
          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
             take twos complement and use an mpn_mul_1 for the rest.  */

          mp_limb_t  cy2;

          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
          mpn_not (wp, wu);
          cy += mpn_add_1 (wp, wp, wu, CNST_LIMB(1));
          cy -= 1;

          /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
          cy2 = (cy == MP_LIMB_T_MAX);
          cy += cy2;
          MPN_MUL_1C (cy, wp + wu, xp + wu, xu - wu, y, cy);
          wp[new_wn] = cy;
          new_wn += (cy != 0);

          /* Apply any -1 from above.  The value at wp+wsize is non-zero
             because y!=0 and the high limb of x will be non-zero.  */
          if (cy2)
            MPN_DECR_U (wp+wu, new_wn - wu, CNST_LIMB(1));

          ws = -*wn;
        }

      /* submul can produce high zero limbs due to cancellation, both when w
         has more limbs or x has more  */
      MPN_NORMALIZE (wp, new_wn);
  }

  *wn = (ws >= 0 ? new_wn : -new_wn);

  ASSERT (new_wn == 0 || wp[new_wn - 1] != 0);
}
Example #22
void
mpf_sub (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
{
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t exp;
  mp_size_t ediff;
  int negate;
  TMP_DECL;

  usize = u->_mp_size;
  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (usize == 0)
    {
      mpf_neg (r, v);
      return;
    }
  if (vsize == 0)
    {
      if (r != u)
        mpf_set (r, u);
      return;
    }

  /* If signs of U and V are different, perform addition.  */
  if ((usize ^ vsize) < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_add (r, u, &v_negated);
      return;
    }

  TMP_MARK;

  /* Signs are now known to be the same.  */
  negate = usize < 0;

  /* Make U be the operand with the largest exponent.  */
  if (u->_mp_exp < v->_mp_exp)
    {
      mpf_srcptr t;
      t = u; u = v; v = t;
      negate ^= 1;
      usize = u->_mp_size;
      vsize = v->_mp_size;
    }

  usize = ABS (usize);
  vsize = ABS (vsize);
  up = u->_mp_d;
  vp = v->_mp_d;
  rp = r->_mp_d;
  prec = r->_mp_prec + 1;
  exp = u->_mp_exp;
  ediff = u->_mp_exp - v->_mp_exp;

  /* If ediff is 0 or 1, we might have a situation where the operands are
     extremely close.  We need to scan the operands from the most significant
     end and ignore the initial parts that are equal.  */
  if (ediff <= 1)
    {
      if (ediff == 0)
	{
	  /* Skip leading limbs in U and V that are equal.  */
	  if (up[usize - 1] == vp[vsize - 1])
	    {
	      /* This loop normally exits immediately.  Optimize for that.  */
	      do
		{
		  usize--;
		  vsize--;
		  exp--;

		  if (usize == 0)
		    {
                      /* u cancels high limbs of v, result is rest of v */
		      negate ^= 1;
                    cancellation:
                      /* strip high zeros before truncating to prec */
                      while (vsize != 0 && vp[vsize - 1] == 0)
                        {
                          vsize--;
                          exp--;
                        }
		      if (vsize > prec)
			{
			  vp += vsize - prec;
			  vsize = prec;
			}
                      MPN_COPY_INCR (rp, vp, vsize);
                      rsize = vsize;
                      goto done;
		    }
		  if (vsize == 0)
		    {
                      vp = up;
                      vsize = usize;
                      goto cancellation;
		    }
		}
	      while (up[usize - 1] == vp[vsize - 1]);
	    }

	  if (up[usize - 1] < vp[vsize - 1])
	    {
	      /* For simplicity, swap U and V.  Note that since the loop above
		 wouldn't have exited unless up[usize - 1] and vp[vsize - 1]
		 were non-equal, this if-statement catches all cases where U
		 is smaller than V.  */
	      MPN_SRCPTR_SWAP (up,usize, vp,vsize);
	      negate ^= 1;
	      /* negating ediff not necessary since it is 0.  */
	    }

	  /* Check for
	     x+1 00000000 ...
	      x  ffffffff ... */
	  if (up[usize - 1] != vp[vsize - 1] + 1)
	    goto general_case;
	  usize--;
	  vsize--;
	  exp--;
	}
      else /* ediff == 1 */
	{
	  /* Check for
	     1 00000000 ...
	     0 ffffffff ... */

	  if (up[usize - 1] != 1 || vp[vsize - 1] != GMP_NUMB_MAX
	      || (usize >= 2 && up[usize - 2] != 0))
	    goto general_case;

	  usize--;
	  exp--;
	}

      /* Skip sequences of 00000000/ffffffff */
      while (vsize != 0 && usize != 0 && up[usize - 1] == 0
	     && vp[vsize - 1] == GMP_NUMB_MAX)
	{
	  usize--;
	  vsize--;
	  exp--;
	}

      if (usize == 0)
	{
	  while (vsize != 0 && vp[vsize - 1] == GMP_NUMB_MAX)
	    {
	      vsize--;
	      exp--;
	    }
	}

      if (usize > prec - 1)
	{
	  up += usize - (prec - 1);
	  usize = prec - 1;
	}
      if (vsize > prec - 1)
	{
	  vp += vsize - (prec - 1);
	  vsize = prec - 1;
	}

      tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);
      {
	mp_limb_t cy_limb;
	if (vsize == 0)
	  {
	    mp_size_t size, i;
	    size = usize;
	    for (i = 0; i < size; i++)
	      tp[i] = up[i];
	    tp[size] = 1;
	    rsize = size + 1;
	    exp++;
	    goto normalize;
	  }
	if (usize == 0)
	  {
	    mp_size_t size, i;
	    size = vsize;
	    for (i = 0; i < size; i++)
	      tp[i] = ~vp[i] & GMP_NUMB_MASK;
	    cy_limb = 1 - mpn_add_1 (tp, tp, vsize, (mp_limb_t) 1);
	    rsize = vsize;
	    if (cy_limb == 0)
	      {
		tp[rsize] = 1;
		rsize++;
		exp++;
	      }
	    goto normalize;
	  }
	if (usize >= vsize)
	  {
	    /* uuuu     */
	    /* vv       */
	    mp_size_t size;
	    size = usize - vsize;
	    MPN_COPY (tp, up, size);
	    cy_limb = mpn_sub_n (tp + size, up + size, vp, vsize);
	    rsize = usize;
	  }
	else /* (usize < vsize) */
	  {
	    /* uuuu     */
	    /* vvvvvvv  */
	    mp_size_t size, i;
	    size = vsize - usize;
	    for (i = 0; i < size; i++)
	      tp[i] = ~vp[i] & GMP_NUMB_MASK;
	    cy_limb = mpn_sub_n (tp + size, up, vp + size, usize);
	    cy_limb += mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
	    cy_limb -= mpn_add_1 (tp, tp, vsize, (mp_limb_t) 1);
	    rsize = vsize;
	  }
	if (cy_limb == 0)
	  {
	    tp[rsize] = 1;
	    rsize++;
	    exp++;
	  }
	goto normalize;
      }
    }

general_case:
  /* If U extends beyond PREC, ignore the part that does.  */
  if (usize > prec)
    {
      up += usize - prec;
      usize = prec;
    }

  /* If V extends beyond PREC, ignore the part that does.
     Note that this may make vsize negative.  */
  if (vsize + ediff > prec)
    {
      vp += vsize + ediff - prec;
      vsize = prec - ediff;
    }

  /* Allocate temp space for the result.  Allocate
     just vsize + ediff later???  */
  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
	MPN_COPY (rp, up, usize);
      rsize = usize;
    }
  else
    {
      /* Locate the least significant non-zero limb in (the needed
	 parts of) U and V, to simplify the code below.  */
      for (;;)
	{
	  if (vsize == 0)
	    {
	      MPN_COPY (rp, up, usize);
	      rsize = usize;
	      goto done;
	    }
	  if (vp[0] != 0)
	    break;
	  vp++, vsize--;
	}
      for (;;)
	{
	  if (usize == 0)
	    {
	      MPN_COPY (rp, vp, vsize);
	      rsize = vsize;
	      negate ^= 1;
	      goto done;
	    }
	  if (up[0] != 0)
	    break;
	  up++, usize--;
	}

      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff)
	{
	  /* U and V partially overlaps.  */
	  if (ediff == 0)
	    {
	      /* Have to compare the leading limbs of u and v
		 to determine whether to compute u - v or v - u.  */
	      if (usize >= vsize)
		{
		  /* uuuu     */
		  /* vv       */
		  mp_size_t size;
		  size = usize - vsize;
		  MPN_COPY (tp, up, size);
		  mpn_sub_n (tp + size, up + size, vp, vsize);
		  rsize = usize;
		}
	      else /* (usize < vsize) */
		{
		  /* uuuu     */
		  /* vvvvvvv  */
		  mp_size_t size, i;
		  size = vsize - usize;
		  tp[0] = -vp[0] & GMP_NUMB_MASK;
		  for (i = 1; i < size; i++)
		    tp[i] = ~vp[i] & GMP_NUMB_MASK;
		  mpn_sub_n (tp + size, up, vp + size, usize);
		  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
		  rsize = vsize;
		}
	    }
	  else
	    {
	      if (vsize + ediff <= usize)
		{
		  /* uuuu     */
		  /*   v      */
		  mp_size_t size;
		  size = usize - ediff - vsize;
		  MPN_COPY (tp, up, size);
		  mpn_sub (tp + size, up + size, usize - size, vp, vsize);
		  rsize = usize;
		}
	      else
		{
		  /* uuuu     */
		  /*   vvvvv  */
		  mp_size_t size, i;
		  size = vsize + ediff - usize;
		  tp[0] = -vp[0] & GMP_NUMB_MASK;
		  for (i = 1; i < size; i++)
		    tp[i] = ~vp[i] & GMP_NUMB_MASK;
		  mpn_sub (tp + size, up, usize, vp + size, usize - ediff);
		  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
		  rsize = vsize + ediff;
		}
	    }
	}
      else
	{
	  /* uuuu     */
	  /*      vv  */
	  mp_size_t size, i;
	  size = vsize + ediff - usize;
	  tp[0] = -vp[0] & GMP_NUMB_MASK;
	  for (i = 1; i < vsize; i++)
	    tp[i] = ~vp[i] & GMP_NUMB_MASK;
	  for (i = vsize; i < size; i++)
	    tp[i] = GMP_NUMB_MAX;
	  mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);
	  rsize = size + usize;
	}

    normalize:
      /* Full normalize.  Optimize later.  */
      while (rsize != 0 && tp[rsize - 1] == 0)
	{
	  rsize--;
	  exp--;
	}
      MPN_COPY (rp, tp, rsize);
    }

 done:
  r->_mp_size = negate ? -rsize : rsize;
  if (rsize == 0)
    exp = 0;
  r->_mp_exp = exp;
  TMP_FREE;
}
Example #23
mp_limb_t
mpn_mu_div_q (mp_ptr qp,
	      mp_srcptr np, mp_size_t nn,
	      mp_srcptr dp, mp_size_t dn,
	      mp_ptr scratch)
{
  mp_ptr tp, rp;
  mp_size_t qn;
  mp_limb_t cy, qh;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn;

  tp = TMP_BALLOC_LIMBS (qn + 1);

  if (qn >= dn)			/* nn >= 2*dn + 1 */
    {
       /* |_______________________|   dividend
			 |________|   divisor  */

      rp = TMP_BALLOC_LIMBS (nn + 1);
      MPN_COPY (rp + 1, np, nn);
      rp[0] = 0;

      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
      if (qh != 0)
	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);

      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);

      if (UNLIKELY (cy != 0))
	{
	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
	     canonically reduced, replace the returned value of B^(qn-dn)+eps
	     by the largest possible value.  */
	  mp_size_t i;
	  for (i = 0; i < qn + 1; i++)
	    tp[i] = GMP_NUMB_MAX;
	}

      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
	 smaller than the max error, we cannot trust the quotient.  */
      if (tp[0] > 4)
	{
	  MPN_COPY (qp, tp + 1, qn);
	}
      else
	{
	  mp_limb_t cy;
	  mp_ptr pp;

	  pp = rp;
	  mpn_mul (pp, tp + 1, qn, dp, dn);

	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;

	  if (cy || mpn_cmp (pp, np, nn) > 0) /* Off by at most one; a single adjustment suffices. */
	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
	  else /* Same as above */
	    MPN_COPY (qp, tp + 1, qn);
	}
    }
  else
    {
       /* |_______________________|   dividend
		 |________________|   divisor  */

      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
	 the most significant dn-1 limbs will actually be read, but it is not
	 pretty.  */

      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
			     dp + dn - (qn + 1), qn + 1, scratch);

      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
         error from the divisor truncation.  */
      if (tp[0] > 6)
	{
	  MPN_COPY (qp, tp + 1, qn);
	}
      else
	{
	  mp_limb_t cy;

	  /* FIXME: a shorter product should be enough; we may use already
	     allocated space... */
	  rp = TMP_BALLOC_LIMBS (nn);
	  mpn_mul (rp, dp, dn, tp + 1, qn);

	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;

	  if (cy || mpn_cmp (rp, np, nn) > 0) /* Off by at most one; a single adjustment suffices. */
	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
	  else /* Same as above */
	    MPN_COPY (qp, tp + 1, qn);
	}
    }

  TMP_FREE;
  return qh;
}
Example #24
mp_limb_t
mpn_sb_div_q (mp_ptr qp,
		 mp_ptr np, mp_size_t nn,
		 mp_srcptr dp, mp_size_t dn,
		 mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t qn, i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0, d11, d01;
  mp_limb_t cy, cy1;
  mp_limb_t q;
  mp_limb_t flag;

  mp_size_t dn_orig = dn, qn_orig;
  mp_srcptr dp_orig = dp;
  mp_ptr np_orig = np;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  np += nn;

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  if (dn <= SB_DIVAPPR_Q_SMALL_THRESHOLD)
    {
      qn_orig = qn;

      /* Reduce until dn - 2 >= qn */
      for (qn--, np--; qn > dn - 2; qn--)
	{
	  /* fetch next word */
	  cy = np[0];

	  np--;
	  mpir_divapprox32_preinv2 (q, cy, np[0], dinv);

	  /* np -= dp*q */
	  cy -= mpn_submul_1 (np - dn + 1, dp, dn, q);

	  /* correct if remainder is too large */
	  if (UNLIKELY (cy || np[0] >= dp[dn - 1]))
	    {
	      if (cy || mpn_cmp (np - dn + 1, dp, dn) >= 0)
		{
		  q++;
		  cy -= mpn_sub_n (np - dn + 1, np - dn + 1, dp, dn);
		}
	    }

	  qp[qn] = q;
	}

      qn++;
      dp = dp + dn - qn - 1;	/* make dp length qn + 1 */

      flag = ~CNST_LIMB(0);

      for ( ; qn > 0; qn--)
	{
	  /* fetch next word */
	  cy = np[0];

	  np--;
	  /* rare case where truncation ruins normalisation */
	  if (cy > dp[qn] || (cy == dp[qn] && mpn_cmp (np - qn + 1, dp, qn) >= 0))
	    {
	      __div_helper (qp, np - qn, dp, qn);
	      flag = 0;
	      break;
	    }

	  mpir_divapprox32_preinv2 (q, cy, np[0], dinv);

	  /* np -= dp*q */
	  cy -= mpn_submul_1 (np - qn, dp, qn + 1, q);

	  /* correct if remainder is too large */
	  if (UNLIKELY (cy || np[0] >= dp[qn]))
	    {
	      if (cy || mpn_cmp (np - qn, dp, qn + 1) >= 0)
		{
		  q++;
		  cy -= mpn_sub_n (np - qn, np - qn, dp, qn + 1);
		}
	    }

	  qp[qn - 1] = q;
	  dp++;
	}

      np--;
      n1 = np[1];
      qn = qn_orig;
    }
  else
    {
  qp += qn;

  dn -= 2;			/* offset dn by 2 for main division loops,
				   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  d01 = d0 + 1;
  d11 = d1 + (d01 < d0);
  
  np -= 2;

  n1 = np[1];

  for (i = qn - (dn + 2); i >= 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
	{
	  q = GMP_NUMB_MASK;
	  mpn_submul_1 (np - dn, dp, dn + 2, q);
	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  cy = mpn_submul_1 (np - dn, dp, dn, q);

	  cy1 = n0 < cy;
	  n0 = (n0 - cy) & GMP_NUMB_MASK;
	  cy = n1 < cy1;
	  n1 -= cy1;
	  np[0] = n0;

	  if (UNLIKELY (cy != 0))
	    {
	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
	      q--;
	    }
	}

      *--qp = q;
    }

  flag = ~CNST_LIMB(0);

  if (dn >= 0)
    {
      for (i = dn; i > 0; i--)
	{
	  np--;
	  if (UNLIKELY (n1 >= (d1 & flag)))
	    {
	      q = GMP_NUMB_MASK;
	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);

	      if (UNLIKELY (n1 != cy))
		{
		  if (n1 < (cy & flag))
		    {
		      q--;
		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
		    }
		  else
		    flag = 0;
		}
	      n1 = np[1];
	    }
	  else
	    {
	      mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	      cy = mpn_submul_1 (np - dn, dp, dn, q);

	      cy1 = n0 < cy;
	      n0 = (n0 - cy) & GMP_NUMB_MASK;
	      cy = n1 < cy1;
	      n1 -= cy1;
	      np[0] = n0;

	      if (UNLIKELY (cy != 0))
		{
		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
		  q--;
		}
	    }

	  *--qp = q;

	  /* Truncate operands.  */
	  dn--;
	  dp++;
	}

      np--;
      if (UNLIKELY (n1 >= (d1 & flag)))
	{
	  q = GMP_NUMB_MASK;
	  cy = mpn_submul_1 (np, dp, 2, q);

	  if (UNLIKELY (n1 != cy))
	    {
	      if (n1 < (cy & flag))
		{
		  q--;
		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
		}
	      else
		flag = 0;
	    }
	  n1 = np[1];
	}
      else
	{
	  mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);

	  np[0] = n0;
	  np[1] = n1;
	}

      *--qp = q;
    }
  ASSERT_ALWAYS (np[1] == n1);
  }

  np += 2;

  dn = dn_orig;
  if (UNLIKELY (n1 < (dn & flag)))
    {
      mp_limb_t q, x;

      /* The quotient may be too large if the remainder is small.  Recompute
	 for above ignored operand parts, until the remainder spills.

	 FIXME: The quality of this code isn't the same as the code above.
	 1. We don't compute things in an optimal order, high-to-low, in order
	    to terminate as quickly as possible.
	 2. We mess with pointers and sizes, adding and subtracting and
	    adjusting to get things right.  It surely could be streamlined.
	 3. The only termination criteria are that we determine that the
	    quotient needs to be adjusted, or that we have recomputed
	    everything.  We should stop when the remainder is so large
	    that no additional subtracting could make it spill.
	 4. If nothing else, we should not do two loops of submul_1 over the
	    data, instead handle both the triangularization and chopping at
	    once.  */

      x = n1;

      if (dn > 2)
	{
	  /* Compensate for triangularization.  */
	  mp_limb_t y;

	  dp = dp_orig;
	  if (qn + 1 < dn)
	    {
	      dp += dn - (qn + 1);
	      dn = qn + 1;
	    }

	  y = np[-2];

	  for (i = dn - 3; i >= 0; i--)
	    {
	      q = qp[i];
	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);

	      if (y < cy)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      ASSERT_ALWAYS (cy == 0);
		      return qh - cy;
		    }
		  x--;
		}
	      y -= cy;
	    }
	  np[-2] = y;
	}

      dn = dn_orig;
      if (qn + 1 < dn)
	{
	  /* Compensate for ignored dividend and divisor tails.  */

	  dp = dp_orig;
	  np = np_orig;

	  if (qh != 0)
	    {
	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      if (qn != 0)
			cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh - cy;
		    }
		  x--;
		}
	    }

	  if (qn == 0)
	    return qh;

	  for (i = dn - qn - 2; i >= 0; i--)
	    {
	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
	      if (cy != 0)
		{
		  if (x == 0)
		    {
		      cy = mpn_sub_1 (qp, qp, qn, 1);
		      return qh;
		    }
		  x--;
		}
	    }
	}
    }

  return qh;
}
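/* A minimal sketch, not part of the example above, of the two-limb borrow
   propagation used in its main loops: subtract the single-limb borrow c
   (as returned by mpn_submul_1) from the pair (hi:lo) and report the
   borrow out of the top limb.  Assumes full-word limbs (no nails). */
typedef unsigned long ulimb;

static int
sub_limb_from_pair (ulimb *hi, ulimb *lo, ulimb c)
{
  int b1, b2;

  b1 = *lo < c;			/* borrow out of the low limb */
  *lo -= c;
  b2 = *hi < (ulimb) b1;	/* borrow out of the high limb */
  *hi -= (ulimb) b1;
  return b2;			/* mirrors the cy1/n0/n1 dance above */
}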
Example #25
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, tz;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i, test;
  mp_ptr np, dp, qp, rp;
  mp_limb_t rh;
  mp_limb_t t;
  mp_limb_t dinv;
  int count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }


  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (tz);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = 0; test < count;)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
      if (maxdbits > nbits)
	dbits = random_word (rands) % nbits + 1;
      else
	dbits = random_word (rands) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	{
	  RANDFUNC (n, rands, nbits);
	  do
	    {
	      RANDFUNC (d, rands, dbits);
	    }
	  while (mpz_sgn (d) == 0);

	  np = PTR (n);
	  dp = PTR (d);
	  nn = SIZ (n);
	  dn = SIZ (d);
	}
      while (nn < dn);

      dp[0] |= 1;	/* force an odd divisor: binvert_limb below needs gcd(d, B) = 1 */

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	dp[0] = GMP_NUMB_MAX;

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = 0; i <= clearn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	}

      test++;

      binvert_limb (dinv, dp[0]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_qr */
	      MPN_ZERO (qp, nn - dn);
	      MPN_ZERO (rp, dn);
	      MPN_COPY (rp, np, nn);
	      rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr");
	    }

	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_q */
	      MPN_COPY (rp, np, nn);
	      MPN_ZERO (qp, nn - dn);
	      mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q");
	    }
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_qr */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr");
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_q */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
	}

      if (nn > dn)
	{
	  /* Test mpn_bdiv_qr */
	  itch = mpn_bdiv_qr_itch (nn, dn);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_ZERO (qp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);

	  check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
	}

      if (nn - dn < 2 || dn < 2)
	continue;

      /* Test mpn_mu_bdiv_qr */
      itch = mpn_mu_bdiv_qr_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn);
      MPN_ZERO (rp, dn);
      rp[dn] = rran1;
      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
      check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr");

      /* Test mpn_mu_bdiv_q */
      itch = mpn_mu_bdiv_q_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn + 1);
      mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q");
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (tz);

  tests_end ();
  return 0;
}
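/* A sketch, not part of the test above, of what binvert_limb computes:
   the inverse of an odd limb d modulo B = 2^64, by Hensel lifting.  If
   d*x == 1 (mod 2^k), then x' = x*(2 - d*x) satisfies d*x' == 1 (mod 2^(2k)),
   so each step doubles the number of correct low bits.  GMP seeds the
   iteration from a table; here the 3-bit identity d*d == 1 (mod 8) for
   odd d is used instead.  Assumes 64-bit limbs. */
typedef unsigned long long limb_t;

static limb_t
binv (limb_t d)
{
  limb_t x = d;			/* correct to 3 bits */
  x *= 2 - d * x;		/* 6 bits */
  x *= 2 - d * x;		/* 12 bits */
  x *= 2 - d * x;		/* 24 bits */
  x *= 2 - d * x;		/* 48 bits */
  x *= 2 - d * x;		/* 96 >= 64 bits */
  return x;
}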
Example #26
mp_limb_t mpn_divrem_euclidean_qr_2(mp_ptr qp, mp_ptr xp, mp_size_t xn, mp_srcptr dp)
{
   mp_size_t qn;
   mp_limb_t qf, t[2], t1[2], q, h, l, d1, d2, i;
   int c1, c3, c4;

   ASSERT(xn >= 2);
   ASSERT_MPN(dp, 2);
   ASSERT_MPN(xp, xn);
   ASSERT(dp[1] != 0);

   qn = xn - 1;

   /* ASSERT(!MPN_OVERLAP_P(qp, qn, xp, xn)); */ /* FIXME: correct this overlap requirement */
   ASSERT((dp[1]>>(GMP_NUMB_BITS - 1)) != 0);

   h = 0;
   d1 = dp[1];
   d2 = dp[0];
   
   invert_limb(i, d1);
   
   l = xp[xn - 1];
   qn = xn - 2;
   t[0] = xp[qn];

   if (l < d1)
   { 
      h = t[1] = l;
      l = t[0] = xp[qn];
      qf = 0;
   }
   else
   {
      qf = 1;
      t[1] = l - d1;
      t1[1] = 0;
      t1[0] = d2;
   
      if (mpn_sub_n(t, t, t1, 2))
      {
         qf--;
         mpn_add_n(t, t, dp, 2);
      }
   
      h = t[1];
      l = t[0];
   }

   for (qn = xn - 3; qn >= 0; qn--)
   {
      t[0] = xp[qn];
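      /* Estimate the 3/2 quotient from the top two numerator limbs and d1
         alone; since d1 is normalized, the estimate exceeds the true
         quotient by at most 2, hence the two conditional corrections in
         the branch below. */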
    
      if (h < d1)
      {
         udiv_qrnnd_preinv(q, t[1], h, l, d1, i);
         umul_ppmm(t1[1], t1[0], q, d2);
         if (mpn_sub_n(t, t, t1, 2))
         {
            q--;
            if (mpn_add_n(t, t, dp, 2) == 0)
            {
               q--;
               
               ASSERT_CARRY(mpn_add_n(t, t, dp, 2));
            }
         }
      }
      else
      {
         ASSERT(h == d1);
         q = -1;
         t[1] = l;
         c3 = mpn_add_n(t, t, dp, 2);
         c1 = mpn_sub_1(t + 1, t + 1, 1, d2);
         c4 = c3 - c1;
       
         if (l >= d1)
         {
            ASSERT(c3 != 0);
            ASSERT(c4 == 0);
         } /* our guess is B + 1, so q = B - 1 is correct */
         else
         {
            ASSERT(c4 <= 0); /* our guess is B so q = B - 1 or B - 2 */
            if (c4 != 0)
            {
               q--;
               mpn_add_n(t, t, dp, 2);
            }
         }       
      }
    
      h = t[1];
      l = t[0];
      qp[qn] = q;
   }

   xp[1] = t[1];
   xp[0] = t[0];

   return qf;
}
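/* A sketch of the udiv_qrnnd_preinv step used above, following the
   Moller-Granlund "Improved division by invariant integers" algorithm.
   Assumptions: 64-bit limbs, __uint128_t available, d normalized (top
   bit set), u1 < d, and i = floor((B^2 - 1)/d) - B with B = 2^64, as
   produced by invert_limb.  Illustrative only, not the MPIR macro. */
typedef unsigned long long limb_t;

static limb_t
div_2by1_preinv (limb_t *r, limb_t u1, limb_t u0, limb_t d, limb_t i)
{
   __uint128_t q = (__uint128_t) i * u1 + (((__uint128_t) u1 << 64) | u0);
   limb_t q1 = (limb_t) (q >> 64) + 1;	/* candidate quotient */
   limb_t q0 = (limb_t) q;
   limb_t rem = u0 - q1 * d;		/* computed mod B */

   if (rem > q0)	/* candidate was one too large: adjust down */
   {
      q1--;
      rem += d;
   }
   if (rem >= d)	/* very unlikely: adjust up */
   {
      q1++;
      rem -= d;
   }
   *r = rem;
   return q1;
}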
Example #27
void
test_main (void)
{
  gmp_randstate_t rands;
  mpz_t r;
  unsigned i;

  gmp_randinit_default (rands);
  mpz_init (r);

  for (i = 0; ecc_curves[i]; i++)
    {
      const struct ecc_curve *ecc = ecc_curves[i];
      mp_size_t size = ecc_size (ecc);
      mp_limb_t *p = xalloc_limbs (ecc_size_j (ecc));
      mp_limb_t *q = xalloc_limbs (ecc_size_j (ecc));
      mp_limb_t *n = xalloc_limbs (size);
      mp_limb_t *scratch = xalloc_limbs (ecc->mul_g_itch);

      mpn_zero (n, size);

      n[0] = 1;
      ecc->mul_g (ecc, p, n, scratch);
      ecc->h_to_a (ecc, 0, p, p, scratch);

      if (mpn_cmp (p, ecc->g, 2*size) != 0)
	{
	  fprintf (stderr, "ecc->mul_g with n = 1 failed.\n");
	  abort ();
	}

      for (n[0] = 2; n[0] <= 4; n[0]++)
	{
	  ecc->mul_g (ecc, p, n, scratch);
	  test_ecc_mul_h (i, n[0], p);
	}

      /* (order - 1) * g = - g */
      mpn_sub_1 (n, ecc->q.m, size, 1);
      ecc->mul_g (ecc, p, n, scratch);
      ecc->h_to_a (ecc, 0, p, p, scratch);
      if (ecc->p.bit_size == 255)
	/* For edwards curves, - (x,y ) == (-x, y). FIXME: Swap x and
	   y, to get identical negation? */
	mpn_sub_n (p, ecc->p.m, p, size);
      else
	mpn_sub_n (p + size, ecc->p.m, p + size, size);
      if (mpn_cmp (p, ecc->g, 2*size) != 0)
	{
	  fprintf (stderr, "ecc->mul_g with n = order - 1 failed.\n");
	  abort ();
	}

      free (n);
      free (p);
      free (q);
      free (scratch);
    }
  mpz_clear (r);
  gmp_randclear (rands);
}
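/* The negation convention being tested above, as a sketch (the helper
   name is hypothetical): on a short Weierstrass curve -(x,y) = (x, p - y),
   while on the Edwards curve used when bit_size == 255 it is
   -(x,y) = (p - x, y), which is why the test negates a different
   coordinate in each case.  Assumes 0 < c < p so no borrow occurs. */
static void
coord_negate (mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *c,
	      mp_size_t n)
{
  mpn_sub_n (r, p, c, n);	/* r = p - c, the negated coordinate */
}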
Example #28
void
mpf_ui_sub (mpf_ptr r, unsigned long int u, mpf_srcptr v)
{
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t uexp;
  mp_size_t ediff;
  int negate;
  mp_limb_t ulimb;
  TMP_DECL;

  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }
  if (vsize == 0)
    {
      mpf_set_ui (r, u);
      return;
    }

  /* If signs of U and V are different, perform addition.  */
  if (vsize < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_add_ui (r, &v_negated, u);
      return;
    }

  TMP_MARK;

  /* Signs are now known to be the same.  */

  ulimb = u;
  /* Make U be the operand with the largest exponent.  */
  if (1 < v->_mp_exp)
    {
      negate = 1;
      usize = ABS (vsize);
      vsize = 1;
      up = v->_mp_d;
      vp = &ulimb;
      rp = r->_mp_d;
      prec = r->_mp_prec + 1;
      uexp = v->_mp_exp;
      ediff = uexp - 1;
    }
  else
    {
      negate = 0;
      usize = 1;
      vsize = ABS (vsize);
      up = &ulimb;
      vp = v->_mp_d;
      rp = r->_mp_d;
      prec = r->_mp_prec;
      uexp = 1;
      ediff = 1 - v->_mp_exp;
    }

  /* Ignore leading limbs in U and V that are equal.  Doing
     this helps increase the precision of the result.  */
  if (ediff == 0)
    {
      /* This loop normally exits immediately.  Optimize for that.  */
      for (;;)
	{
	  usize--;
	  vsize--;
	  if (up[usize] != vp[vsize])
	    break;
	  uexp--;
	  if (usize == 0)
	    goto Lu0;
	  if (vsize == 0)
	    goto Lv0;
	}
      usize++;
      vsize++;
      /* Note that either operand (but not both operands) might now have
	 leading zero limbs.  It matters only that U is unnormalized if
	 vsize is now zero, and vice versa.  And it is only in that case
	 that we have to adjust uexp.  */
      if (vsize == 0)
      Lv0:
	while (usize != 0 && up[usize - 1] == 0)
	  usize--, uexp--;
      if (usize == 0)
      Lu0:
	while (vsize != 0 && vp[vsize - 1] == 0)
	  vsize--, uexp--;
    }

  /* If U extends beyond PREC, ignore the part that does.  */
  if (usize > prec)
    {
      up += usize - prec;
      usize = prec;
    }

  /* If V extends beyond PREC, ignore the part that does.
     Note that this may make vsize negative.  */
  if (vsize + ediff > prec)
    {
      vp += vsize + ediff - prec;
      vsize = prec - ediff;
    }

  /* Allocate temp space for the result.  Allocate
     just vsize + ediff later???  */
  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
	MPN_COPY (rp, up, usize);
      rsize = usize;
    }
  else
    {
      /* Locate the least significant non-zero limb in (the needed
	 parts of) U and V, to simplify the code below.  */
      for (;;)
	{
	  if (vsize == 0)
	    {
	      MPN_COPY (rp, up, usize);
	      rsize = usize;
	      goto done;
	    }
	  if (vp[0] != 0)
	    break;
	  vp++, vsize--;
	}
      for (;;)
	{
	  if (usize == 0)
	    {
	      MPN_COPY (rp, vp, vsize);
	      rsize = vsize;
	      negate ^= 1;
	      goto done;
	    }
	  if (up[0] != 0)
	    break;
	  up++, usize--;
	}

      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff)
	{
	  /* U and V partially overlaps.  */
	  if (ediff == 0)
	    {
	      /* Have to compare the leading limbs of u and v
		 to determine whether to compute u - v or v - u.  */
	      if (usize > vsize)
		{
		  /* uuuu     */
		  /* vv       */
		  int cmp;
		  cmp = mpn_cmp (up + usize - vsize, vp, vsize);
		  if (cmp >= 0)
		    {
		      mp_size_t size;
		      size = usize - vsize;
		      MPN_COPY (tp, up, size);
		      mpn_sub_n (tp + size, up + size, vp, vsize);
		      rsize = usize;
		    }
		  else
		    {
		      /* vv       */  /* Swap U and V. */
		      /* uuuu     */
		      mp_size_t size, i;
		      size = usize - vsize;
		      tp[0] = -up[0] & GMP_NUMB_MASK;
		      for (i = 1; i < size; i++)
			tp[i] = ~up[i] & GMP_NUMB_MASK;
		      mpn_sub_n (tp + size, vp, up + size, vsize);
		      mpn_sub_1 (tp + size, tp + size, vsize, (mp_limb_t) 1);
		      negate ^= 1;
		      rsize = usize;
		    }
		}
	      else if (usize < vsize)
		{
		  /* uuuu     */
		  /* vvvvvvv  */
		  int cmp;
		  cmp = mpn_cmp (up, vp + vsize - usize, usize);
		  if (cmp > 0)
		    {
		      mp_size_t size, i;
		      size = vsize - usize;
		      tp[0] = -vp[0] & GMP_NUMB_MASK;
		      for (i = 1; i < size; i++)
			tp[i] = ~vp[i] & GMP_NUMB_MASK;
		      mpn_sub_n (tp + size, up, vp + size, usize);
		      mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
		      rsize = vsize;
		    }
		  else
		    {
		      /* vvvvvvv  */  /* Swap U and V. */
		      /* uuuu     */
		      /* This is the only place we can get 0.0.  */
		      mp_size_t size;
		      size = vsize - usize;
		      MPN_COPY (tp, vp, size);
		      mpn_sub_n (tp + size, vp + size, up, usize);
		      negate ^= 1;
		      rsize = vsize;
		    }
		}
	      else
		{
		  /* uuuu     */
		  /* vvvv     */
		  int cmp;
		  cmp = mpn_cmp (up, vp + vsize - usize, usize);
		  if (cmp > 0)
		    {
		      mpn_sub_n (tp, up, vp, usize);
		      rsize = usize;
		    }
		  else
		    {
		      mpn_sub_n (tp, vp, up, usize);
		      negate ^= 1;
		      rsize = usize;
		      /* can give zero */
		    }
		}
	    }
	  else
	    {
	      if (vsize + ediff <= usize)
		{
		  /* uuuu     */
		  /*   v      */
		  mp_size_t size;
		  size = usize - ediff - vsize;
		  MPN_COPY (tp, up, size);
		  mpn_sub (tp + size, up + size, usize - size, vp, vsize);
		  rsize = usize;
		}
	      else
		{
		  /* uuuu     */
		  /*   vvvvv  */
		  mp_size_t size, i;
		  size = vsize + ediff - usize;
		  tp[0] = -vp[0] & GMP_NUMB_MASK;
		  for (i = 1; i < size; i++)
		    tp[i] = ~vp[i] & GMP_NUMB_MASK;
		  mpn_sub (tp + size, up, usize, vp + size, usize - ediff);
		  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
		  rsize = vsize + ediff;
		}
	    }
	}
      else
	{
	  /* uuuu     */
	  /*      vv  */
	  mp_size_t size, i;
	  size = vsize + ediff - usize;
	  tp[0] = -vp[0] & GMP_NUMB_MASK;
	  for (i = 1; i < vsize; i++)
	    tp[i] = ~vp[i] & GMP_NUMB_MASK;
	  for (i = vsize; i < size; i++)
	    tp[i] = GMP_NUMB_MAX;
	  mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);
	  rsize = size + usize;
	}

      /* Full normalize.  Optimize later.  */
      while (rsize != 0 && tp[rsize - 1] == 0)
	{
	  rsize--;
	  uexp--;
	}
      MPN_COPY (rp, tp, rsize);
    }

 done:
  r->_mp_size = negate ? -rsize : rsize;
  r->_mp_exp = uexp;
  TMP_FREE;
}
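/* A minimal sketch, not GMP API, of the complement trick used above when V
   lies entirely below U: compute {r, un + k} = {u, un} * B^k - {v, k}.
   Since B^k - v equals the two's complement of v over k limbs, the low
   part is -v[0] followed by ones' complements, and a single borrow
   propagates into the high part.  Assumes v[0] != 0 (the code above
   guarantees this by skipping zero low limbs first) and {u, un} > 0. */
static void
sub_low_tail (mp_ptr r, mp_srcptr u, mp_size_t un, mp_srcptr v, mp_size_t k)
{
  mp_size_t i;

  r[0] = -v[0] & GMP_NUMB_MASK;		/* two's complement of the low limb */
  for (i = 1; i < k; i++)
    r[i] = ~v[i] & GMP_NUMB_MASK;	/* ones' complement of the rest */
  mpn_sub_1 (r + k, u, un, (mp_limb_t) 1);	/* borrow out of the tail */
}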
Example #29
int
mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx,cx;
  mpfr_uexp_t d;
  mpfr_prec_t p, sh, cnt;
  mp_size_t n;
  mp_limb_t *ap, *bp, *cp;
  mp_limb_t limb;
  int inexact;
  mp_limb_t bcp,bcp1; /* Cp and C'p+1 */
  mp_limb_t bbcp = (mp_limb_t) -1, bbcp1 = (mp_limb_t) -1; /* Cp+1 and C'p+2,
    gcc claims that they might be used uninitialized. We fill them with invalid
    values, which should produce a failure if so. See README.dev file. */

  MPFR_TMP_DECL(marker);

  MPFR_TMP_MARK(marker);

  MPFR_ASSERTD(MPFR_PREC(a) == MPFR_PREC(b) && MPFR_PREC(b) == MPFR_PREC(c));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(b));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(c));

  /* Read prec and num of limbs */
  p = MPFR_PREC (b);
  n = MPFR_PREC2LIMBS (p);

  /* Fast cmp of |b| and |c|*/
  bx = MPFR_GET_EXP (b);
  cx = MPFR_GET_EXP (c);
  if (MPFR_UNLIKELY(bx == cx))
    {
      mp_size_t k = n - 1;
      /* Check mantissas since the exponents are equal */
      bp = MPFR_MANT(b);
      cp = MPFR_MANT(c);
      while (k>=0 && MPFR_UNLIKELY(bp[k] == cp[k]))
        k--;
      if (MPFR_UNLIKELY(k < 0))
        /* b == c ! */
        {
          /* Return exact number 0 */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET(0);
        }
      else if (bp[k] > cp[k])
        goto BGreater;
      else
        {
          MPFR_ASSERTD(bp[k]<cp[k]);
          goto CGreater;
        }
    }
  else if (MPFR_UNLIKELY(bx < cx))
    {
      /* Swap b and c and set sign */
      mpfr_srcptr t;
      mpfr_exp_t tx;
    CGreater:
      MPFR_SET_OPPOSITE_SIGN(a,b);
      t  = b;  b  = c;  c  = t;
      tx = bx; bx = cx; cx = tx;
    }
  else
    {
      /* b > c */
    BGreater:
      MPFR_SET_SAME_SIGN(a,b);
    }

  /* Now b > c */
  MPFR_ASSERTD(bx >= cx);
  d = (mpfr_uexp_t) bx - cx;
  DEBUG (printf ("New with diff=%lu\n", (unsigned long) d));

  if (MPFR_UNLIKELY(d <= 1))
    {
      if (MPFR_LIKELY(d < 1))
        {
          /* <-- b -->
             <-- c --> : exact sub */
          ap = MPFR_MANT(a);
          mpn_sub_n (ap, MPFR_MANT(b), MPFR_MANT(c), n);
          /* Normalize */
        ExactNormalize:
          limb = ap[n-1];
          if (MPFR_LIKELY(limb))
            {
              /* First limb is not zero. */
              count_leading_zeros(cnt, limb);
              /* cnt can be 0 when we come from SubD1Lose */
              if (MPFR_LIKELY(cnt))
                {
                  mpn_lshift(ap, ap, n, cnt); /* Normalize number */
                  bx -= cnt; /* Update final expo */
                }
              /* Last limb should be ok */
              MPFR_ASSERTD(!(ap[0] & MPFR_LIMB_MASK((unsigned int) (-p)
                                                    % GMP_NUMB_BITS)));
            }
          else
            {
              /* First limb is zero */
              mp_size_t k = n-1, len;
              /* Find the first limb not equal to zero.
                 FIXME: It is assumed that it exists (since |b| > |c|
                 and same prec). */
              do
                {
                  MPFR_ASSERTD( k > 0 );
                  limb = ap[--k];
                }
              while (limb == 0);
              MPFR_ASSERTD(limb != 0);
              count_leading_zeros(cnt, limb);
              k++;
              len = n - k; /* Number of high zero limbs */
              MPFR_ASSERTD(k >= 0);
              if (MPFR_LIKELY(cnt))
                mpn_lshift(ap+len, ap, k, cnt); /* Normalize the High Limb*/
              else
                {
                  /* Must use DECR since src and dest may overlap & dest>=src*/
                  MPN_COPY_DECR(ap+len, ap, k);
                }
              MPN_ZERO(ap, len); /* Zeroing the last limbs */
              bx -= cnt + len*GMP_NUMB_BITS; /* Update Expo */
              /* Last limb should be ok */
              MPFR_ASSERTD(!(ap[len]&MPFR_LIMB_MASK((unsigned int) (-p)
                                                    % GMP_NUMB_BITS)));
            }
          /* Check expo underflow */
          if (MPFR_UNLIKELY(bx < __gmpfr_emin))
            {
              MPFR_TMP_FREE(marker);
              /* inexact=0 */
              DEBUG( printf("(D==0 Underflow)\n") );
              if (rnd_mode == MPFR_RNDN &&
                  (bx < __gmpfr_emin - 1 ||
                   (/*inexact >= 0 &&*/ mpfr_powerof2_raw (a))))
                rnd_mode = MPFR_RNDZ;
              return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
            }
          MPFR_SET_EXP (a, bx);
          /* No rounding is necessary since the result is exact */
          MPFR_ASSERTD(ap[n-1] > ~ap[n-1]);
          MPFR_TMP_FREE(marker);
          return 0;
        }
      else /* if (d == 1) */
        {
          /* | <-- b -->
             |  <-- c --> */
          mp_limb_t c0, mask;
          mp_size_t k;
          MPFR_UNSIGNED_MINUS_MODULO(sh, p);
          /* If we lose at least one bit, compute 2*b-c (Exact)
           * else compute b-c/2 */
          bp = MPFR_MANT(b);
          cp = MPFR_MANT(c);
          k = n-1;
          limb = bp[k] - cp[k]/2;
          if (limb > MPFR_LIMB_HIGHBIT)
            {
              /* We can't lose precision: compute b-c/2 */
              /* Shift c in the allocated temporary block */
            SubD1NoLose:
              c0 = cp[0] & (MPFR_LIMB_ONE<<sh);
              cp = MPFR_TMP_LIMBS_ALLOC (n);
              mpn_rshift(cp, MPFR_MANT(c), n, 1);
              if (MPFR_LIKELY(c0 == 0))
                {
                  /* Result is exact: no need of rounding! */
                  ap = MPFR_MANT(a);
                  mpn_sub_n (ap, bp, cp, n);
                  MPFR_SET_EXP(a, bx); /* No expo overflow! */
                  /* No truncate or normalize is needed */
                  MPFR_ASSERTD(ap[n-1] > ~ap[n-1]);
                  /* No rounding is necessary since the result is exact */
                  MPFR_TMP_FREE(marker);
                  return 0;
                }
              ap = MPFR_MANT(a);
              mask = ~MPFR_LIMB_MASK(sh);
              cp[0] &= mask; /* Delete last bit of c */
              mpn_sub_n (ap, bp, cp, n);
              MPFR_SET_EXP(a, bx);                 /* No expo overflow! */
              MPFR_ASSERTD( !(ap[0] & ~mask) );    /* Check last bits */
              /* No normalize is needed */
              MPFR_ASSERTD(ap[n-1] > ~ap[n-1]);
              /* Rounding is necessary since c0 = 1*/
              /* Cp =-1 and C'p+1=0 */
              bcp = 1; bcp1 = 0;
              if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
                {
                  /* Even rule applies: check Ap-1 */
                  if (MPFR_LIKELY( (ap[0] & (MPFR_LIMB_ONE<<sh)) == 0) )
                    goto truncate;
                  else
                    goto sub_one_ulp;
                }
              MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
              if (rnd_mode == MPFR_RNDZ)
                goto sub_one_ulp;
              else
                goto truncate;
            }
          else if (MPFR_LIKELY(limb < MPFR_LIMB_HIGHBIT))
            {
              /* We lose at least one bit of prec */
              /* Computation of 2*b-c (exact) */
              /* Shift b in the allocated temporary block */
            SubD1Lose:
              bp = MPFR_TMP_LIMBS_ALLOC (n);
              mpn_lshift (bp, MPFR_MANT(b), n, 1);
              ap = MPFR_MANT(a);
              mpn_sub_n (ap, bp, cp, n);
              bx--;
              goto ExactNormalize;
            }
          else
            {
              /* Case: limb = 100000000000 */
              /* Check while b[k] == c'[k] (C' is C shifted by 1) */
              /* If b[k]<c'[k] => We lose at least one bit*/
              /* If b[k]>c'[k] => We don't lose any bit */
              /* If k==-1 => We don't lose any bit
                 AND the result is 100000000000 0000000000 00000000000 */
              mp_limb_t carry;
              do {
                carry = cp[k]&MPFR_LIMB_ONE;
                k--;
              } while (k>=0 &&
                       bp[k]==(carry=cp[k]/2+(carry<<(GMP_NUMB_BITS-1))));
              if (MPFR_UNLIKELY(k<0))
                {
                  /*If carry then (sh==0 and Virtual c'[-1] > Virtual b[-1]) */
                  if (MPFR_UNLIKELY(carry)) /* carry = cp[0]&MPFR_LIMB_ONE */
                    {
                      /* FIXME: Can be faster? */
                      MPFR_ASSERTD(sh == 0);
                      goto SubD1Lose;
                    }
                  /* Result is a power of 2 */
                  ap = MPFR_MANT (a);
                  MPN_ZERO (ap, n);
                  ap[n-1] = MPFR_LIMB_HIGHBIT;
                  MPFR_SET_EXP (a, bx); /* No expo overflow! */
                  /* No Normalize is needed*/
                  /* No Rounding is needed */
                  MPFR_TMP_FREE (marker);
                  return 0;
                }
              /* carry = cp[k]/2+(cp[k-1]&1)<<(GMP_NUMB_BITS-1) = c'[k]*/
              else if (bp[k] > carry)
                goto SubD1NoLose;
              else
                {
                  MPFR_ASSERTD(bp[k]<carry);
                  goto SubD1Lose;
                }
            }
        }
    }
  else if (MPFR_UNLIKELY(d >= p))
    {
      ap = MPFR_MANT(a);
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      /* We can't set A before since we use cp for rounding... */
      /* Perform rounding: check if a=b or a=b-ulp(b) */
      if (MPFR_UNLIKELY(d == p))
        {
          /* cp == -1 and c'p+1 = ? */
          bcp  = 1;
          /* We need Cp+1 later for a very improbable case. */
          bbcp = (MPFR_MANT(c)[n-1] & (MPFR_LIMB_ONE<<(GMP_NUMB_BITS-2)));
          /* We also need C'p+1 for an even more improbable case... */
          if (MPFR_LIKELY( bbcp ))
            bcp1 = 1;
          else
            {
              cp = MPFR_MANT(c);
              if (MPFR_UNLIKELY(cp[n-1] == MPFR_LIMB_HIGHBIT))
                {
                  mp_size_t k = n-1;
                  do {
                    k--;
                  } while (k>=0 && cp[k]==0);
                  bcp1 = (k>=0);
                }
              else
                bcp1 = 1;
            }
          DEBUG( printf("(D=P) Cp=-1 Cp+1=%d C'p+1=%d \n", bbcp!=0, bcp1!=0) );
          bp = MPFR_MANT (b);

          /* Even if src and dest overlap, it is ok using MPN_COPY */
          if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
            {
              if (MPFR_UNLIKELY( bcp && bcp1==0 ))
                /* Cp=-1 and C'p+1=0: Even rule Apply! */
                /* Check Ap-1 = Bp-1 */
                if ((bp[0] & (MPFR_LIMB_ONE<<sh)) == 0)
                  {
                    MPN_COPY(ap, bp, n);
                    goto truncate;
                  }
              MPN_COPY(ap, bp, n);
              goto sub_one_ulp;
            }
          MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
          if (rnd_mode == MPFR_RNDZ)
            {
              MPN_COPY(ap, bp, n);
              goto sub_one_ulp;
            }
          else
            {
              MPN_COPY(ap, bp, n);
              goto truncate;
            }
        }
      else
        {
          /* Cp=0, Cp+1=-1 if d==p+1, C'p+1=-1 */
          bcp = 0; bbcp = (d==p+1); bcp1 = 1;
          DEBUG( printf("(D>P) Cp=%d Cp+1=%d C'p+1=%d\n", bcp!=0,bbcp!=0,bcp1!=0) );
          /* Need to compute C'p+2 if d==p+1 and if rnd_mode=NEAREST
             (Because of a very improbable case) */
          if (MPFR_UNLIKELY(d==p+1 && rnd_mode==MPFR_RNDN))
            {
              cp = MPFR_MANT(c);
              if (MPFR_UNLIKELY(cp[n-1] == MPFR_LIMB_HIGHBIT))
                {
                  mp_size_t k = n-1;
                  do {
                    k--;
                  } while (k>=0 && cp[k]==0);
                  bbcp1 = (k>=0);
                }
              else
                bbcp1 = 1;
              DEBUG( printf("(D>P) C'p+2=%d\n", bbcp1!=0) );
            }
          /* Copy mantissa B in A */
          MPN_COPY(ap, MPFR_MANT(b), n);
          /* Round */
          if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
            goto truncate;
          MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
          if (rnd_mode == MPFR_RNDZ)
            goto sub_one_ulp;
          else /* rnd_mode = AWAY */
            goto truncate;
        }
    }
  else
    {
      mpfr_uexp_t dm;
      mp_size_t m;
      mp_limb_t mask;

      /* General case: 2 <= d < p */
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      cp = MPFR_TMP_LIMBS_ALLOC (n);

      /* Shift c in temporary allocated place */
      dm = d % GMP_NUMB_BITS;
      m = d / GMP_NUMB_BITS;
      if (MPFR_UNLIKELY(dm == 0))
        {
          /* dm = 0 and m > 0: Just copy */
          MPFR_ASSERTD(m!=0);
          MPN_COPY(cp, MPFR_MANT(c)+m, n-m);
          MPN_ZERO(cp+n-m, m);
        }
      else if (MPFR_LIKELY(m == 0))
        {
          /* dm >=2 and m == 0: just shift */
          MPFR_ASSERTD(dm >= 2);
          mpn_rshift(cp, MPFR_MANT(c), n, dm);
        }
      else
        {
          /* dm > 0 and m > 0: shift and zero  */
          mpn_rshift(cp, MPFR_MANT(c)+m, n-m, dm);
          MPN_ZERO(cp+n-m, m);
        }

      DEBUG( mpfr_print_mant_binary("Before", MPFR_MANT(c), p) );
      DEBUG( mpfr_print_mant_binary("B=    ", MPFR_MANT(b), p) );
      DEBUG( mpfr_print_mant_binary("After ", cp, p) );

      /* Compute bcp=Cp and bcp1=C'p+1 */
      if (MPFR_LIKELY(sh))
        {
          /* Try to compute them from C' rather than C (FIXME: Faster?) */
          bcp = (cp[0] & (MPFR_LIMB_ONE<<(sh-1))) ;
          if (MPFR_LIKELY( cp[0] & MPFR_LIMB_MASK(sh-1) ))
            bcp1 = 1;
          else
            {
              /* We can't compute C'p+1 from C'. Compute it from C */
              /* Start from bit x=p-d+sh in mantissa C
                 (+sh since we have already looked at sh bits in C'!) */
              mpfr_prec_t x = p-d+sh-1;
              if (MPFR_LIKELY(x>p))
                /* We have already looked at all the bits of c, so C'p+1 = 0 */
                bcp1 = 0;
              else
                {
                  mp_limb_t *tp = MPFR_MANT(c);
                  mp_size_t kx = n-1 - (x / GMP_NUMB_BITS);
                  mpfr_prec_t sx = GMP_NUMB_BITS-1-(x%GMP_NUMB_BITS);
                  DEBUG (printf ("(First) x=%lu Kx=%ld Sx=%lu\n",
                                 (unsigned long) x, (long) kx,
                                 (unsigned long) sx));
                  /* Looks at the last bits of limb kx (if sx=0 does nothing)*/
                  if (tp[kx] & MPFR_LIMB_MASK(sx))
                    bcp1 = 1;
                  else
                    {
                      /*kx += (sx==0);*/
                      /*If sx==0, tp[kx] hasn't been checked*/
                      do {
                        kx--;
                      } while (kx>=0 && tp[kx]==0);
                      bcp1 = (kx >= 0);
                    }
                }
            }
        }
      else
        {
          /* Compute Cp and C'p+1 from C with sh=0 */
          mp_limb_t *tp = MPFR_MANT(c);
          /* Start from bit x=p-d in mantissa C */
          mpfr_prec_t  x = p-d;
          mp_size_t   kx = n-1 - (x / GMP_NUMB_BITS);
          mpfr_prec_t sx = GMP_NUMB_BITS-1-(x%GMP_NUMB_BITS);
          MPFR_ASSERTD(p >= d);
          bcp = (tp[kx] & (MPFR_LIMB_ONE<<sx));
          /* Looks at the last bits of limb kx (If sx=0, does nothing)*/
          if (tp[kx] & MPFR_LIMB_MASK(sx))
            bcp1 = 1;
          else
            {
              /*kx += (sx==0);*/ /*If sx==0, tp[kx] hasn't been checked*/
              do {
                kx--;
              } while (kx>=0 && tp[kx]==0);
              bcp1 = (kx>=0);
            }
        }
      DEBUG( printf("sh=%lu Cp=%d C'p+1=%d\n", sh, bcp!=0, bcp1!=0) );

      /* Check if we can lose a bit, and if so compute Cp+1 and C'p+2 */
      bp = MPFR_MANT(b);
      if (MPFR_UNLIKELY((bp[n-1]-cp[n-1]) <= MPFR_LIMB_HIGHBIT))
        {
          /* We can lose a bit so we precompute Cp+1 and C'p+2 */
          /* Test for trivial case: since C'p+1=0, Cp+1=0 and C'p+2 =0 */
          if (MPFR_LIKELY(bcp1 == 0))
            {
              bbcp = 0;
              bbcp1 = 0;
            }
          else /* bcp1 != 0 */
            {
              /* We can lose a bit:
                 compute Cp+1 and C'p+2 from mantissa C */
              mp_limb_t *tp = MPFR_MANT(c);
              /* Start from bit x=(p+1)-d in mantissa C */
              mpfr_prec_t x  = p+1-d;
              mp_size_t kx = n-1 - (x/GMP_NUMB_BITS);
              mpfr_prec_t sx = GMP_NUMB_BITS-1-(x%GMP_NUMB_BITS);
              MPFR_ASSERTD(p > d);
              DEBUG (printf ("(pre) x=%lu Kx=%ld Sx=%lu\n",
                             (unsigned long) x, (long) kx,
                             (unsigned long) sx));
              bbcp = (tp[kx] & (MPFR_LIMB_ONE<<sx)) ;
              /* Looks at the last bits of limb kx (If sx=0, does nothing)*/
              /* If Cp+1=0, since C'p+1!=0, C'p+2=1 ! */
              if (MPFR_LIKELY(bbcp==0 || (tp[kx]&MPFR_LIMB_MASK(sx))))
                bbcp1 = 1;
              else
                {
                  /*kx += (sx==0);*/ /*If sx==0, tp[kx] hasn't been checked*/
                  do {
                    kx--;
                  } while (kx>=0 && tp[kx]==0);
                  bbcp1 = (kx>=0);
                  DEBUG (printf ("(Pre) Scan done for %ld\n", (long) kx));
                }
            } /*End of Bcp1 != 0*/
          DEBUG( printf("(Pre) Cp+1=%d C'p+2=%d\n", bbcp!=0, bbcp1!=0) );
        } /* End of "can lose a bit" */

      /* Clean shifted C' */
      mask = ~MPFR_LIMB_MASK (sh);
      cp[0] &= mask;

      /* Subtract the mantissa c from b in a */
      ap = MPFR_MANT(a);
      mpn_sub_n (ap, bp, cp, n);
      DEBUG( mpfr_print_mant_binary("Sub=  ", ap, p) );

      /* Normalize: we lose at most one bit */
      if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
        {
          /* High bit is not set and we have to fix it! */
          /* Ap >= 010000xxx001 */
          mpn_lshift(ap, ap, n, 1);
          /* Ap >= 100000xxx010 */
          if (MPFR_UNLIKELY(bcp!=0)) /* Check if Cp = -1 */
            /* Since Cp == -1, we have to subtract one more */
            {
              mpn_sub_1(ap, ap, n, MPFR_LIMB_ONE<<sh);
              MPFR_ASSERTD(MPFR_LIMB_MSB(ap[n-1]) != 0);
            }
          /* Ap >= 10000xxx001 */
          /* Final exponent -1 since we have shifted the mantissa */
          bx--;
          /* Update bcp and bcp1 */
          MPFR_ASSERTN(bbcp != (mp_limb_t) -1);
          MPFR_ASSERTN(bbcp1 != (mp_limb_t) -1);
          bcp  = bbcp;
          bcp1 = bbcp1;
          /* We don't have a valid Cp+1 anymore!
             But since Ap >= 100000xxx001, the final sub can't unnormalize! */
        }
      MPFR_ASSERTD( !(ap[0] & ~mask) );

      /* Rounding */
      if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
        {
          if (MPFR_LIKELY(bcp==0))
            goto truncate;
          else if ((bcp1) || ((ap[0] & (MPFR_LIMB_ONE<<sh)) != 0))
            goto sub_one_ulp;
          else
            goto truncate;
        }

      /* Update rounding mode */
      MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
      if (rnd_mode == MPFR_RNDZ && (MPFR_LIKELY(bcp || bcp1)))
        goto sub_one_ulp;
      goto truncate;
    }
  MPFR_RET_NEVER_GO_HERE ();

  /* Subtract one ulp from the result */
 sub_one_ulp:
  mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
  /* Result should be smaller than exact value: inexact=-1 */
  inexact = -1;
  /* Check normalisation */
  if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
    {
      /* ap was a power of 2, and we lose a bit */
      /* Now it is 0111111111111111111[00000 */
      mpn_lshift(ap, ap, n, 1);
      bx--;
      /* And the lost bit x depends on Cp+1, and Cp */
      /* Compute Cp+1 if it isn't already computed (i.e. d==1) */
      /* FIXME: Is this case possible? */
      if (MPFR_UNLIKELY(d == 1))
        bbcp = 0;
      DEBUG( printf("(SubOneUlp)Cp=%d, Cp+1=%d C'p+1=%d\n", bcp!=0,bbcp!=0,bcp1!=0));
      /* Compute the last bit (Since we have shifted the mantissa)
         we need one more bit!*/
      MPFR_ASSERTN(bbcp != (mp_limb_t) -1);
      if ( (rnd_mode == MPFR_RNDZ && bcp==0)
           || (rnd_mode==MPFR_RNDN && bbcp==0)
           || (bcp && bcp1==0) ) /*Exact result*/
        {
          ap[0] |= MPFR_LIMB_ONE<<sh;
          if (rnd_mode == MPFR_RNDN)
            inexact = 1;
          DEBUG( printf("(SubOneUlp) Last bit set\n") );
        }
      /* Result could be exact if C'p+1 = 0 and rnd == Zero
         since we have had one more bit to the result */
      /* Fixme: rnd_mode == MPFR_RNDZ needed ? */
      if (bcp1==0 && rnd_mode==MPFR_RNDZ)
        {
          DEBUG( printf("(SubOneUlp) Exact result\n") );
          inexact = 0;
        }
    }

  goto end_of_sub;

 truncate:
  /* Check if the result is an exact power of 2: 100000000000
     in which cases, we could have to do sub_one_ulp due to some nasty reasons:
     If Result is a Power of 2:
      + If rnd = AWAY,
      |  If Cp=-1 and C'p+1 = 0, SubOneUlp and the result is EXACT.
         If Cp=-1 and C'p+1 =-1, SubOneUlp and the result is above.
         Otherwise truncate
      + If rnd = NEAREST,
         If Cp= 0 and Cp+1  =-1 and C'p+2=-1, SubOneUlp and the result is above
         If cp=-1 and C'p+1 = 0, SubOneUlp and the result is exact.
         Otherwise truncate.
      X bit should always be set if SubOneUlp*/
  if (MPFR_UNLIKELY(ap[n-1] == MPFR_LIMB_HIGHBIT))
    {
      mp_size_t k = n-1;
      do {
        k--;
      } while (k>=0 && ap[k]==0);
      if (MPFR_UNLIKELY(k<0))
        {
          /* It is a power of 2! */
          /* Compute Cp+1 if it isn't already computed (i.e. d==1) */
          /* FIXME: Is this case possible? */
          if (d == 1)
            bbcp=0;
          DEBUG( printf("(Truncate) Cp=%d, Cp+1=%d C'p+1=%d C'p+2=%d\n", \
                 bcp!=0, bbcp!=0, bcp1!=0, bbcp1!=0) );
          MPFR_ASSERTN(bbcp != (mp_limb_t) -1);
          MPFR_ASSERTN((rnd_mode != MPFR_RNDN) || (bcp != 0) || (bbcp == 0) || (bbcp1 != (mp_limb_t) -1));
          if (((rnd_mode != MPFR_RNDZ) && bcp)
              ||
              ((rnd_mode == MPFR_RNDN) && (bcp == 0) && (bbcp) && (bbcp1)))
            {
              DEBUG( printf("(Truncate) Do sub\n") );
              mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
              mpn_lshift(ap, ap, n, 1);
              ap[0] |= MPFR_LIMB_ONE<<sh;
              bx--;
              /* FIXME: Explain why it works (or why not)... */
              inexact = (bcp1 == 0) ? 0 : (rnd_mode==MPFR_RNDN) ? -1 : 1;
              goto end_of_sub;
            }
        }
    }

  /* Computation of the inexact flag. */
  inexact = MPFR_LIKELY(bcp || bcp1) ? 1 : 0;

 end_of_sub:
  /* Update Expo */
  /* FIXME: Is this test really useful?
      If d==0      : Exact case. This is never called.
      if 1 < d < p : bx=MPFR_EXP(b) or MPFR_EXP(b)-1 > MPFR_EXP(c) > emin
      if d == 1    : bx=MPFR_EXP(b). If we could lose any bits, the exact
                     normalisation is called.
      if d >=  p   : bx=MPFR_EXP(b) >= MPFR_EXP(c) + p > emin
     After SubOneUlp, we could have one bit less.
      if 1 < d < p : bx >= MPFR_EXP(b)-2 >= MPFR_EXP(c) > emin
      if d == 1    : bx >= MPFR_EXP(b)-1 = MPFR_EXP(c) > emin.
      if d >=  p   : bx >= MPFR_EXP(b)-1 > emin since p>=2.
  */
  MPFR_ASSERTD( bx >= __gmpfr_emin);
  /*
    if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      DEBUG( printf("(Final Underflow)\n") );
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (inexact >= 0 && mpfr_powerof2_raw (a))))
        rnd_mode = MPFR_RNDZ;
      MPFR_TMP_FREE(marker);
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }
  */
  MPFR_SET_EXP (a, bx);

  MPFR_TMP_FREE(marker);
  MPFR_RET (inexact * MPFR_INT_SIGN (a));
}
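/* A sketch, not MPFR API, of how rounding bits like bcp (Cp) and bcp1
   (C'p+1) above are obtained: for an MSB-aligned mantissa {m, n} with bit
   positions counted 0-based from the most significant bit, the round bit
   when keeping p bits is bit p, and the sticky bit is the OR of all bits
   below it.  Assumes p < n * GMP_NUMB_BITS. */
static void
round_and_sticky (mp_srcptr m, mp_size_t n, unsigned long p,
		  int *round_bit, int *sticky)
{
  mp_size_t k = n - 1 - (mp_size_t) (p / GMP_NUMB_BITS);
  unsigned long s = GMP_NUMB_BITS - 1 - (p % GMP_NUMB_BITS);

  *round_bit = (m[k] >> s) & 1;
  *sticky = (m[k] & (((mp_limb_t) 1 << s) - 1)) != 0;
  while (!*sticky && --k >= 0)		/* scan lower limbs for any set bit */
    *sticky = m[k] != 0;
}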
Example #30
/* 
   Computes the quotient and remainder of { np, 2*dn } by { dp, dn }.
   We require dp to be normalised and inv to be a precomputed inverse 
   of { dp, dn } given by mpn_invert.
*/
mp_limb_t 
mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, 
                           mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
   mp_limb_t cy, lo, ret = 0, ret2 = 0;
   mp_size_t m, i;
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;

   ASSERT(mpn_is_invert(inv, dp, dn));

   if (mpn_cmp(np + dn, dp, dn) >= 0)
   {
      ret2 = 1;
      mpn_sub_n(np + dn, np + dn, dp, dn);
   }

   tp = TMP_ALLOC_LIMBS(2*dn + 1);
   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);
   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);
   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);
   ret += mpn_add_1(qp, qp, dn, cy);

   /* 
      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then
      DX < B^{2*dn} <= D(X+1), thus
      Let N' = { np + dn - 1, dn + 1 }
      N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1
      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B
      There is either one integer in this range, or two. However, in the latter case
	  the left hand bound is either an integer or < 2/B below one.
   */
     
   if (UNLIKELY(ret == 1))
   {
      ret -= mpn_sub_1(qp, qp, dn, 1);
      ASSERT(ret == 0);
   }

   ret -= mpn_sub_1(qp, qp, dn, 1); 
   if (UNLIKELY(ret == ~CNST_LIMB(0))) 
      ret += mpn_add_1(qp, qp, dn, 1);
   /* ret is now guaranteed to be 0 or 1*/
   ASSERT(ret == 0);

   m = dn + 1;
   if ((dn <= MPN_FFT_MUL_N_MINSIZE) || (ret))
   {
      mpn_mul_n(tp, qp, dp, dn);
   } else
   {
      mp_limb_t cy, cy2;
      
      if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
         m = mpir_fft_adjust_limbs (m);
      cy = mpn_mulmod_Bexpp1_fft (tp, m, qp, dn, dp, dn);
      
      /* cy, {tp, m} = qp * dp mod (B^m+1) */ 
      cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);
      mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
          
      /* Make correction */    
      mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);
   }
   
   mpn_sub_n(np, np, tp, m);
   MPN_ZERO(np + m, 2*dn - m);
   while (np[dn] || mpn_cmp(np, dp, dn) >= 0)
   {
	   ret += mpn_add_1(qp, qp, dn, 1);
	   np[dn] -= mpn_sub_n(np, np, dp, dn);
   }
  
   /* Not possible for ret == 2 as we have qp*dp <= np */
   ASSERT(ret + ret2 < 2);

   TMP_FREE;

   return ret + ret2;
}
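/* For dn == 1 the precomputed inverse in the comment above degenerates to
   the classical reciprocal: inv = floor((B^2 - 1)/d) - B, so X = B + inv
   satisfies d*X < B^2 <= d*(X + 1).  A sketch assuming 64-bit limbs,
   __uint128_t, and d normalized (top bit set); mpn_invert generalizes
   this to dn limbs. */
typedef unsigned long long limb_t;

static limb_t
reciprocal_1 (limb_t d)
{
   __uint128_t b2m1 = ~(__uint128_t) 0;	/* B^2 - 1 */
   return (limb_t) (b2m1 / d - ((__uint128_t) 1 << 64));
}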