Beispiel #1
0
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.

   Iterates

     r' <-- r - r * (a^{k-1} r^k - 1) / n

   If

     a^{k-1} r^k = 1 (mod 2^m),

   then

     a^{k-1} r'^k = 1 (mod 2^{2m}),

   Compute the update term as

     r' = r - (a^{k-1} r^{k+1} - r) / k

   where we still have cancellation of low limbs.

 */
void
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_size_t sizes[GMP_LIMB_BITS * 2];
  mp_ptr akm1, tp, rnp, ep;
  mp_limb_t a0, r0, km1, kp1h, kinv;
  mp_size_t rn;
  unsigned i;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);
  ASSERT (k >= 3);

  TMP_MARK;

  akm1 = TMP_ALLOC_LIMBS (4*n);
  tp = akm1 + n;

  km1 = k-1;
  /* FIXME: Could arrange the iteration so we don't need to compute
     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
     that we can use wraparound also for a*r, since the low half is
     unchanged from the previous iteration. Or possibly mulmid. Also,
     a r = a^{1/k}, so we get that value too, for free? */
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */

  a0 = ap[0];
  binvert_limb (kinv, k);

  /* 4 bits: a^{1/k - 1} (mod 16):

	a % 8
	1 3 5 7
   k%4 +-------
     1 |1 1 1 1
     3 |1 9 9 1
  */
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
#if GMP_NUMB_BITS > 32
  {
    unsigned prec = 32;
    do
      {
	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
	prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (n == 1)
    {
      TMP_FREE;
      return;
    }

  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
  kp1h = k/2 + 1;

  /* FIXME: Special case for two limb iteration. */
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
  ep = rnp + n;

  /* FIXME: Possible to this on the fly with some bit fiddling. */
  for (i = 0; n > 1; n = (n + 1)/2)
    sizes[i++] = n;

  rn = 1;

  while (i-- > 0)
    {
      /* Compute x^{k+1}. */
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
			       final iteration. */
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);

      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */

      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
      ASSERT (mpn_cmp (ep, rp, rn) == 0);

      ASSERT (sizes[i] <= 2*rn);
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
      rn = sizes[i];
    }
  TMP_FREE;
}
Beispiel #2
0
/* 
   t = x - y - z or t = x - (y + z) which explains the name 
*/
mp_limb_t mpn_subadd_n(mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
   mp_limb_t ret;

   ASSERT(n > 0);
   ASSERT_MPN(x, n);
   ASSERT_MPN(y, n);
   ASSERT_MPN(z, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(t, x, n));
   ASSERT(MPN_SAME_OR_SEPARATE_P(t, y, n));
   ASSERT(MPN_SAME_OR_SEPARATE_P(t, z, n));

   if (t == x && t == y && t == z)
      return mpn_neg(t,z,n);  

   if (t == x && t == y)
   {
      ret = mpn_sub_n(t, x, y, n);
      ret += mpn_sub_n(t, t, z, n);
      
      return ret;
   }

   if (t == x && t == z)
   {
      ret = mpn_sub_n(t, x, z, n);
      ret += mpn_sub_n(t, t, y, n);
   
      return ret;
   }

   if (t == y && t == z)
   {
      ret = mpn_add_n(t, y, z, n);
      ret += mpn_sub_n(t, x, t, n);
   
      return ret;
   }

   if (t == x)
   {
      ret = mpn_sub_n(t, x, y, n);
      ret += mpn_sub_n(t, t, z, n);
   
      return ret;
   }
 
   if (t == y)
   {
      ret = mpn_sub_n(t, x, y, n);
      ret += mpn_sub_n(t, t, z, n);
   
      return ret;
   }

   if (t == z)
   {
      ret = mpn_sub_n(t, x, z, n);
      ret += mpn_sub_n(t, t, y, n);
   
      return ret;
   }

   ret = mpn_sub_n(t, x, z, n);
   ret += mpn_sub_n(t, t, y, n);

   return ret;
}
Beispiel #3
0
void
mpf_ui_sub (mpf_ptr r, unsigned long int u, mpf_srcptr v)
{
#if 1
  __mpf_struct uu;
  mp_limb_t ul;

  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }

  ul = u;
  uu._mp_size = 1;
  uu._mp_d = &ul;
  uu._mp_exp = 1;
  mpf_sub (r, &uu, v);

#else
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t uexp;
  mp_size_t ediff;
  int negate;
  mp_limb_t ulimb;
  TMP_DECL;

  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (u == 0)
    {
      mpf_neg (r, v);
      return;
    }
  if (vsize == 0)
    {
      mpf_set_ui (r, u);
      return;
    }

  /* If signs of U and V are different, perform addition.  */
  if (vsize < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_add_ui (r, &v_negated, u);
      return;
    }

  /* Signs are now known to be the same.  */
  ASSERT (vsize > 0);
  ulimb = u;
  /* Make U be the operand with the largest exponent.  */
  negate = 1 < v->_mp_exp;
  prec = r->_mp_prec + negate;
  rp = r->_mp_d;
  if (negate)
    {
      usize = vsize;
      vsize = 1;
      up = v->_mp_d;
      vp = &ulimb;
      uexp = v->_mp_exp;
      ediff = uexp - 1;

      /* If U extends beyond PREC, ignore the part that does.  */
      if (usize > prec)
	{
	  up += usize - prec;
	  usize = prec;
	}
      ASSERT (ediff > 0);
    }
  else
    {
      vp = v->_mp_d;
      ediff = 1 - v->_mp_exp;
  /* Ignore leading limbs in U and V that are equal.  Doing
     this helps increase the precision of the result.  */
      if (ediff == 0 && ulimb == vp[vsize - 1])
	{
	  usize = 0;
	  vsize--;
	  uexp = 0;
	  /* Note that V might now have leading zero limbs.
	     In that case we have to adjust uexp.  */
	  for (;;)
	    {
	      if (vsize == 0) {
		rsize = 0;
		uexp = 0;
		goto done;
	      }
	      if ( vp[vsize - 1] != 0)
		break;
	      vsize--, uexp--;
	    }
	}
      else
	{
	  usize = 1;
	  uexp = 1;
	  up = &ulimb;
	}
      ASSERT (usize <= prec);
    }

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
	MPN_COPY (rp, up, usize);
      rsize = usize;
    }
  else
    {
  /* If V extends beyond PREC, ignore the part that does.
     Note that this can make vsize neither zero nor negative.  */
  if (vsize + ediff > prec)
    {
      vp += vsize + ediff - prec;
      vsize = prec - ediff;
    }

      /* Locate the least significant non-zero limb in (the needed
	 parts of) U and V, to simplify the code below.  */
      ASSERT (vsize > 0);
      for (;;)
	{
	  if (vp[0] != 0)
	    break;
	  vp++, vsize--;
	  if (vsize == 0)
	    {
	      MPN_COPY (rp, up, usize);
	      rsize = usize;
	      goto done;
	    }
	}
      for (;;)
	{
	  if (usize == 0)
	    {
	      MPN_COPY (rp, vp, vsize);
	      rsize = vsize;
	      negate ^= 1;
	      goto done;
	    }
	  if (up[0] != 0)
	    break;
	  up++, usize--;
	}

      ASSERT (usize > 0 && vsize > 0);
      TMP_MARK;

      tp = TMP_ALLOC_LIMBS (prec);

      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff)
	{
	  /* U and V partially overlaps.  */
	  if (ediff == 0)
	    {
	      ASSERT (usize == 1 && vsize >= 1 && ulimb == *up); /* usize is 1>ediff, vsize >= 1 */
	      if (1 < vsize)
		{
		  /* u        */
		  /* vvvvvvv  */
		  rsize = vsize;
		  vsize -= 1;
		  /* mpn_cmp (up, vp + vsize - usize, usize) > 0 */
		  if (ulimb > vp[vsize])
		    {
		      tp[vsize] = ulimb - vp[vsize] - 1;
		      ASSERT_CARRY (mpn_neg (tp, vp, vsize));
		    }
		  else
		    {
		      /* vvvvvvv  */  /* Swap U and V. */
		      /* u        */
		      MPN_COPY (tp, vp, vsize);
		      tp[vsize] = vp[vsize] - ulimb;
		      negate = 1;
		    }
		}
	      else /* vsize == usize == 1 */
		{
		  /* u     */
		  /* v     */
		  rsize = 1;
		  negate = ulimb < vp[0];
		  tp[0] = negate ? vp[0] - ulimb: ulimb - vp[0];
		}
	    }
	  else
	    {
	      ASSERT (vsize + ediff <= usize);
	      ASSERT (vsize == 1 && usize >= 2 && ulimb == *vp);
		{
		  /* uuuu     */
		  /*   v      */
		  mp_size_t size;
		  size = usize - ediff - 1;
		  MPN_COPY (tp, up, size);
		  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up + size, usize - size, ulimb));
		  rsize = usize;
		}
		/* Other cases are not possible */
		/* uuuu     */
		/*   vvvvv  */
	    }
	}
      else
	{
	  /* uuuu     */
	  /*      vv  */
	  mp_size_t size, i;
	  ASSERT_CARRY (mpn_neg (tp, vp, vsize));
	  rsize = vsize + ediff;
	  size = rsize - usize;
	  for (i = vsize; i < size; i++)
	    tp[i] = GMP_NUMB_MAX;
	  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up, usize, CNST_LIMB (1)));
	}

      /* Full normalize.  Optimize later.  */
      while (rsize != 0 && tp[rsize - 1] == 0)
	{
	  rsize--;
	  uexp--;
	}
      MPN_COPY (rp, tp, rsize);
      TMP_FREE;
    }

 done:
  r->_mp_size = negate ? -rsize : rsize;
  r->_mp_exp = uexp;
#endif
}
Beispiel #4
0
// Cast num/den to an int, rounding towards nearest.  All inputs are destroyed.  Take a sqrt if desired.
// The values array must consist of r numerators followed by one denominator.
void snap_divs(RawArray<Quantized> result, RawArray<mp_limb_t,2> values, const bool take_sqrt) {
  assert(result.size()+1==values.m);

  // For division, we seek x s.t.
  //   x-1/2 <= num/den <= x+1/2
  //   2x-1 <= 2num/den <= 2x+1
  //   2x-1 <= floor(2num/den) <= 2x+1
  //   2x <= 1+floor(2num/den) <= 2x+2
  //   x <= (1+floor(2num/den))//2 <= x+1
  //   x = (1+floor(2num/den))//2

  // In the sqrt case, we seek a nonnegative integer x s.t.
  //   x-1/2 <= sqrt(num/den) < x+1/2
  //   2x-1 <= sqrt(4num/den) < 2x+1
  // Now the leftmost and rightmost expressions are integral, so we can take floors to get
  //   2x-1 <= floor(sqrt(4num/den)) < 2x+1
  // Since sqrt is monotonic and maps integers to integers, floor(sqrt(floor(x))) = floor(sqrt(x)), so
  //   2x-1 <= floor(sqrt(floor(4num/den))) < 2x+1
  //   2x <= 1+floor(sqrt(floor(4num/den))) < 2x+2
  //   x <= (1+floor(sqrt(floor(4num/den))))//2 < x+1
  //   x = (1+floor(sqrt(floor(4num/den))))//2

  // Thus, both cases look like
  //   x = (1+f(2**k*num/den))//2
  // where k = 1 or 2 and f is some truncating integer op (division or division+sqrt).

  // Adjust denominator to be positive
  const auto raw_den = values[result.size()];
  const bool den_negative = mp_limb_signed_t(raw_den.back())<0;
  if (den_negative)
    mpn_neg(raw_den.data(),raw_den.data(),raw_den.size());
  const auto den = trim(raw_den);
  assert(den.size()); // Zero should be prevented by the caller

  // Prepare for divisions
  const auto q = GEODE_RAW_ALLOCA(values.n-den.size()+1,mp_limb_t),
             r = GEODE_RAW_ALLOCA(den.size(),mp_limb_t);

  // Compute each component of the result
  for (int i=0;i<result.size();i++) {
    // Adjust numerator to be positive
    const auto num = values[i];
    const bool num_negative = mp_limb_signed_t(num.back())<0;
    if (take_sqrt && num_negative!=den_negative && !num.contains_only(0))
      throw RuntimeError("perturbed_ratio: negative value in square root");
    if (num_negative)
      mpn_neg(num.data(),num.data(),num.size());

    // Add enough bits to allow round-to-nearest computation after performing truncating operations
    mpn_lshift(num.data(),num.data(),num.size(),take_sqrt?2:1);
    // Perform division
    mpn_tdiv_qr(q.data(),r.data(),0,num.data(),num.size(),den.data(),den.size());
    const auto trim_q = trim(q);
    if (!trim_q.size()) {
      result[i] = 0;
      continue;
    }
    // Take sqrt if desired, reusing the num buffer
    const auto s = take_sqrt ? sqrt_helper(num,trim_q) : trim_q;

    // Verify that result lies in [-exact::bound,exact::bound];
    const int ratio = sizeof(ExactInt)/sizeof(mp_limb_t);
    static_assert(ratio<=2,"");
    if (s.size() > ratio)
      goto overflow;
    const auto nn = ratio==2 && s.size()==2 ? s[0]|ExactInt(s[1])<<8*sizeof(mp_limb_t) : s[0],
               n = (1+nn)/2;
    if (uint64_t(n) > uint64_t(exact::bound))
      goto overflow;

    // Done!
    result[i] = (num_negative==den_negative?1:-1)*Quantized(n);
  }

  return;
  overflow:
  throw OverflowError("perturbed_ratio: overflow in l'Hopital expansion");
}