Example #1
static void
ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;
  mp_size_t tn;
  mp_limb_t cy;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
	 mul_basecase.  */
      if (vn != 0)
	mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  if (vn < FFT_THRESHOLD)
    {
      /* In the mpn_toom3_mul_n and mpn_toom4_mul_n range, use mpn_kara_mul_n.  */
      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom3_mul_n.  */
      /* The "- 63 + 255" tweaks the allocation to allow for huge operands.
	 See the definition of this macro in gmp-impl.h to understand this.  */
      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      if (un - vn < vn)
	ref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
	ref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      MPN_COPY (wp, tp, vn);
      cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
      mpn_incr_u (wp + 2 * vn, cy);
    }
  else
    {
      MPN_COPY (wp, tp, 2 * vn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tn);
}
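The un != vn branch above relies on a splitting identity: {up,un} * {vp,vn} equals the product of the low vn limbs of u with v, plus B^vn times the product of the remaining un-vn limbs with v, where B = 2^GMP_NUMB_BITS. A minimal sketch verifying that identity with the public mpn interface (the limb values are arbitrary small test data):

#include <assert.h>
#include <gmp.h>

int
main (void)
{
  /* un = 4, vn = 2:
     {up,4}*{vp,2} = {up,2}*{vp,2} + B^2 * ({up+2,2}*{vp,2})  */
  mp_limb_t up[4] = { 1, 2, 3, 4 }, vp[2] = { 5, 6 };
  mp_limb_t full[6], hi[4], res[6];
  mp_limb_t cy;

  mpn_mul (full, up, 4, vp, 2);		/* reference product */

  mpn_mul (res, up, 2, vp, 2);		/* low block into res[0..3] */
  mpn_mul (hi, up + 2, 2, vp, 2);	/* high block */

  cy = mpn_add_n (res + 2, res + 2, hi, 2);	/* overlapping middle part */
  mpn_add_1 (res + 4, hi + 2, 2, cy);		/* top limbs plus carry */

  for (int i = 0; i < 6; i++)
    assert (res[i] == full[i]);
  return 0;
}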
Example #2
void
mpn_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

#if 0
  /* FIXME: Can this be removed? */
  if (n == 0)
    return;
#endif

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    { /* mpn_mul_basecase is sometimes faster than mpn_sqr_basecase at small sizes */
      mpn_mul_basecase (p, a, n, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_kara_sqr_n (p, a, n, ws);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
#else
  else if (BELOW_THRESHOLD (n, MPN_TOOM3_MAX_N))
#endif
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
      mpn_toom3_sqr_n (p, a, n, ws);
      TMP_SFREE;
    }
  else
#if WANT_FFT || TUNE_PROGRAM_BUILD
    {
      /* The current FFT code allocates its own space.  That should probably
	 change.  */
      mpn_mul_fft_full (p, a, n, a, n);
    }
#else
    {
      /* Toom3 for large operands.  Use workspace from the heap, as stack
	 space may be limited.  Since n is at least SQR_TOOM3_THRESHOLD,
	 the squaring will take much longer than malloc()/free().  */
      mp_ptr ws;  mp_size_t ws_size;
      ws_size = MPN_TOOM3_SQR_N_TSIZE (n);
      ws = __GMP_ALLOCATE_FUNC_LIMBS (ws_size);
      mpn_toom3_sqr_n (p, a, n, ws);
      __GMP_FREE_FUNC_LIMBS (ws, ws_size);
    }
#endif
}
Example #3
void
mpq_init (mpq_t x)
{
  ALLOC(NUM(x)) = 1;
  PTR(NUM(x)) = __GMP_ALLOCATE_FUNC_LIMBS (1);
  SIZ(NUM(x)) = 0;
  ALLOC(DEN(x)) = 1;
  PTR(DEN(x)) = __GMP_ALLOCATE_FUNC_LIMBS (1);
  PTR(DEN(x))[0] = 1;
  SIZ(DEN(x)) = 1;

#ifdef __CHECKER__
  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
  PTR(NUM(x))[0] = 0;
#endif
}
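This is the setup behind the public mpq interface; a small usage sketch pairing it with mpq_clear, which returns the two single-limb blocks to the allocator:

#include <gmp.h>

int
main (void)
{
  mpq_t q;
  mpq_init (q);		/* numerator 0, denominator 1, one limb each, as set up above */
  mpq_set_ui (q, 3, 4);	/* q = 3/4 (already canonical) */
  gmp_printf ("%Qd\n", q);
  mpq_clear (q);
  return 0;
}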
Example #4
void
mpf_init_set (mpf_ptr r, mpf_srcptr s)
{
    mp_ptr rp, sp;
    mp_size_t ssize, size;
    mp_size_t prec;

    prec = __gmp_default_fp_limb_precision;
    r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
    r->_mp_prec = prec;

    prec++;		/* lie about the precision so the assignment below does not lose a limb */
    ssize = s->_mp_size;
    size = ABS (ssize);

    rp = r->_mp_d;
    sp = s->_mp_d;

    if (size > prec)
    {
        sp += size - prec;
        size = prec;
    }

    r->_mp_exp = s->_mp_exp;
    r->_mp_size = ssize >= 0 ? size : -size;

    MPN_COPY (rp, sp, size);
}
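A short usage sketch: the source is truncated to the destination's default precision, so the copy is precision-limited rather than exact when s carries more limbs:

#include <gmp.h>

int
main (void)
{
  mpf_t a, b;
  mpf_set_default_prec (128);	/* determines __gmp_default_fp_limb_precision */
  mpf_init_set_d (a, 3.25);
  mpf_init_set (b, a);		/* b gets its own limb block */
  gmp_printf ("%Ff\n", b);
  mpf_clear (b);
  mpf_clear (a);
  return 0;
}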
Example #5
mp_limb_t
mpn_sumdiff_n (mp_ptr s, mp_ptr d, mp_srcptr x, mp_srcptr y, mp_size_t n)
{
  mp_limb_t ret;
  mp_ptr t;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  /* ASSERT_SPACE (s, n);  ASSERT_SPACE (d, n); */
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (s, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (d, y, n));
  ASSERT (! MPN_OVERLAP_P (s, n, d, n));

  if ((s == x && d == y) || (s == y && d == x))
    {
      /* Both outputs alias both inputs: form the difference in a temporary,
	 compute the sum in place, then copy the difference back.  */
      t = __GMP_ALLOCATE_FUNC_LIMBS (n);
      ret = mpn_sub_n (t, x, y, n);
      ret += 2 * mpn_add_n (s, x, y, n);
      MPN_COPY (d, t, n);
      __GMP_FREE_FUNC_LIMBS (t, n);
      return ret;
    }

  if (s == x || s == y)
    {
      /* The sum would clobber an input still needed for the difference,
	 so subtract first.  */
      ret = mpn_sub_n (d, x, y, n);
      ret += 2 * mpn_add_n (s, x, y, n);
      return ret;
    }

  ret = 2 * mpn_add_n (s, x, y, n);
  ret += mpn_sub_n (d, x, y, n);
  return ret;
}
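mpn_sumdiff_n is an internal function (MPIR exposes it; stock GMP does not), so the following is only a sketch of how the return value packs both the addition carry and the subtraction borrow, assuming a build where the symbol is linkable:

#include <assert.h>
#include <gmp.h>

/* assumed visible, matching the definition above */
mp_limb_t mpn_sumdiff_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

int
main (void)
{
  mp_limb_t x[2] = { 7, 1 }, y[2] = { 5, 1 };
  mp_limb_t s[2], d[2];
  mp_limb_t ret = mpn_sumdiff_n (s, d, x, y, 2);

  assert ((ret >> 1) == 0);		/* carry out of x + y */
  assert ((ret & 1) == 0);		/* borrow out of x - y (here x >= y) */
  assert (s[0] == 12 && s[1] == 2);	/* x + y */
  assert (d[0] == 2 && d[1] == 0);	/* x - y */
  return 0;
}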
Example #6
void
mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
{
  mp_ptr rootp, up, remp;
  mp_size_t us, un, rootn, remn;

  up = PTR(u);
  us = SIZ(u);

  /* even roots of negatives provoke an exception */
  if (us < 0 && (nth & 1) == 0)
    SQRT_OF_NEGATIVE;

  /* Root extraction interpreted as c^(1/nth) means a zeroth root should
     provoke a divide by zero; do this even if c == 0.  */
  if (nth == 0)
    DIVIDE_BY_ZERO;

  if (us == 0)
    {
      if (root != NULL)
	SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  un = ABS (us);
  rootn = (un - 1) / nth + 1;

  if (root != NULL)
    {
      rootp = MPZ_REALLOC (root, rootn);
      up = PTR(u);
    }
  else
    {
      rootp = __GMP_ALLOCATE_FUNC_LIMBS (rootn);
    }

  MPZ_REALLOC (rem, un);
  remp = PTR(rem);

  if (nth == 1)
    {
      MPN_COPY (rootp, up, un);
      remn = 0;
    }
  else
    {
      remn = mpn_rootrem (rootp, remp, up, un, nth);
    }

  if (root != NULL)
    SIZ(root) = us >= 0 ? rootn : -rootn;
  else
    __GMP_FREE_FUNC_LIMBS (rootp, rootn);

  SIZ(rem) = remn;
}
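Usage through the public interface (the NULL-root option above is an internal shortcut; the documented entry point takes a real root destination):

#include <gmp.h>

int
main (void)
{
  mpz_t u, root, rem;
  mpz_init_set_ui (u, 100);
  mpz_init (root);
  mpz_init (rem);
  mpz_rootrem (root, rem, u, 3);	/* 100 = 4^3 + 36 */
  gmp_printf ("%Zd %Zd\n", root, rem);	/* prints: 4 36 */
  mpz_clear (u);
  mpz_clear (root);
  mpz_clear (rem);
  return 0;
}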
Example #7
void
mpf_init (mpf_ptr r)
{
  mp_size_t prec = __gmp_default_fp_limb_precision;
  r->_mp_size = 0;
  r->_mp_exp = 0;
  r->_mp_prec = prec;
  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
}
Example #8
int
mpf_init_set_str (mpf_ptr r, const char *s, int base)
{
  mp_size_t prec = __gmp_default_fp_limb_precision;
  r->_mp_size = 0;
  r->_mp_exp = 0;
  r->_mp_prec = prec;
  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);

  return mpf_set_str (r, s, base);
}
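Note that r is fully initialized before the parse, so it must still be cleared even when mpf_set_str rejects the string (it returns 0 on success, -1 on failure); a sketch:

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpf_t f;
  if (mpf_init_set_str (f, "3.14159e2", 10) != 0)
    {
      fprintf (stderr, "parse failed\n");
      mpf_clear (f);	/* f was allocated above even though the parse failed */
      return 1;
    }
  gmp_printf ("%Ff\n", f);
  mpf_clear (f);
  return 0;
}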
Example #9
void
mpz_init_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)
{
  ASSERT (size >= 0);

  MPN_NORMALIZE (p, size);
  ALLOC(z) = MAX (size, 1);
  PTR(z) = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC(z));
  SIZ(z) = size;
  MPN_COPY (PTR(z), p, size);
}
Example #10
void
mpz_init (mpz_ptr x)
{
  ALLOC (x) = 1;
  PTR (x) = __GMP_ALLOCATE_FUNC_LIMBS (1);
  SIZ (x) = 0;

#ifdef __CHECKER__
  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
  PTR (x)[0] = 0;
#endif
}
Example #11
void
mpn_sqr_n (mp_ptr prodp,
	   mp_srcptr up, mp_size_t un)
{
  ASSERT (un >= 1);
  ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un));

  /* FIXME: Can this be removed? */
  if (un == 0)
    return;

  if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD))
    { /* mpn_mul_basecase is sometimes faster than mpn_sqr_basecase at small sizes */
      mpn_mul_basecase (prodp, up, un, up, un);
    }
  else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))
    { /* basecase (schoolbook) squaring */
      mpn_sqr_basecase (prodp, up, un);
    }
  else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD))
    { /* Karatsuba squaring */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un));
      mpn_kara_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD))
#else
  else
#endif
    { /* Toom3 squaring.
	 Use workspace from the heap, as stack may be limited.  Since n is
	 at least SQR_TOOM3_THRESHOLD, the squaring will take much
	 longer than malloc()/free().  */
      mp_ptr     tspace;
      mp_size_t  tsize;
      tsize = MPN_TOOM3_SQR_N_TSIZE (un);
      tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize);
      mpn_toom3_sqr_n (prodp, up, un, tspace);
      __GMP_FREE_FUNC_LIMBS (tspace, tsize);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
      /* The current FFT code allocates its own space.  That should probably
	 change.  */
      mpn_mul_fft_full (prodp, up, un, up, un);
    }
#endif
}
Example #12
void
mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
{
  mp_ptr p;
  mp_size_t i;
  mp_size_t nlimbs;

  nlimbs = nbits / GMP_NUMB_BITS + 1;
  p = __GMP_ALLOCATE_FUNC_LIMBS (arr_size * nlimbs);

  for (i = 0; i < arr_size; i++)
    {
      ALLOC (&arr[i]) = nlimbs + 1; /* Yes, lie a little... */
      SIZ (&arr[i]) = 0;
      PTR (&arr[i]) = p + i * nlimbs;
    }
}
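Because one block backs the whole array, none of these integers may be individually reallocated past nbits or cleared, and the block is only released at process exit. A usage sketch (mpz_array_init is deprecated in current GMP):

#include <gmp.h>

#define COUNT 100

int
main (void)
{
  mpz_t arr[COUNT];
  mpz_array_init (arr[0], COUNT, 64);	/* one shared allocation */
  for (int i = 0; i < COUNT; i++)
    mpz_set_ui (arr[i], i);		/* small values: no reallocation */
  gmp_printf ("%Zd\n", arr[COUNT - 1]);
  return 0;	/* do not mpz_clear these; the block is reclaimed at exit */
}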
Example #13
static void
refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
{
  mp_size_t usize = u->_mp_size;
  mp_size_t vsize = v->_mp_size;
  mp_size_t wsize;
  mp_size_t sign_product;
  mp_ptr up, vp;
  mp_ptr wp;
  mp_size_t talloc;

  sign_product = usize ^ vsize;
  usize = ABS (usize);
  vsize = ABS (vsize);

  if (usize == 0 || vsize == 0)
    {
      SIZ (w) = 0;
      return;
    }

  talloc = usize + vsize;

  up = u->_mp_d;
  vp = v->_mp_d;

  wp = __GMP_ALLOCATE_FUNC_LIMBS (talloc);

  if (usize > vsize)
    refmpn_mul (wp, up, usize, vp, vsize);
  else
    refmpn_mul (wp, vp, vsize, up, usize);
  wsize = usize + vsize;
  wsize -= wp[wsize - 1] == 0;
  MPZ_REALLOC (w, wsize);
  MPN_COPY (PTR(w), wp, wsize);

  SIZ(w) = sign_product < 0 ? -wsize : wsize;
  __GMP_FREE_FUNC_LIMBS (wp, talloc);
}
Example #14
void
mpz_inits (mpz_ptr x, ...)
{
  va_list  ap;

  va_start (ap, x);

  while (x != NULL)
    {
      ALLOC (x) = 1;
      PTR (x) = __GMP_ALLOCATE_FUNC_LIMBS (1);
      SIZ (x) = 0;

#ifdef __CHECKER__
      /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
      PTR (x)[0] = 0;
#endif

      x = va_arg (ap, mpz_ptr);
    }

  va_end (ap);
}
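The trailing NULL is the loop's only termination condition, so it must not be forgotten; usage paired with the matching variadic mpz_clears:

#include <gmp.h>

int
main (void)
{
  mpz_t a, b, c;
  mpz_inits (a, b, c, NULL);	/* NULL terminates the va_arg loop above */
  mpz_set_ui (a, 6);
  mpz_set_ui (b, 7);
  mpz_mul (c, a, b);
  gmp_printf ("%Zd\n", c);	/* prints: 42 */
  mpz_clears (a, b, c, NULL);
  return 0;
}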
Example #15
void
mpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t len1, ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4;

  len1 = n;
  ASSERT (n >= 1);

  MPN_NORMALIZE(up, len1);
  
  sn = (n - 1) / 4 + 1;

  /* a0 - a3 (and the r and u temporaries below) are defined by macros in
     mpn_toom4_mul_n, which is not shown here */
  
  TC4_NORM (a0, a0n, sn);
  TC4_NORM (a1, a1n, sn);
  TC4_NORM (a2, a2n, sn);
  TC4_NORM (a3, a3n, n - 3*sn);

  t4 = 2*sn + 2;	/* allows multiplication of two integers of sn + 1 limbs */

  tp = __GMP_ALLOCATE_FUNC_LIMBS (4*t4 + 4*(sn + 1));

  tc4_add_unsigned (u5, &n5, a3, a3n, a1, a1n);
  tc4_add_unsigned (u4, &n4, a2, a2n, a0, a0n);
  tc4_add_unsigned (u2, &n2, u4, n4, u5, n5);
  tc4_sub (u3, &n3, u4, n4, u5, n5);

  SQR_TC4 (r4, n4, u3, n3);
  SQR_TC4_UNSIGNED (r3, n3, u2, n2);

  tc4_lshift (r1, &n1, a0, a0n, 3);
  tc4_addlsh1_unsigned (r1, &n1, a2, a2n);
  tc4_lshift (r2, &n8, a1, a1n, 2);
  tc4_add (r2, &n8, r2, n8, a3, a3n);
  tc4_add (u4, &n9, r1, n1, r2, n8);
  tc4_sub (u5, &n5, r1, n1, r2, n8);

  r30 = r3[0];
  if (!n3)
    r30 = CNST_LIMB(0);
  r31 = r3[1];
  SQR_TC4 (r6, n6, u5, n5);
  SQR_TC4_UNSIGNED (r5, n5, u4, n9);
  r3[1] = r31;

  tc4_lshift (u2, &n8, a3, a3n, 3);
  tc4_addmul_1 (u2, &n8, a2, a2n, 4);
  tc4_addlsh1_unsigned (u2, &n8, a1, a1n);
  tc4_add (u2, &n8, u2, n8, a0, a0n);

  SQR_TC4_UNSIGNED (r2, n2, u2, n8);
  SQR_TC4_UNSIGNED (r1, n1, a3, a3n);
  SQR_TC4_UNSIGNED (r7, n7, a0, a0n);

  TC4_DENORM (r1, n1, t4 - 1);
  TC4_DENORM (r2, n2, t4 - 1);
  if (n3)
    TC4_DENORM (r3, n3, t4 - 1);
  else
    {
      /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
      for (ind = 1; ind < t4 - 1; ind++)
	(r3)[ind] = CNST_LIMB(0);
    }
  TC4_DENORM (r4, n4, t4 - 1);
  TC4_DENORM (r5, n5, t4 - 1);
  TC4_DENORM (r6, n6, t4 - 1);
  TC4_DENORM (r7, n7, t4 - 2);	/* r7 is treated differently: it cannot exceed t4-2 in length */

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

  mpn_toom4_interpolate (rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

  if (rpn != 2*n)
    MPN_ZERO (rp + rpn, 2*n - rpn);

  __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn + 1));
}
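For reference, working the shifts and adds above back into polynomial form (a(x) = a3*x^3 + a2*x^2 + a1*x + a0 built from the four sn-limb pieces of {up, n}), the seven squarings are evaluations at the usual Toom-4 points. A summary derived from the code above, not from separate documentation:

/* r7 = a(0)^2       = a0^2
   r3 = a(1)^2       = (a0 + a1 + a2 + a3)^2            via u2 = u4 + u5
   r4 = a(-1)^2      = (a0 - a1 + a2 - a3)^2            via u3 = u4 - u5
   r5 = (8*a(1/2))^2 = (8*a0 + 4*a1 + 2*a2 + a3)^2      via u4 = r1 + r2
   r6 = (8*a(-1/2))^2 = (8*a0 - 4*a1 + 2*a2 - a3)^2     via u5 = r1 - r2
   r2 = a(2)^2       = (a0 + 2*a1 + 4*a2 + 8*a3)^2      via the reused u2
   r1 = a(inf)^2     = a3^2
   mpn_toom4_interpolate then recovers the coefficients of a(x)^2.  */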
Example #16
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, tz;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dp, qp, rp;
  mp_limb_t rh;
  mp_limb_t t;
  mp_limb_t dinv;
  int count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }


  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (tz);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = 0; test < count;)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
      if (maxdbits > nbits)
	dbits = random_word (rands) % nbits + 1;
      else
	dbits = random_word (rands) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	{
	  RANDFUNC (n, rands, nbits);
	  do
	    {
	      RANDFUNC (d, rands, dbits);
	    }
	  while (mpz_sgn (d) == 0);

	  np = PTR (n);
	  dp = PTR (d);
	  nn = SIZ (n);
	  dn = SIZ (d);
	}
      while (nn < dn);

      dp[0] |= 1;

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	dp[0] = GMP_NUMB_MAX;

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = 0; i <= clearn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	}

      test++;

      binvert_limb (dinv, dp[0]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_qr */
	      MPN_ZERO (qp, nn - dn);
	      MPN_ZERO (rp, dn);
	      MPN_COPY (rp, np, nn);
	      rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr");
	    }

	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_q */
	      MPN_COPY (rp, np, nn);
	      MPN_ZERO (qp, nn - dn);
	      mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q");
	    }
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_qr */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr");
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_q */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
	}

      if (nn > dn)
	{
	  /* Test mpn_bdiv_qr */
	  itch = mpn_bdiv_qr_itch (nn, dn);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_ZERO (qp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);

	  check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
	}

      if (nn - dn < 2 || dn < 2)
	continue;

      /* Test mpn_mu_bdiv_qr */
      itch = mpn_mu_bdiv_qr_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn);
      MPN_ZERO (rp, dn);
      rp[dn] = rran1;
      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
      check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr");

      /* Test mpn_mu_bdiv_q */
      itch = mpn_mu_bdiv_q_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn + 1);
      mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q");
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (tz);

  tests_end ();
  return 0;
}
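The realloc-and-canary pattern repeated above can be factored out; a sketch using the same internal allocation macros from gmp-impl.h (the helper name grow_scratch is hypothetical). The caller plants a random limb at scratch[itch] and re-checks it after each mpn call to detect writes past the documented scratch requirement:

static mp_ptr
grow_scratch (mp_ptr scratch, mp_size_t *alloc, mp_size_t itch)
{
  /* Keep one limb beyond itch for the canary.  */
  if (itch + 1 > *alloc)
    {
      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, *alloc, itch + 1);
      *alloc = itch + 1;
    }
  return scratch;
}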
Example #17
/* For now, also disable REDC when MOD is even, as the inverse can't handle
   that.  At some point, we might want to make the code faster for that case,
   perhaps using CRR.  */

#ifndef POWM_THRESHOLD
#define POWM_THRESHOLD  ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
#endif

#define HANDLE_NEGATIVE_EXPONENT 1
#undef REDUCE_EXPONENT

void
#ifndef BERKELEY_MP
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
#else /* BERKELEY_MP */
pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
#endif /* BERKELEY_MP */
{
  mp_ptr xp, tp, qp, gp, this_gp;
  mp_srcptr bp, ep, mp;
  mp_size_t bn, es, en, mn, xn;
  mp_limb_t invm, c;
  unsigned long int enb;
  mp_size_t i, K, j, l, k;
  int m_zero_cnt, e_zero_cnt;
  int sh;
  int use_redc;
#if HANDLE_NEGATIVE_EXPONENT
  mpz_t new_b;
#endif
#if REDUCE_EXPONENT
  mpz_t new_e;
#endif
  TMP_DECL (marker);

  mp = PTR(m);
  mn = ABSIZ (m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  TMP_MARK (marker);

  es = SIZ (e);
  if (es <= 0)
    {
      if (es == 0)
	{
	  /* Exponent is zero; the result is 1 mod m, i.e., 1, or 0 when
	     m equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  TMP_FREE (marker);	/* we haven't really allocated anything here */
	  return;
	}
#if HANDLE_NEGATIVE_EXPONENT
      MPZ_TMP_INIT (new_b, mn + 1);

      if (! mpz_invert (new_b, b, m))
	DIVIDE_BY_ZERO;
      b = new_b;
      es = -es;
#else
      DIVIDE_BY_ZERO;
#endif
    }
  en = es;

#if REDUCE_EXPONENT
  /* Reduce exponent by dividing it by phi(m) when m small.  */
  if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
    {
      MPZ_TMP_INIT (new_e, 2);
      mpz_mod_ui (new_e, e, phi (mp[0]));
      e = new_e;
    }
#endif

  use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
  if (use_redc)
    {
      /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
      modlimb_invert (invm, mp[0]);
      invm = -invm;
    }
  else
    {
      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp;
	  new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}
    }

  /* Determine the optimal value of k, the number of exponent bits we
     process at a time.  */
  count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
  e_zero_cnt -= GMP_NAIL_BITS;
  enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
  k = 1;
  K = 2;
  while (2 * enb > K * (2 + k * (3 + k)))
    {
      k++;
      K *= 2;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
  qp = TMP_ALLOC_LIMBS (mn + 1);

  gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);

  /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
  bn = ABSIZ (b);
  bp = PTR(b);
  /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
     for speed or correctness to do this when b and m have the same number of
     limbs, perhaps remove mpn_cmp call.  */
  if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
    {
      /* Reduce possibly huge base while moving it to gp[0].  Use a function
	 call to reduce, since we don't want the quotient allocation to
	 live until function return.  */
      if (use_redc)
	{
	  reduce (tp + mn, bp, bn, mp, mn);	/* b mod m */
	  MPN_ZERO (tp, mn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormalized! */
	}
      else
	{
	  reduce (gp, bp, bn, mp, mn);
	}
    }
  else
    {
      /* |b| < m.  We pad out operands to become mn limbs, which simplifies
	 the rest of the function, but slows things down when |b| << m.  */
      if (use_redc)
	{
	  MPN_ZERO (tp, mn);
	  MPN_COPY (tp + mn, bp, bn);
	  MPN_ZERO (tp + mn + bn, mn - bn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
	}
      else
	{
	  MPN_COPY (gp, bp, bn);
	  MPN_ZERO (gp + bn, mn - bn);
	}
    }

  /* Build the table of odd powers: g[i] = g[0]^(2i+1) (times R^n when using
     REDC), for 0 < i < K/2; g[0] already holds the reduced base.  */

  xp = TMP_ALLOC_LIMBS (mn);
  mpn_sqr_n (tp, gp, mn);
  if (use_redc)
    redc (xp, mp, mn, invm, tp);		/* xx = x^2*R^n */
  else
    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
  this_gp = gp;
  for (i = 1; i < K / 2; i++)
    {
      mpn_mul_n (tp, this_gp, xp, mn);
      this_gp += mn;
      if (use_redc)
	redc (this_gp, mp, mn, invm, tp);	/* g[i] = x^(2i+1)*R^n */
      else
	mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
    }

  /* Start the real stuff.  */
  ep = PTR (e);
  i = en - 1;				/* current index */
  c = ep[i];				/* current limb */
  sh = GMP_NUMB_BITS - e_zero_cnt;	/* significant bits in ep[i] */
  sh -= k;				/* index of lower bit of ep[i] to take into account */
  if (sh < 0)
    {					/* k-sh extra bits are needed */
      if (i > 0)
	{
	  i--;
	  c <<= (-sh);
	  sh += GMP_NUMB_BITS;
	  c |= ep[i] >> sh;
	}
    }
Example #18
void
mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
  mp_size_t usize;
  mp_size_t vsize;
  mp_size_t wsize;
  mp_size_t sign_product;
  mp_ptr up, vp;
  mp_ptr wp;
  mp_ptr free_me;
  size_t free_me_size;
  mp_limb_t cy_limb;
  TMP_DECL;

  usize = SIZ (u);
  vsize = SIZ (v);
  sign_product = usize ^ vsize;
  usize = ABS (usize);
  vsize = ABS (vsize);

  if (usize < vsize)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (usize, vsize);
    }

  if (vsize == 0)
    {
      SIZ (w) = 0;
      return;
    }

#if HAVE_NATIVE_mpn_mul_2
  if (vsize <= 2)
    {
      wp = MPZ_REALLOC (w, usize+vsize);
      if (vsize == 1)
	cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
      else
	{
	  cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v));
	  usize++;
	}
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ (w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#else
  if (vsize == 1)
    {
      wp = MPZ_REALLOC (w, usize+1);
      cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ (w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#endif

  TMP_MARK;
  free_me = NULL;
  up = PTR (u);
  vp = PTR (v);
  wp = PTR (w);

  /* Ensure W has space enough to store the result.  */
  wsize = usize + vsize;
  if (ALLOC (w) < wsize)
    {
      if (wp == up || wp == vp)
	{
	  free_me = wp;
	  free_me_size = ALLOC (w);
	}
      else
	(*__gmp_free_func) (wp, (size_t) ALLOC (w) * GMP_LIMB_BYTES);

      ALLOC (w) = wsize;
      wp = __GMP_ALLOCATE_FUNC_LIMBS (wsize);
      PTR (w) = wp;
    }
  else
    {
      /* Make U and V not overlap with W.  */
      if (wp == up)
	{
	  /* W and U are identical.  Allocate temporary space for U.  */
	  up = TMP_ALLOC_LIMBS (usize);
	  /* Is V identical too?  Keep it identical with U.  */
	  if (wp == vp)
	    vp = up;
	  /* Copy to the temporary space.  */
	  MPN_COPY (up, wp, usize);
	}
      else if (wp == vp)
	{
	  /* W and V are identical.  Allocate temporary space for V.  */
	  vp = TMP_ALLOC_LIMBS (vsize);
	  /* Copy to the temporary space.  */
	  MPN_COPY (vp, wp, vsize);
	}
    }

  if (up == vp)
    {
      mpn_sqr (wp, up, usize);
      cy_limb = wp[wsize - 1];
    }
  else
    {
      cy_limb = mpn_mul (wp, up, usize, vp, vsize);
    }

  wsize -= cy_limb == 0;

  SIZ (w) = sign_product < 0 ? -wsize : wsize;
  if (free_me != NULL)
    (*__gmp_free_func) (free_me, free_me_size * GMP_LIMB_BYTES);
  TMP_FREE;
}
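The overlap handling above means full aliasing is allowed at the user level; squaring in place, for example:

#include <gmp.h>

int
main (void)
{
  mpz_t a;
  mpz_init_set_ui (a, 12345);
  mpz_mul (a, a, a);		/* w == u == v: handled by the copies above */
  gmp_printf ("%Zd\n", a);	/* prints: 152399025 */
  mpz_clear (a);
  return 0;
}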
Example #19
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, q, r, tz, junk;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dup, dnp, qp, rp, junkp;
  mp_limb_t t;
  gmp_pi1_t dinv;
  long count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }

  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (q);
  mpz_init (r);
  mpz_init (tz);
  mpz_init (junk);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  dnp = TMP_ALLOC_LIMBS (maxdn);

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = -300; test < count; test++)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;

      if (test < 0)
	dbits = (test + 300) % (nbits - 1) + 1;
      else
	dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	RANDFUNC (d, rands, dbits);
      while (mpz_sgn (d) == 0);
      dn = SIZ (d);
      dup = PTR (d);
      MPN_COPY (dnp, dup, dn);
      dnp[dn - 1] |= GMP_NUMB_HIGHBIT;

      if (test % 2 == 0)
	{
	  RANDFUNC (n, rands, nbits);
	  nn = SIZ (n);
	  ASSERT_ALWAYS (nn >= dn);
	}
      else
	{
	  do
	    {
	      RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1));
	      RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2));
	      mpz_mul (n, q, d);
	      mpz_add (n, n, r);
	      nn = SIZ (n);
	    }
	  while (nn > maxnn || nn < dn);
	}

      ASSERT_ALWAYS (nn <= maxnn);
      ASSERT_ALWAYS (dn <= maxdn);

      mpz_urandomb (junk, rands, nbits);
      junkp = PTR (junk);

      np = PTR (n);

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	{
	  dnp[dn - 1] = GMP_NUMB_MAX;
	  dup[dn - 1] = GMP_NUMB_MAX;
	}

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = clearn; i < nn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	}

      if (dn >= 2)
	invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  /* Test mpn_sbpi1_div_qr */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
	    }

	  /* Test mpn_sbpi1_divappr_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
	    }

	  /* Test mpn_sbpi1_div_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
	    }

	  /* Test mpn_sb_div_qr_sec */
	  itch = 3 * nn + 4;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  if (nn >= dn)
	    MPN_COPY (qp, junkp, nn - dn + 1);
	  mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0);

	  /* Test mpn_sb_div_r_sec */
	  itch = nn + 2 * dn + 2;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  mpn_sb_div_r_sec (rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  /* Note: Since check_one cannot cope with random-only functions, we
	     pass qp[] from the previous function, mpn_sb_div_qr_sec.  */
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0);
	}

      /* Test mpn_dcpi1_div_qr */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
	}

      /* Test mpn_dcpi1_divappr_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
	}

      /* Test mpn_dcpi1_div_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
	}

     /* Test mpn_mu_div_qr */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_qr_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
	}

      /* Test mpn_mu_divappr_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_divappr_q_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
	}

      /* Test mpn_mu_div_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_q_itch (nn, dn, 0);
	  if (itch + 1> alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
	}

      if (1)
	{
	  itch = nn + 1;
	  if (itch + 1> alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  mpn_div_q (qp, np, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
	}

      if (dn >= 2 && nn >= 2)
	{
	  mp_limb_t qh;

	  /* mpn_divrem_2 */
	  MPN_COPY (rp, np, nn);
	  qp[nn - 2] = qp[nn-1] = qran1;

	  qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);

	  /* Missing: divrem_2 with fraction limbs. */

	  /* mpn_div_qr_2 */
	  qp[nn - 2] = qran1;

	  qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
	}
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (q);
  mpz_clear (r);
  mpz_clear (tz);
  mpz_clear (junk);

  tests_end ();
  return 0;
}
Example #20
/* Multiply {up, un} by {vp, vn} and write the result to
   {rp, un + vn}, assuming vn > 2*ceil(un/5).

   Note that rp gets un + vn limbs stored, even if the actual
   result only needs un + vn - 1.  */
void
mpn_toom53_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,
		          mp_srcptr vp, mp_size_t vn)
{
  mp_size_t ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, a4n, b0n, b1n, b2n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, rpn, t4;

  sn = (un + 4) / 5;

  ASSERT (vn > 2*sn);
  
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define a4 (up + 4*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)

   TC4_NORM(a0, a0n, sn);
   TC4_NORM(a1, a1n, sn);
   TC4_NORM(a2, a2n, sn);
   TC4_NORM(a3, a3n, sn);
   TC4_NORM(a4, a4n, un - 4*sn); 
   TC4_NORM(b0, b0n, sn);
   TC4_NORM(b1, b1n, sn);
   TC4_NORM(b2, b2n, vn - 2*sn); 

   t4 = 2*sn + 2;	/* allows multiplication of two integers of sn + 1 limbs */

   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))

   tc4_add_unsigned(u2, &n2, a3, a3n, a1, a1n); 
   tc4_add_unsigned(u5, &n5, a2, a2n, a0, a0n); 
   tc4_add_unsigned(u5, &n5, u5, n5, a4, a4n); 
   tc4_add_unsigned(u3, &n3, u5, n5, u2, n2); 
   tc4_sub(u4, &n4, u5, n5, u2, n2);

   tc4_add_unsigned(u5, &n5, b2, b2n, b0, b0n);
   tc4_add_unsigned(r2, &n8, u5, n5, b1, b1n); 
   tc4_sub(u5, &n5, u5, n5, b1, b1n);

   MUL_TC4_UNSIGNED(r3, n3, u3, n3, r2, n8); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
   tc4_lshift(r1, &n1, a0, a0n, 4);
   tc4_lshift(u3, &n9, a2, a2n, 2);
   tc4_add_unsigned(r1, &n1, r1, n1, u3, n9);
   tc4_add_unsigned(r1, &n1, r1, n1, a4, a4n);
   tc4_lshift(r2, &n8, a1, a1n, 3);
   tc4_addlsh1_unsigned(r2, &n8, a3, a3n);
   tc4_add_unsigned(u5, &n5, r1, n1, r2, n8);
   tc4_sub(u3, &n9, r1, n1, r2, n8);

   tc4_lshift(r1, &n1, b0, b0n, 2);
   tc4_add_unsigned(r1, &n1, r1, n1, b2, b2n);
   tc4_lshift(u4, &n10, b1, b1n, 1);
   tc4_add_unsigned(u2, &n2, r1, n1, u4, n10);
   tc4_sub(r2, &n8, r1, n1, u4, n10);
   
   r30 = r3[0];
   if (!n3) r30 = CNST_LIMB(0);
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, n5, u2, n2); /* 1/2 */
   MUL_TC4(r6, n6, u3, n9, r2, n8); /* -1/2 */
   r3[1] = r31;

   tc4_lshift(u2, &n2, a4, a4n, 4);
   tc4_addmul_1(u2, &n2, a3, a3n, 8);
   tc4_addmul_1(u2, &n2, a2, a2n, 4);
   tc4_addlsh1_unsigned(u2, &n2, a1, a1n);
   tc4_add(u2, &n2, u2, n2, a0, a0n);

   tc4_lshift(r1, &n1, b2, b2n, 2);
   tc4_addlsh1_unsigned(r1, &n1, b1, b1n);
   tc4_add(r1, &n1, r1, n1, b0, b0n);
   
   MUL_TC4_UNSIGNED(r2, n2, u2, n2, r1, n1); /* 2 */

   MUL_TC4_UNSIGNED(r1, n1, a4, a4n, b2, b2n); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, a0n, b0, b0n); /* 0 */

   TC4_DENORM(r1, n1,  t4 - 1);
   TC4_DENORM(r2, n2,  t4 - 1);
   if (n3)
     TC4_DENORM(r3, n3,  t4 - 1); 
   else {
     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
     for (ind = 1 ; ind < t4 - 1; ind++) 
        (r3)[ind] = CNST_LIMB(0); 
   }
   TC4_DENORM(r4, n4,  t4 - 1);
   TC4_DENORM(r5, n5,  t4 - 1);
   TC4_DENORM(r6, n6,  t4 - 1);
   TC4_DENORM(r7, n7,  t4 - 2);	/* r7 is treated differently: it cannot exceed t4-2 in length */

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != un + vn)
     MPN_ZERO (rp + rpn, un + vn - rpn);

   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));
}