Пример #1
0
mp_limb_t	mpn_sumdiff_n(mp_ptr s,mp_ptr d,mp_srcptr x,mp_srcptr y,mp_size_t n)
{mp_limb_t ret;mp_ptr t;

ASSERT(n>0);
ASSERT_MPN(x,n);ASSERT_MPN(y,n);//ASSERT_SPACE(s,n);ASSERT_SPACE(d,n);
ASSERT(MPN_SAME_OR_SEPARATE_P(s,x,n));
ASSERT(MPN_SAME_OR_SEPARATE_P(s,y,n));
ASSERT(MPN_SAME_OR_SEPARATE_P(d,x,n));
ASSERT(MPN_SAME_OR_SEPARATE_P(d,y,n));
ASSERT(!MPN_OVERLAP_P(s,n,d,n));

if( (s==x && d==y)||(s==y && d==x) )
  {t=__GMP_ALLOCATE_FUNC_LIMBS(n);
   ret=mpn_sub_n(t,x,y,n);
   ret+=2*mpn_add_n(s,x,y,n);
   MPN_COPY(d,t,n);
   __GMP_FREE_FUNC_LIMBS(t,n);
   return ret;}
if(s==x || s==y)
  {ret=mpn_sub_n(d,x,y,n);
   ret+=2*mpn_add_n(s,x,y,n);
   return ret;}
ret=2*mpn_add_n(s,x,y,n);
ret+=mpn_sub_n(d,x,y,n);
return ret;}
Пример #2
0
void
mpn_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

#if 0
  /* FIXME: Can this be removed? */
  if (n == 0)
    return;
#endif

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (p, a, n, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_kara_sqr_n (p, a, n, ws);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
#else
  else if (BELOW_THRESHOLD (n, MPN_TOOM3_MAX_N))
#endif
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
      mpn_toom3_sqr_n (p, a, n, ws);
      TMP_SFREE;
    }
  else
#if WANT_FFT || TUNE_PROGRAM_BUILD
    {
      /* The current FFT code allocates its own space.  That should probably
	 change.  */
      mpn_mul_fft_full (p, a, n, a, n);
    }
#else
    {
      /* Toom3 for large operands.  Use workspace from the heap, as stack space
      may be limited.  Since n is at least MUL_TOOM3_THRESHOLD, multiplication
      will take much longer than malloc()/free().  */
      mp_ptr ws;  mp_size_t ws_size;
      ws_size = MPN_TOOM3_SQR_N_TSIZE (n);
      ws = __GMP_ALLOCATE_FUNC_LIMBS (ws_size);
      mpn_toom3_sqr_n (p, a, n, ws);
      __GMP_FREE_FUNC_LIMBS (ws, ws_size);
    }
#endif
}
Пример #3
0
void
mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
{
  mp_ptr rootp, up, remp;
  mp_size_t us, un, rootn, remn;

  up = PTR(u);
  us = SIZ(u);

  /* even roots of negatives provoke an exception */
  if (us < 0 && (nth & 1) == 0)
    SQRT_OF_NEGATIVE;

  /* root extraction interpreted as c^(1/nth) means a zeroth root should
     provoke a divide by zero, do this even if c==0 */
  if (nth == 0)
    DIVIDE_BY_ZERO;

  if (us == 0)
    {
      if (root != NULL)
	SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  un = ABS (us);
  rootn = (un - 1) / nth + 1;

  if (root != NULL)
    {
      rootp = MPZ_REALLOC (root, rootn);
      up = PTR(u);
    }
  else
    {
      rootp = __GMP_ALLOCATE_FUNC_LIMBS (rootn);
    }

  MPZ_REALLOC (rem, un);
  remp = PTR(rem);

  if (nth == 1)
    {
      MPN_COPY (rootp, up, un);
      remn = 0;
    }
  else
    {
      remn = mpn_rootrem (rootp, remp, up, un, nth);
    }

  if (root != NULL)
    SIZ(root) = us >= 0 ? rootn : -rootn;
  else
    __GMP_FREE_FUNC_LIMBS (rootp, rootn);

  SIZ(rem) = remn;
}
Пример #4
0
Файл: t-mul.c Проект: HRF92/mpir
static void
ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;
  mp_size_t tn;
  mp_limb_t cy;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
	 mul_basecase.  */
      if (vn != 0)
	mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  if (vn < FFT_THRESHOLD)
    {
      /* In the mpn_toom3_mul_n and mpn_toom4_mul_n range, use mpn_kara_mul_n.  */
      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom3_mul_n.  */
      /* The "- 63 + 255" tweaks the allocation to allow for huge operands.
	 See the definition of this macro in gmp-impl.h to understand this.  */
      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      if (un - vn < vn)
	ref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
	ref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      MPN_COPY (wp, tp, vn);
      cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
      mpn_incr_u (wp + 2 * vn, cy);
    }
  else
    {
      MPN_COPY (wp, tp, 2 * vn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tn);
}
Пример #5
0
void
mpn_sqr_n (mp_ptr prodp,
	   mp_srcptr up, mp_size_t un)
{
  ASSERT (un >= 1);
  ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un));

  /* FIXME: Can this be removed? */
  if (un == 0)
    return;

  if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (prodp, up, un, up, un);
    }
  else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))
    { /* plain schoolbook multiplication */
      mpn_sqr_basecase (prodp, up, un);
    }
  else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD))
    { /* karatsuba multiplication */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un));
      mpn_kara_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD))
#else
  else
#endif
    { /* Toom3 multiplication.
	 Use workspace from the heap, as stack may be limited.  Since n is
	 at least MUL_TOOM3_THRESHOLD, the multiplication will take much
	 longer than malloc()/free().  */
      mp_ptr     tspace;
      mp_size_t  tsize;
      tsize = MPN_TOOM3_SQR_N_TSIZE (un);
      tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize);
      mpn_toom3_sqr_n (prodp, up, un, tspace);
      __GMP_FREE_FUNC_LIMBS (tspace, tsize);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
Пример #6
0
static void
refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
{
  mp_size_t usize = u->_mp_size;
  mp_size_t vsize = v->_mp_size;
  mp_size_t wsize;
  mp_size_t sign_product;
  mp_ptr up, vp;
  mp_ptr wp;
  mp_size_t talloc;

  sign_product = usize ^ vsize;
  usize = ABS (usize);
  vsize = ABS (vsize);

  if (usize == 0 || vsize == 0)
    {
      SIZ (w) = 0;
      return;
    }

  talloc = usize + vsize;

  up = u->_mp_d;
  vp = v->_mp_d;

  wp = __GMP_ALLOCATE_FUNC_LIMBS (talloc);

  if (usize > vsize)
    refmpn_mul (wp, up, usize, vp, vsize);
  else
    refmpn_mul (wp, vp, vsize, up, usize);
  wsize = usize + vsize;
  wsize -= wp[wsize - 1] == 0;
  MPZ_REALLOC (w, wsize);
  MPN_COPY (PTR(w), wp, wsize);

  SIZ(w) = sign_product < 0 ? -wsize : wsize;
  __GMP_FREE_FUNC_LIMBS (wp, talloc);
}
Пример #7
0
void
mpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t len1, ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4;

  len1 = n;
  ASSERT (n >= 1);

  MPN_NORMALIZE(up, len1);
  
  sn = (n - 1) / 4 + 1;

  /* a0 - a3 are defined in mpn_toom4_mul_n above */
  
   TC4_NORM(a0, a0n, sn);
	TC4_NORM(a1, a1n, sn);
	TC4_NORM(a2, a2n, sn);
	TC4_NORM(a3, a3n, n - 3*sn); 

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));

   tc4_add_unsigned(u5, &n5, a3, a3n, a1, a1n); 
   tc4_add_unsigned(u4, &n4, a2, a2n, a0, a0n); 
	tc4_add_unsigned(u2, &n2, u4, n4, u5, n5); 
   tc4_sub(u3, &n3, u4, n4, u5, n5);

	SQR_TC4(r4, n4, u3, n3);
   SQR_TC4_UNSIGNED(r3, n3, u2, n2);
	
	tc4_lshift(r1, &n1, a0, a0n, 3);
	tc4_addlsh1_unsigned(r1, &n1, a2, a2n);
 	tc4_lshift(r2, &n8, a1, a1n, 2);
   tc4_add(r2, &n8, r2, n8, a3, a3n);
   tc4_add(u4, &n9, r1, n1, r2, n8);
   tc4_sub(u5, &n5, r1, n1, r2, n8);
   
	r30 = r3[0];
	if (!n3) r30 = CNST_LIMB(0);
   r31 = r3[1];
	SQR_TC4(r6, n6, u5, n5);
   SQR_TC4_UNSIGNED(r5, n5, u4, n9);
   r3[1] = r31;

   tc4_lshift(u2, &n8, a3, a3n, 3);
   tc4_addmul_1(u2, &n8, a2, a2n, 4);
	tc4_addlsh1_unsigned(u2, &n8, a1, a1n);
	tc4_add(u2, &n8, u2, n8, a0, a0n);
   
	SQR_TC4_UNSIGNED(r2, n2, u2, n8);
   SQR_TC4_UNSIGNED(r1, n1, a3, a3n);
   SQR_TC4_UNSIGNED(r7, n7, a0, a0n);

	TC4_DENORM(r1, n1,  t4 - 1);
   TC4_DENORM(r2, n2,  t4 - 1);
   if (n3)
     TC4_DENORM(r3, n3,  t4 - 1);
   else {
     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
     for (ind = 1 ; ind < t4 - 1; ind++)
        (r3)[ind] = CNST_LIMB(0);
   }
   TC4_DENORM(r4, n4,  t4 - 1);
   TC4_DENORM(r5, n5,  t4 - 1);
   TC4_DENORM(r6, n6,  t4 - 1);
   TC4_DENORM(r7, n7,  t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

	mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

	if (rpn != 2*n) 
	{
		MPN_ZERO((rp + rpn), 2*n - rpn);
	}

   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));
}
Пример #8
0
void
mpz_clear (mpz_ptr x)
{
  if (ALLOC (x))
    __GMP_FREE_FUNC_LIMBS (PTR (x), ALLOC(x));
}
Пример #9
0
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, tz;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dp, qp, rp;
  mp_limb_t rh;
  mp_limb_t t;
  mp_limb_t dinv;
  int count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }


  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (tz);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = 0; test < count;)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
      if (maxdbits > nbits)
	dbits = random_word (rands) % nbits + 1;
      else
	dbits = random_word (rands) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	{
	  RANDFUNC (n, rands, nbits);
	  do
	    {
	      RANDFUNC (d, rands, dbits);
	    }
	  while (mpz_sgn (d) == 0);

	  np = PTR (n);
	  dp = PTR (d);
	  nn = SIZ (n);
	  dn = SIZ (d);
	}
      while (nn < dn);

      dp[0] |= 1;

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	dp[0] = GMP_NUMB_MAX;

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = 0; i <= clearn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
	  break;
	}

      test++;

      binvert_limb (dinv, dp[0]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_qr */
	      MPN_ZERO (qp, nn - dn);
	      MPN_ZERO (rp, dn);
	      MPN_COPY (rp, np, nn);
	      rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr");
	    }

	  if (nn > dn)
	    {
	      /* Test mpn_sbpi1_bdiv_q */
	      MPN_COPY (rp, np, nn);
	      MPN_ZERO (qp, nn - dn);
	      mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	      ASSERT_ALWAYS (rp[-1] == rran0);
	      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q");
	    }
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_qr */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr");
	}

      if (dn >= 4 && nn - dn >= 2)
	{
	  /* Test mpn_dcpi1_bdiv_q */
	  MPN_COPY (rp, np, nn);
	  MPN_ZERO (qp, nn - dn);
	  mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
	}

      if (nn > dn)
	{
	  /* Test mpn_bdiv_qr */
	  itch = mpn_bdiv_qr_itch (nn, dn);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_ZERO (qp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);

	  check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
	}

      if (nn - dn < 2 || dn < 2)
	continue;

      /* Test mpn_mu_bdiv_qr */
      itch = mpn_mu_bdiv_qr_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn);
      MPN_ZERO (rp, dn);
      rp[dn] = rran1;
      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
      check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr");

      /* Test mpn_mu_bdiv_q */
      itch = mpn_mu_bdiv_q_itch (nn, dn);
      if (itch + 1 > alloc)
	{
	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	  alloc = itch + 1;
	}
      scratch[itch] = ran;
      MPN_ZERO (qp, nn - dn + 1);
      mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch);
      ASSERT_ALWAYS (ran == scratch[itch]);
      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q");
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (tz);

  tests_end ();
  return 0;
}
Пример #10
0
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  unsigned long maxnbits, maxdbits, nbits, dbits;
  mpz_t n, d, q, r, tz, junk;
  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
  mp_ptr np, dup, dnp, qp, rp, junkp;
  mp_limb_t t;
  gmp_pi1_t dinv;
  long count = COUNT;
  mp_ptr scratch;
  mp_limb_t ran;
  mp_size_t alloc, itch;
  mp_limb_t rran0, rran1, qran0, qran1;
  TMP_DECL;

  if (argc > 1)
    {
      char *end;
      count = strtol (argv[1], &end, 0);
      if (*end || count <= 0)
	{
	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
	  return 1;
	}
    }

  maxdbits = MAX_DN;
  maxnbits = MAX_NN;

  tests_start ();
  rands = RANDS;

  mpz_init (n);
  mpz_init (d);
  mpz_init (q);
  mpz_init (r);
  mpz_init (tz);
  mpz_init (junk);

  maxnn = maxnbits / GMP_NUMB_BITS + 1;
  maxdn = maxdbits / GMP_NUMB_BITS + 1;

  TMP_MARK;

  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
  dnp = TMP_ALLOC_LIMBS (maxdn);

  alloc = 1;
  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

  for (test = -300; test < count; test++)
    {
      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;

      if (test < 0)
	dbits = (test + 300) % (nbits - 1) + 1;
      else
	dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;

#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
#else
#define RANDFUNC mpz_rrandomb
#endif

      do
	RANDFUNC (d, rands, dbits);
      while (mpz_sgn (d) == 0);
      dn = SIZ (d);
      dup = PTR (d);
      MPN_COPY (dnp, dup, dn);
      dnp[dn - 1] |= GMP_NUMB_HIGHBIT;

      if (test % 2 == 0)
	{
	  RANDFUNC (n, rands, nbits);
	  nn = SIZ (n);
	  ASSERT_ALWAYS (nn >= dn);
	}
      else
	{
	  do
	    {
	      RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1));
	      RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2));
	      mpz_mul (n, q, d);
	      mpz_add (n, n, r);
	      nn = SIZ (n);
	    }
	  while (nn > maxnn || nn < dn);
	}

      ASSERT_ALWAYS (nn <= maxnn);
      ASSERT_ALWAYS (dn <= maxdn);

      mpz_urandomb (junk, rands, nbits);
      junkp = PTR (junk);

      np = PTR (n);

      mpz_urandomb (tz, rands, 32);
      t = mpz_get_ui (tz);

      if (t % 17 == 0)
	{
	  dnp[dn - 1] = GMP_NUMB_MAX;
	  dup[dn - 1] = GMP_NUMB_MAX;
	}

      switch ((int) t % 16)
	{
	case 0:
	  clearn = random_word (rands) % nn;
	  for (i = clearn; i < nn; i++)
	    np[i] = 0;
	  break;
	case 1:
	  mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	case 2:
	  mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands));
	  break;
	}

      if (dn >= 2)
	invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);

      rran0 = random_word (rands);
      rran1 = random_word (rands);
      qran0 = random_word (rands);
      qran1 = random_word (rands);

      qp[-1] = qran0;
      qp[nn - dn + 1] = qran1;
      rp[-1] = rran0;

      ran = random_word (rands);

      if ((double) (nn - dn) * dn < 1e5)
	{
	  /* Test mpn_sbpi1_div_qr */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
	    }

	  /* Test mpn_sbpi1_divappr_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
	    }

	  /* Test mpn_sbpi1_div_q */
	  if (dn > 2)
	    {
	      MPN_COPY (rp, np, nn);
	      if (nn > dn)
		MPN_COPY (qp, junkp, nn - dn);
	      qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
	    }

	  /* Test mpn_sb_div_qr_sec */
	  itch = 3 * nn + 4;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  if (nn >= dn)
	    MPN_COPY (qp, junkp, nn - dn + 1);
	  mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0);

	  /* Test mpn_sb_div_r_sec */
	  itch = nn + 2 * dn + 2;
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (rp, np, nn);
	  mpn_sb_div_r_sec (rp, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  /* Note: Since check_one cannot cope with random-only functions, we
	     pass qp[] from the previous function, mpn_sb_div_qr_sec.  */
	  check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0);
	}

      /* Test mpn_dcpi1_div_qr */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
	}

      /* Test mpn_dcpi1_divappr_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
	}

      /* Test mpn_dcpi1_div_q */
      if (dn >= 6 && nn - dn >= 3)
	{
	  MPN_COPY (rp, np, nn);
	  if (nn > dn)
	    MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
	}

     /* Test mpn_mu_div_qr */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_qr_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  MPN_ZERO (rp, dn);
	  rp[dn] = rran1;
	  qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
	  check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
	}

      /* Test mpn_mu_divappr_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_divappr_q_itch (nn, dn, 0);
	  if (itch + 1 > alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
	}

      /* Test mpn_mu_div_q */
      if (nn - dn > 2 && dn >= 2)
	{
	  itch = mpn_mu_div_q_itch (nn, dn, 0);
	  if (itch + 1> alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  MPN_COPY (qp, junkp, nn - dn);
	  qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
	}

      if (1)
	{
	  itch = nn + 1;
	  if (itch + 1> alloc)
	    {
	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
	      alloc = itch + 1;
	    }
	  scratch[itch] = ran;
	  mpn_div_q (qp, np, nn, dup, dn, scratch);
	  ASSERT_ALWAYS (ran == scratch[itch]);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
	  check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
	}

      if (dn >= 2 && nn >= 2)
	{
	  mp_limb_t qh;

	  /* mpn_divrem_2 */
	  MPN_COPY (rp, np, nn);
	  qp[nn - 2] = qp[nn-1] = qran1;

	  qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);

	  /* Missing: divrem_2 with fraction limbs. */

	  /* mpn_div_qr_2 */
	  qp[nn - 2] = qran1;

	  qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
	  qp[nn - 2] = qh;
	  check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
	}
    }

  __GMP_FREE_FUNC_LIMBS (scratch, alloc);

  TMP_FREE;

  mpz_clear (n);
  mpz_clear (d);
  mpz_clear (q);
  mpz_clear (r);
  mpz_clear (tz);
  mpz_clear (junk);

  tests_end ();
  return 0;
}
Пример #11
0
/* Multiply {up, un} by {vp, vn} and write the result to
   {prodp, un + vn} assuming vn > 2*ceil(un/5).

   Note that prodp gets un + vn limbs stored, even if the actual 
   result only needs un + vn - 1.
*/
void
mpn_toom53_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,
		          mp_srcptr vp, mp_size_t vn)
{
  mp_size_t ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, a4n, b0n, b1n, b2n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, rpn, t4;

  sn = (un + 4) / 5;

  ASSERT (vn > 2*sn);
  
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define a4 (up + 4*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)

   TC4_NORM(a0, a0n, sn);
   TC4_NORM(a1, a1n, sn);
   TC4_NORM(a2, a2n, sn);
   TC4_NORM(a3, a3n, sn);
   TC4_NORM(a4, a4n, un - 4*sn); 
   TC4_NORM(b0, b0n, sn);
   TC4_NORM(b1, b1n, sn);
   TC4_NORM(b2, b2n, vn - 2*sn); 

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));

#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))

   tc4_add_unsigned(u2, &n2, a3, a3n, a1, a1n); 
   tc4_add_unsigned(u5, &n5, a2, a2n, a0, a0n); 
   tc4_add_unsigned(u5, &n5, u5, n5, a4, a4n); 
   tc4_add_unsigned(u3, &n3, u5, n5, u2, n2); 
   tc4_sub(u4, &n4, u5, n5, u2, n2);

   tc4_add_unsigned(u5, &n5, b2, b2n, b0, b0n);
   tc4_add_unsigned(r2, &n8, u5, n5, b1, b1n); 
   tc4_sub(u5, &n5, u5, n5, b1, b1n);

   MUL_TC4_UNSIGNED(r3, n3, u3, n3, r2, n8); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
   tc4_lshift(r1, &n1, a0, a0n, 4);
   tc4_lshift(u3, &n9, a2, a2n, 2);
   tc4_add_unsigned(r1, &n1, r1, n1, u3, n9);
   tc4_add_unsigned(r1, &n1, r1, n1, a4, a4n);
   tc4_lshift(r2, &n8, a1, a1n, 3);
   tc4_addlsh1_unsigned(r2, &n8, a3, a3n);
   tc4_add_unsigned(u5, &n5, r1, n1, r2, n8);
   tc4_sub(u3, &n9, r1, n1, r2, n8);

   tc4_lshift(r1, &n1, b0, b0n, 2);
   tc4_add_unsigned(r1, &n1, r1, n1, b2, b2n);
   tc4_lshift(u4, &n10, b1, b1n, 1);
   tc4_add_unsigned(u2, &n2, r1, n1, u4, n10);
   tc4_sub(r2, &n8, r1, n1, u4, n10);
   
   r30 = r3[0];
   if (!n3) r30 = CNST_LIMB(0);
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, n5, u2, n2); /* 1/2 */
   MUL_TC4(r6, n6, u3, n9, r2, n8); /* -1/2 */
   r3[1] = r31;

   tc4_lshift(u2, &n2, a4, a4n, 4);
   tc4_addmul_1(u2, &n2, a3, a3n, 8);
   tc4_addmul_1(u2, &n2, a2, a2n, 4);
   tc4_addlsh1_unsigned(u2, &n2, a1, a1n);
   tc4_add(u2, &n2, u2, n2, a0, a0n);

   tc4_lshift(r1, &n1, b2, b2n, 2);
   tc4_addlsh1_unsigned(r1, &n1, b1, b1n);
   tc4_add(r1, &n1, r1, n1, b0, b0n);
   
   MUL_TC4_UNSIGNED(r2, n2, u2, n2, r1, n1); /* 2 */

   MUL_TC4_UNSIGNED(r1, n1, a4, a4n, b2, b2n); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, a0n, b0, b0n); /* 0 */

   TC4_DENORM(r1, n1,  t4 - 1);
   TC4_DENORM(r2, n2,  t4 - 1);
   if (n3)
     TC4_DENORM(r3, n3,  t4 - 1); 
   else {
     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
     for (ind = 1 ; ind < t4 - 1; ind++) 
        (r3)[ind] = CNST_LIMB(0); 
   }
   TC4_DENORM(r4, n4,  t4 - 1);
   TC4_DENORM(r5, n5,  t4 - 1);
   TC4_DENORM(r6, n6,  t4 - 1);
   TC4_DENORM(r7, n7,  t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   if (rpn != un + vn) 
   {
	  MPN_ZERO((rp + rpn), un + vn - rpn);
   }

   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));
}