Example #1
void
mpn_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

#if 0
  /* FIXME: Can this be removed? */
  if (n == 0)
    return;
#endif

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (p, a, n, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_kara_sqr_n (p, a, n, ws);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
#else
  else if (BELOW_THRESHOLD (n, MPN_TOOM3_MAX_N))
#endif
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
      mpn_toom3_sqr_n (p, a, n, ws);
      TMP_SFREE;
    }
  else
#if WANT_FFT || TUNE_PROGRAM_BUILD
    {
      /* The current FFT code allocates its own space.  That should probably
	 change.  */
      mpn_mul_fft_full (p, a, n, a, n);
    }
#else
    {
      /* Toom3 for large operands.  Use workspace from the heap, as stack
	 space may be limited.  Since n is at least SQR_TOOM3_THRESHOLD, the
	 squaring will take much longer than malloc()/free().  */
      mp_ptr ws;  mp_size_t ws_size;
      ws_size = MPN_TOOM3_SQR_N_TSIZE (n);
      ws = __GMP_ALLOCATE_FUNC_LIMBS (ws_size);
      mpn_toom3_sqr_n (p, a, n, ws);
      __GMP_FREE_FUNC_LIMBS (ws, ws_size);
    }
#endif
}
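
A minimal caller sketch for the routine above, not part of the library source: per the ASSERTs, the destination must provide 2*n limbs and must not overlap the source. Whether mpn_sqr_n is callable from user code depends on the version (newer GMP exposes the equivalent entry point as mpn_sqr), so treat this as illustrative only.

#include <gmp.h>  /* mp_limb_t, mp_size_t; an MPIR build would use mpir.h */

/* Hypothetical helper: square a 4-limb operand a into an 8-limb result p.
   Assumes mpn_sqr_n is declared for the caller; in some versions it is
   internal and mpn_sqr is the public name instead. */
static void
square_4_limbs (mp_limb_t p[8], const mp_limb_t a[4])
{
  mpn_sqr_n (p, a, (mp_size_t) 4);
}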
Example #2
void
mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

#if 0
  /* FIXME: Can this be removed? */
  if (n == 0)
    return;
#endif

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (p, a, n, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_kara_sqr_n (p, a, n, ws);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
      mpn_toom3_sqr_n (p, a, n, ws);
      TMP_SFREE;
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
    {
       mpn_toom4_sqr_n (p, a, n);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (n, SQR_FFT_FULL_THRESHOLD))
#else
  else 
#endif
    {
       mpn_toom8_sqr_n (p, a, n);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
Example #3
void
mpn_sec_sqr (mp_ptr rp,
	     mp_srcptr ap, mp_size_t an,
	     mp_ptr tp)
{
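  /* The mpn_sec_* interface is meant for side-channel sensitive code, so
     only the quadratic basecase routines are used here: the work done
     depends only on the operand size an, not on the operand values. */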
#ifndef SQR_BASECASE_LIM
/* If SQR_BASECASE_LIM is not defined, mpn_sqr_basecase accepts any operand
   size, so use it directly.  */
  mpn_sqr_basecase (rp, ap, an);
#else
/* Otherwise mpn_sqr_basecase is only valid below SQR_BASECASE_LIM, so fall
   back to mpn_mul_basecase, which handles any size.  */
  mpn_mul_basecase (rp, ap, an, ap, an);
#endif
}
Example #4
/* (rp,2n) = (xp,n)^2 with temp space (tp,2*n+C) */
void
mpn_kara_sqr_n (mp_ptr rp, mp_srcptr xp, mp_size_t n, mp_ptr tp)
{
  mp_size_t n2, n3;
  mp_srcptr xl, xh;
  mp_ptr dx;
  mp_limb_t c;

  n2 = n >> 1;
  xl = xp;  xh = xp + n2;  n3 = n - n2;
  dx = rp + 2 * n2;
  /* dx = |xh - xl| */
  if ((n & 1) == 0)
    {
      if (mpn_cmp (xh, xl, n2) >= 0)
	mpn_sub_n (dx, xh, xl, n2);
      else
	mpn_sub_n (dx, xl, xh, n2);
    }
  else
    {
      if (xh[n2] != 0 || mpn_cmp (xh, xl, n2) >= 0)
	{ c = mpn_sub_n (dx, xh, xl, n2);  dx[n2] = xh[n2] - c; }
      else
	{ mpn_sub_n (dx, xl, xh, n2);  dx[n2] = 0; }
    }
  /* Three half-size squarings: xl^2, (xh-xl)^2 and xh^2. */
  if (BELOW_THRESHOLD (n3, MUL_KARATSUBA_THRESHOLD))
    {
      mpn_mul_basecase (rp, xl, n2, xl, n2);
      mpn_mul_basecase (tp, dx, n3, dx, n3);
      mpn_mul_basecase (rp + 2 * n2, xh, n3, xh, n3);
    }
  else if (BELOW_THRESHOLD (n3, SQR_KARATSUBA_THRESHOLD))
    {
      mpn_sqr_basecase (rp, xl, n2);
      mpn_sqr_basecase (tp, dx, n3);
      mpn_sqr_basecase (rp + 2 * n2, xh, n3);
    }
  else
    {
      mpn_kara_sqr_n (rp, xl, n2, tp + 2 * n3);
      mpn_kara_sqr_n (tp, dx, n3, tp + 2 * n3);
      mpn_kara_sqr_n (rp + 2 * n2, xh, n3, tp + 2 * n3);
    }
  mpn_karasub (rp, tp, n);   /* Karatsuba recombination */
}
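
A brief aside (not from either library) on why three half-size squares suffice, which is the identity the Karatsuba squaring routines here rely on. Writing the operand as x*B^k + y (x the high half, y the low half, B the limb base),

  (x*B^k + y)^2 = x^2*B^(2k) + (x^2 + y^2 - (x - y)^2)*B^k + y^2,

so only x^2, y^2 and (x - y)^2 are needed, and the middle term 2*x*y = x^2 + y^2 - (x - y)^2 is never negative, which is what the recombination comments in Example #7 exploit. A toy single-word check of the identity (names and constants mine):

#include <assert.h>
#include <stdint.h>

/* Single-word sanity check of the Karatsuba squaring identity, using
   B = 2^16 so every intermediate fits comfortably in 64 bits. */
int
main (void)
{
  uint64_t B = UINT64_C(1) << 16;
  uint64_t x = 0x1234, y = 0x0abc;
  uint64_t d = x > y ? x - y : y - x;        /* |x - y| */
  uint64_t mid = x * x + y * y - d * d;      /* equals 2*x*y */
  uint64_t lhs = (x * B + y) * (x * B + y);
  uint64_t rhs = x * x * B * B + mid * B + y * y;

  assert (mid == 2 * x * y);
  assert (lhs == rhs);
  return 0;
}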
Example #5
File: mul.c  Project: mahdiz/mpclib
void
mpn_sqr_n (mp_ptr prodp,
	   mp_srcptr up, mp_size_t un)
{
  ASSERT (un >= 1);
  ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un));

  /* FIXME: Can this be removed? */
  if (un == 0)
    return;

  if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (prodp, up, un, up, un);
    }
  else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))
    { /* plain schoolbook multiplication */
      mpn_sqr_basecase (prodp, up, un);
    }
  else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD))
    { /* karatsuba multiplication */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un));
      mpn_kara_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD))
#else
  else
#endif
    { /* Toom3 squaring.
	 Use workspace from the heap, as stack may be limited.  Since un is
	 at least SQR_TOOM3_THRESHOLD, the squaring will take much longer
	 than malloc()/free().  */
      mp_ptr     tspace;
      mp_size_t  tsize;
      tsize = MPN_TOOM3_SQR_N_TSIZE (un);
      tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize);
      mpn_toom3_sqr_n (prodp, up, un, tspace);
      __GMP_FREE_FUNC_LIMBS (tspace, tsize);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
Example #6
/* Define our own squaring function, which uses mpn_sqr_basecase for its
   allowed sizes, but its own code for larger sizes.  */
static void
mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
    {
      mpn_sqr_basecase (rp, up, n);
      return;
    }

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }
  if (n > 1)
    {
      mp_limb_t cy;
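      /* tp accumulates the off-diagonal products u[i]*u[j] with i < j;
	 MPN_SQR_DIAGONAL below writes the squares u[i]^2 into rp, and the
	 addlsh1_n (or lshift + add_n) step folds in twice the off-diagonal
	 sum, giving rp = u^2. */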

      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
	{
	  mp_limb_t cy;
	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
	  tp[n + i - 2] = cy;
	}
      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

      {
	mp_limb_t cy;
#if HAVE_NATIVE_mpn_addlsh1_n
	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
	rp[2 * n - 1] += cy;
      }
    }
}
Example #7
void
mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
{
  mp_limb_t w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;
  mp_size_t i;

  n2 = n >> 1;
  ASSERT (n2 > 0);

  if ((n & 1) != 0)
    {
      /* Odd length. */
      mp_size_t n1, n3, nm1;

      n3 = n - n2;

      w = a[n2];
      if (w != 0)
	w -= mpn_sub_n (p, a, a + n3, n2);
      else
	{
	  i = n2;
	  do
	    {
	      --i;
	      w0 = a[i];
	      w1 = a[n3 + i];
	    }
	  while (w0 == w1 && i != 0);
	  if (w0 < w1)
	    {
	      x = a + n3;
	      y = a;
	    }
	  else
	    {
	      x = a;
	      y = a + n3;
	    }
	  mpn_sub_n (p, x, y, n2);
	}
      p[n2] = w;

      n1 = n + 1;

      /* n2 is always either n3 or n3-1 so maybe the two sets of tests here
	 could be combined.  But that's not important, since the tests will
	 take a minuscule amount of time compared to the function calls.  */
      if (BELOW_THRESHOLD (n3, SQR_BASECASE_THRESHOLD))
	{
	  mpn_mul_basecase (ws, p, n3, p, n3);
	  mpn_mul_basecase (p,  a, n3, a, n3);
	}
      else if (BELOW_THRESHOLD (n3, SQR_KARATSUBA_THRESHOLD))
	{
	  mpn_sqr_basecase (ws, p, n3);
	  mpn_sqr_basecase (p,  a, n3);
	}
      else
	{
	  mpn_kara_sqr_n   (ws, p, n3, ws + n1);	 /* (x-y)^2 */
	  mpn_kara_sqr_n   (p,  a, n3, ws + n1);	 /* x^2	    */
	}
      if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))
	mpn_mul_basecase (p + n1, a + n3, n2, a + n3, n2);
      else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))
	mpn_sqr_basecase (p + n1, a + n3, n2);
      else
	mpn_kara_sqr_n   (p + n1, a + n3, n2, ws + n1);	 /* y^2	    */

      /* Since x^2+y^2-(x-y)^2 = 2xy >= 0 there's no need to track the
	 borrow from mpn_sub_n.	 If it occurs then it'll be cancelled by a
	 carry from ws[n].  Further, since 2xy fits in n1 limbs there won't
	 be any carry out of ws[n] other than cancelling that borrow. */

      mpn_sub_n (ws, p, ws, n1);	     /* x^2-(x-y)^2 */

      nm1 = n - 1;
      if (mpn_add_n (ws, p + n1, ws, nm1))   /* x^2+y^2-(x-y)^2 = 2xy */
	{
	  mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;
	  ws[nm1] = x;
	  if (x == 0)
	    ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;
	}
      if (mpn_add_n (p + n3, p + n3, ws, n1))
	{
	  mpn_incr_u (p + n1 + n3, 1);
	}
    }
  else
    {
      /* Even length. */
      i = n2;
      do
	{
	  --i;
	  w0 = a[i];
	  w1 = a[n2 + i];
	}
      while (w0 == w1 && i != 0);
      if (w0 < w1)
	{
	  x = a + n2;
	  y = a;
	}
      else
	{
	  x = a;
	  y = a + n2;
	}
      mpn_sub_n (p, x, y, n2);

      /* Pointwise products. */
      if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))
	{
	  mpn_mul_basecase (ws,    p,      n2, p,      n2);
	  mpn_mul_basecase (p,     a,      n2, a,      n2);
	  mpn_mul_basecase (p + n, a + n2, n2, a + n2, n2);
	}
      else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))
	{
	  mpn_sqr_basecase (ws,    p,      n2);
	  mpn_sqr_basecase (p,     a,      n2);
	  mpn_sqr_basecase (p + n, a + n2, n2);
	}
      else
	{
	  mpn_kara_sqr_n (ws,    p,      n2, ws + n);
	  mpn_kara_sqr_n (p,     a,      n2, ws + n);
	  mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
	}

      /* Interpolate. */
      w = -mpn_sub_n (ws, p, ws, n);
      w += mpn_add_n (ws, p + n, ws, n);
      w += mpn_add_n (p + n2, p + n2, ws, n);
      MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);
    }
}
Example #8
void
check_functions (void)
{
  mp_limb_t  wp[2], wp2[2], xp[2], yp[2], r;
  int  i;
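
  /* Each operation below is run twice, with __gmpn_cpuvec reset to its
     initial state first: in a fat-binary build the first call is expected
     to go through the init/dispatch stub and patch the function pointer,
     and the second call exercises the routine actually selected. */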

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      yp[0] = 456;
      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 579);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 702);
      ASSERT_ALWAYS (r == 0);
    }

#if HAVE_NATIVE_mpn_copyd
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyd (xp+1, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[1] == 123);
    }
#endif

#if HAVE_NATIVE_mpn_copyi
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyi (xp, xp+1, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[0] == 456);
    }
#endif

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1605;
      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
      ASSERT_ALWAYS (wp[0] == 321);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1296;
      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
      ASSERT_ALWAYS (wp[0] == 432);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 578;
      r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17), CNST_LIMB(-1)/CNST_LIMB(17));
      ASSERT_ALWAYS (wp[0] == 34);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 287;
      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[1] == 41);
      ASSERT_ALWAYS (wp[0] == 0);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 290;
      r = mpn_divrem_euclidean_qr_1 (wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[0] == 41);
      ASSERT_ALWAYS (r == 3);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 12;
      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
      ASSERT_ALWAYS (r == 3);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x1001;
      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x2002);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (r == 2);
    }

#if (GMP_NUMB_BITS % 4) == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      int  bits = (GMP_NUMB_BITS / 4) * 3;
      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;
      mp_limb_t  want = GMP_NUMB_MAX % mod;
      xp[0] = GMP_NUMB_MAX;
      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
      ASSERT_ALWAYS (r % mod == want);
    }
#endif

  //   DECL_modexact_1c_odd ((*modexact_1c_odd)); 

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (wp[0] == 56);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      yp[0] = 7;
      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 35);
      ASSERT_ALWAYS (wp[1] == 0);
    }

#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x101;
      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
                               GMP_LIMB_HIGHBIT,
                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
      ASSERT_ALWAYS (wp[0] == 0x202);
      ASSERT_ALWAYS (wp[1] == 0);
      ASSERT_ALWAYS (r == 0);
    }
#endif

#if GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = GMP_LIMB_HIGHBIT+123;
      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,
                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));
      ASSERT_ALWAYS (r == 123);
    }
#endif


  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      modlimb_invert (r, xp[0]);
      r = -r;
      yp[0] = 43;
      yp[1] = 75;
      mpn_redc_1 (wp, yp, xp, (mp_size_t) 1, r);
      ASSERT_ALWAYS (wp[0] == 78);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      yp[0] = 3;
      mpn_sumdiff_n (wp, wp2, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 8);
      ASSERT_ALWAYS (wp2[0] == 2);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x8008;
      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x4004);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 25);
      ASSERT_ALWAYS (wp[1] == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 999;
      yp[0] = 666;
      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 333);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 210);
      ASSERT_ALWAYS (r == 0);
    }
}
Example #9
void
check (void)
{
  mp_limb_t  wp[100], xp[100], yp[100];
  mp_size_t  size = 100;
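
  /* The calls below just touch every optional mpn entry point once.
     Return values are folded into the global notdead (presumably defined
     elsewhere in this file) so the calls cannot be optimized away, and
     pre()/post() bracket each routine by name. */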

  refmpn_zero (xp, size);
  refmpn_zero (yp, size);
  refmpn_zero (wp, size);

  pre ("mpn_add_n");
  mpn_add_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_add_nc
  pre ("mpn_add_nc");
  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
  pre ("mpn_addlsh1_n");
  mpn_addlsh1_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_and_n
  pre ("mpn_and_n");
  mpn_and_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_andn_n
  pre ("mpn_andn_n");
  mpn_andn_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_addmul_1");
  mpn_addmul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_addmul_1c
  pre ("mpn_addmul_1c");
  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_com_n
  pre ("mpn_com_n");
  mpn_com_n (wp, xp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_copyd
  pre ("mpn_copyd");
  mpn_copyd (wp, xp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_copyi
  pre ("mpn_copyi");
  mpn_copyi (wp, xp, size);
  post ();
#endif

  pre ("mpn_divexact_1");
  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));
  post ();

  pre ("mpn_divexact_by3c");
  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));
  post ();

  pre ("mpn_divrem_1");
  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_divrem_1c
  pre ("mpn_divrem_1c");
  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122));
  post ();
#endif

  pre ("mpn_gcd_1");
  xp[0] |= 1;
  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_gcd_finda
  pre ("mpn_gcd_finda");
  xp[0] |= 1;
  xp[1] |= 1;
  notdead += mpn_gcd_finda (xp);
  post ();
#endif

  pre ("mpn_hamdist");
  notdead += mpn_hamdist (xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_ior_n
  pre ("mpn_ior_n");
  mpn_ior_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_iorn_n
  pre ("mpn_iorn_n");
  mpn_iorn_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_lshift");
  mpn_lshift (wp, xp, size, 1);
  post ();

  pre ("mpn_mod_1");
  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));
  post ();

#if HAVE_NATIVE_mpn_mod_1c
  pre ("mpn_mod_1c");
  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));
  post ();
#endif

#if GMP_NUMB_BITS % 4 == 0
  pre ("mpn_mod_34lsub1");
  notdead += mpn_mod_34lsub1 (xp, size);
  post ();
#endif

  pre ("mpn_modexact_1_odd");
  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));
  post ();

  pre ("mpn_modexact_1c_odd");
  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));
  post ();

  pre ("mpn_mul_1");
  mpn_mul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_mul_1c
  pre ("mpn_mul_1c");
  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_mul_2
  pre ("mpn_mul_2");
  mpn_mul_2 (wp, xp, size-1, yp);
  post ();
#endif

  pre ("mpn_mul_basecase");
  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);
  post ();

#if HAVE_NATIVE_mpn_nand_n
  pre ("mpn_nand_n");
  mpn_nand_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_nior_n
  pre ("mpn_nior_n");
  mpn_nior_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_popcount");
  notdead += mpn_popcount (xp, size);
  post ();

  pre ("mpn_preinv_mod_1");
  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,
                               refmpn_invert_limb (GMP_NUMB_MAX));
  post ();

#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
  pre ("mpn_preinv_divrem_1");
  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,
                       refmpn_invert_limb (GMP_NUMB_MAX), 0);
  post ();
#endif

#if HAVE_NATIVE_mpn_rsh1add_n
  pre ("mpn_rsh1add_n");
  mpn_rsh1add_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_rsh1sub_n
  pre ("mpn_rsh1sub_n");
  mpn_rsh1sub_n (wp, xp, yp, size);
  post ();
#endif

  pre ("mpn_rshift");
  mpn_rshift (wp, xp, size, 1);
  post ();

  pre ("mpn_sqr_basecase");
  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);
  post ();

  pre ("mpn_submul_1");
  mpn_submul_1 (wp, xp, size, yp[0]);
  post ();

#if HAVE_NATIVE_mpn_submul_1c
  pre ("mpn_submul_1c");
  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
  post ();
#endif

  pre ("mpn_sub_n");
  mpn_sub_n (wp, xp, yp, size);
  post ();

#if HAVE_NATIVE_mpn_sub_nc
  pre ("mpn_sub_nc");
  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));
  post ();
#endif

#if HAVE_NATIVE_mpn_sublsh1_n
  pre ("mpn_sublsh1_n");
  mpn_sublsh1_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_udiv_qrnnd
  pre ("mpn_udiv_qrnnd");
  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));
  post ();
#endif

#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  pre ("mpn_udiv_qrnnd_r");
  mpn_udiv_qrnnd_r (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);
  post ();
#endif

#if HAVE_NATIVE_mpn_umul_ppmm
  pre ("mpn_umul_ppmm");
  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);
  post ();
#endif

#if HAVE_NATIVE_mpn_umul_ppmm_r
  pre ("mpn_umul_ppmm_r");
  mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]);
  post ();
#endif

#if HAVE_NATIVE_mpn_xor_n
  pre ("mpn_xor_n");
  mpn_xor_n (wp, xp, yp, size);
  post ();
#endif

#if HAVE_NATIVE_mpn_xnor_n
  pre ("mpn_xnor_n");
  mpn_xnor_n (wp, xp, yp, size);
  post ();
#endif
}
Example #10
void
mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
{
  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
      mpn_mul_basecase (p, a, n, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
    {
      mpn_sqr_basecase (p, a, n);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
      mpn_toom2_sqr (p, a, n, ws);
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
      mpn_toom3_sqr (p, a, n, ws);
      TMP_SFREE;
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
      mpn_toom4_sqr (p, a, n, ws);
      TMP_SFREE;
    }
  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
    {
      mp_ptr ws;
      TMP_SDECL;
      TMP_SMARK;
      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
      mpn_toom6_sqr (p, a, n, ws);
      TMP_SFREE;
    }
  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
    {
      mp_ptr ws;
      TMP_DECL;
      TMP_MARK;
      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
      mpn_toom8_sqr (p, a, n, ws);
      TMP_FREE;
    }
  else
    {
      /* The current FFT code allocates its own space.  That should probably
	 change.  */
      mpn_fft_mul (p, a, n, a, n);
    }
}
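
For completeness, a small self-contained caller for the routine above. This assumes a GMP release (5.0 or later) where mpn_sqr is a documented public mpn function; as with the other examples, the destination takes 2*n limbs and must not overlap the source.

#include <assert.h>
#include <gmp.h>

int
main (void)
{
  mp_limb_t a[1] = { 3 };
  mp_limb_t p[2];

  /* p receives the 2-limb square of the 1-limb operand a. */
  mpn_sqr (p, a, (mp_size_t) 1);
  assert (p[0] == 9 && p[1] == 0);
  return 0;
}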