Example #1
0
int
main (int argc, char **argv)
{
  mp_ptr r1p, r2p, s1p, s2p;
  double t;
  mp_size_t n;

  n = strtol (argv[1], 0, 0);

  r1p = malloc (n * BYTES_PER_MP_LIMB);
  r2p = malloc (n * BYTES_PER_MP_LIMB);
  s1p = malloc (n * BYTES_PER_MP_LIMB);
  s2p = malloc (n * BYTES_PER_MP_LIMB);
  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
  printf ("              separate add and sub: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
  printf ("combined addsub separate variables: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
  printf ("        combined addsub r1 overlap: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
  printf ("        combined addsub r2 overlap: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
  printf ("          combined addsub in-place: %.3f\n", t);

  return 0;
}
Example #2
0
void mpir_ifft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, 
                    mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)
{
    mp_size_t i;
    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;
    
    if (trunc == 2*n)
        mpir_ifft_radix2(ii, n, w, t1, t2);
    else if (trunc <= n)
    {
        for (i = trunc; i < n; i++)
        {
            mpn_add_n(ii[i], ii[i], ii[i+n], limbs + 1);
            mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, 1);
        }
      
        mpir_ifft_trunc1(ii, n/2, 2*w, t1, t2, trunc);

        for (i = 0; i < trunc; i++)
        {
#if  HAVE_NATIVE_mpn_addsub_n
            mpn_addsub_n(ii[i], ii[i], ii[i], ii[n+i], limbs + 1);
#else
            mpn_add_n(ii[i], ii[i], ii[i], limbs + 1);
            mpn_sub_n(ii[i], ii[i], ii[n+i], limbs + 1);
#endif
        }
    } else
    {
        mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);

        for (i = trunc - n; i < n; i++)
        {
            mpn_sub_n(ii[i+n], ii[i], ii[i+n], limbs + 1);
            mpir_fft_adjust(*t1, ii[i+n], i, limbs, w);
            mpn_add_n(ii[i], ii[i], ii[i+n], limbs + 1);
            MP_PTR_SWAP(ii[i+n], *t1);
        }
   
        mpir_ifft_trunc1(ii+n, n/2, 2*w, t1, t2, trunc - n);

        for (i = 0; i < trunc - n; i++) 
        {   
            mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);
   
            MP_PTR_SWAP(ii[i],   *t1);
            MP_PTR_SWAP(ii[n+i], *t2);
        }
    }
}
Example #3
0
static void	mpn_karasub(mp_ptr rp,mp_ptr tp,mp_size_t n)
{mp_size_t n2,n3;mp_limb_t c1=0,c2,c3,top[2];

n2=n>>1;n3=n-n2;
c2=mpn_addsub_n(tp,rp,rp+2*n2,tp,2*n2);
//if(n3!=n2)c1=mpn_sub_n(tp+2*n2,rp+4*n2,tp+2*n2,2);
c3=mpn_add_n(rp+n2,rp+n2,tp,2*n2);//c3=mpn_add_n(rp+n2,rp+n2,tp,2*n3);
top[1]=rp[2*n2+2*n3-1];top[0]=rp[2*n2+2*n3-2];
mpn_incr_u(rp+3*n2,c3);//mpn_incr_u(rp+n2+2*n3,c3);
if(c2==1)mpn_incr_u(rp+3*n2,1);
if(c2==-1)mpn_decr_u(rp+3*n2,1);
//mpn_decr_u(rp+n2+2*n3,c1);
if(n2==n3)return;
c1=mpn_sub_n(rp+3*n2,rp+3*n2,tp+2*n2,2);
c2=mpn_add_n(rp+3*n2,rp+3*n2,top,2);
if(c2==1 && c1==0)mpn_incr_u(rp+3*n2+2,1);
if(c2==0 && c1==1)mpn_decr_u(rp+3*n2+2,1);
return;}
Example #4
0
void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  int vm1_neg, vmh_neg;
  mp_limb_t cy;
  mp_ptr gp, hp;
  mp_ptr as1, asm1, as2, ash, asmh;
  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
  enum toom4_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  as1  = TMP_SALLOC_LIMBS (n + 1);
  asm1 = TMP_SALLOC_LIMBS (n + 1);
  as2  = TMP_SALLOC_LIMBS (n + 1);
  ash  = TMP_SALLOC_LIMBS (n + 1);
  asmh = TMP_SALLOC_LIMBS (n + 1);

  bs1  = TMP_SALLOC_LIMBS (n + 1);
  bsm1 = TMP_SALLOC_LIMBS (n + 1);
  bs2  = TMP_SALLOC_LIMBS (n + 1);
  bsh  = TMP_SALLOC_LIMBS (n + 1);
  bsmh = TMP_SALLOC_LIMBS (n + 1);

  gp = pp;
  hp = pp + n + 1;

  /* Compute as1 and asm1.  */
  gp[n]  = mpn_add_n (gp, a0, a2, n);
  gp[n] += mpn_add   (gp, gp, n, a4, s);
  hp[n]  = mpn_add_n (hp, a1, a3, n);
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#else
  mpn_add_n (as1, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asm1, hp, gp, n + 1);
      vm1_neg = 1;
    }
  else
    {
      mpn_sub_n (asm1, gp, hp, n + 1);
      vm1_neg = 0;
    }
#endif

  /* Compute as2.  */
#if !HAVE_NATIVE_mpn_addlsh_n
  ash[n] = mpn_lshift (ash, a2, n, 2);			/*        4a2       */
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a3, a4, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
  as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy  = mpn_lshift (as2, a4, s, 1);
  cy += mpn_add_n (as2, a3, as2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
  cy = 4 * cy + mpn_lshift (as2, as2, n, 2);
  cy += mpn_add_n (as2, a1, as2, n);
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  as2[n] = cy + mpn_add_n (as2, a0, as2, n);
  mpn_add_n (as2, ash, as2, n + 1);
#endif

  /* Compute ash and asmh.  */
#if HAVE_NATIVE_mpn_addlsh_n
  cy  = mpn_addlsh_n (gp, a2, a0, n, 2);		/* 4a0  +  a2       */
  cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2);	/* 16a0 + 4a2 +  a4 */ /* FIXME s */
  gp[n] = cy;
  cy  = mpn_addlsh_n (hp, a3, a1, n, 2);		/*  4a1 +  a3       */
  cy = 2 * cy + mpn_lshift (hp, hp, n, 1);		/*  8a1 + 2a3       */
  hp[n] = cy;
#else
  gp[n] = mpn_lshift (gp, a0, n, 4);			/* 16a0             */
  mpn_add (gp, gp, n + 1, a4, s);			/* 16a0 +        a4 */
  mpn_add_n (gp, ash, gp, n+1);				/* 16a0 + 4a2 +  a4 */
  cy  = mpn_lshift (hp, a1, n, 3);			/*  8a1             */
  cy += mpn_lshift (ash, a3, n, 1);			/*        2a3       */
  cy += mpn_add_n (hp, ash, hp, n);			/*  8a1 + 2a3       */
  hp[n] = cy;
#endif
#if HAVE_NATIVE_mpn_addsub_n
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#else
  mpn_add_n (ash, gp, hp, n + 1);
  if (mpn_cmp (gp, hp, n + 1) < 0)
    {
      mpn_sub_n (asmh, hp, gp, n + 1);
      vmh_neg = 1;
    }
  else
    {
      mpn_sub_n (asmh, gp, hp, n + 1);
      vmh_neg = 0;
    }
#endif

  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_addsub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
Example #5
0
/*
   We have

	{v0,2k} {v1,2k+1} {c+4k+1,r+r2-1} 
		v0 	v1       {-}vinf

	vinf0 is the first limb of vinf, which is overwritten by v1

	{vm1,2k+1} {v2, 2k+1}

	ws is temporary space

	sa is the sign of vm1

	rr2 is r+r2

	We want to compute

     t1   <- (3*v0+2*vm1+v2)/6-2*vinf
     t2   <- (v1+vm1)/2
  then the result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where
     c0   <- v0
     c1   <- v1 - t1
     c2   <- t2 - v0 - vinf
     c3   <- t1 - t2
     c4   <- vinf
*/ 
void
mpn_toom3_interpolate (mp_ptr c, mp_ptr v1, mp_ptr v2, mp_ptr vm1,
		                 mp_ptr vinf, mp_size_t k, mp_size_t rr2, int sa,
		                                       mp_limb_t vinf0, mp_ptr ws)
{
  mp_limb_t cy, saved;
  mp_size_t twok = k + k;
  mp_size_t kk1 = twok + 1;
  mp_ptr c1, c2, c3, c4, c5;
  mp_limb_t cout; /* final carry, should be zero at the end */

  c1 = c + k;
  c2 = c1 + k;
  c3 = c2 + k;
  c4 = c3 + k;
  c5 = c4 + k;

#define v0 (c)

/* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} 
		v0 	 v1          {-}vinf

	{vm1,2k+1} {v2, 2k+1}
*/

  /* v2 <- v2 - vm1 */
  if (sa < 0)
  {
	  mpn_add_n(v2, v2, vm1, kk1);
  } else
  {
	  mpn_sub_n(v2, v2, vm1, kk1);
  }

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */

 /* vm1 <- t2 := (v1 - sa*vm1) / 2
 */
  if (sa < 0)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      mpn_add_n (vm1, vm1, v1, kk1);
      mpn_half (vm1, kk1);
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      mpn_sub_n (vm1, v1, vm1, kk1);
      mpn_half (vm1, kk1);
#endif
    }

  /* v1 <- v1 - v0 - vinf */

  saved = c4[0];
  c4[0] = vinf0;
#if HAVE_NATIVE_mpn_subadd_n
  cy = mpn_subadd_n(v1, v1, v0, c4, rr2);
#else
  cy = mpn_sub_n(v1, v1, v0, rr2);
  cy += mpn_sub_n(v1, v1, c4, rr2);
#endif
  c4[0] = saved;
  if (rr2 < twok)
  {
	  v1[twok] -= mpn_sub_n(v1 + rr2, v1 + rr2, v0 + rr2, twok - rr2); 
	  MPN_DECR_U(v1 + rr2, kk1 - rr2, cy);
  }
  else v1[twok] -= cy;

  saved = c4[0];
  c4[0] = vinf0;
/* subtract 5*vinf from v2,
  */
  cy = mpn_submul_1 (v2, c4, rr2, CNST_LIMB(5));
  MPN_DECR_U (v2 + rr2, kk1 - rr2, cy);
  c4[0] = saved;

  /* v2 = (v2 - v1)/2 (exact)
  */
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
  mpn_sub_n (v2, v2, v1, kk1);
  mpn_half (v2, kk1);
#endif

  /* v1 = v1 - vm1
  */
  mpn_sub_n(v1, v1, vm1, kk1);

  /* vm1 = vm1 - v2 and add vm1 in {c+k, ...} */
#if HAVE_NATIVE_mpn_addsub_n
  cy = mpn_addsub_n(c1, c1, vm1, v2, kk1);
#else
  mpn_sub_n(vm1, vm1, v2, kk1);
  cy = mpn_add_n (c1, c1, vm1, kk1);
#endif
  ASSERT_NOCARRY (mpn_add_1(c3 + 1, c3 + 1, rr2 + k - 1, cy)); /* 4k+rr2-(3k+1) = rr2+k-1 */

  /* don't forget to add vinf0 in {c+4k, ...} */
  ASSERT_NOCARRY (mpn_add_1(c4, c4, rr2, vinf0));

  /* add v2 in {c+3k, ...} */
  if (rr2 <= k + 1)
     ASSERT_NOCARRY (mpn_add_n (c3, c3, v2, k+rr2));
  else
  {
	  cy = mpn_add_n (c3, c3, v2, kk1);
     if (cy) ASSERT_NOCARRY (mpn_add_1(c5 + 1, c5 + 1, rr2 - k - 1, cy)); /* 4k+rr2-(5k+1) = rr2-k-1 */
  }

#undef v0
}