int main (int argc, char **argv) { mp_ptr r1p, r2p, s1p, s2p; double t; mp_size_t n; n = strtol (argv[1], 0, 0); r1p = malloc (n * BYTES_PER_MP_LIMB); r2p = malloc (n * BYTES_PER_MP_LIMB); s1p = malloc (n * BYTES_PER_MP_LIMB); s2p = malloc (n * BYTES_PER_MP_LIMB); TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n))); printf (" separate add and sub: %.3f\n", t); TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n)); printf ("combined addsub separate variables: %.3f\n", t); TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n)); printf (" combined addsub r1 overlap: %.3f\n", t); TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n)); printf (" combined addsub r2 overlap: %.3f\n", t); TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n)); printf (" combined addsub in-place: %.3f\n", t); return 0; }
/*
   Recursive step over the 2n coefficient pointers ii[0 .. 2n-1], where only
   the first trunc entries are processed in full (presumably a truncated
   inverse FFT, going by the name -- confirm against the caller).

   Each coefficient is handled as limbs + 1 limbs, with
   limbs = (w*n)/GMP_LIMB_BITS.  t1 and t2 point to scratch coefficient
   pointers that get swapped into ii by MP_PTR_SWAP.

   Three cases:
     trunc == 2n  : hand everything to mpir_ifft_radix2.
     trunc <= n   : fold the upper half into entries trunc..n-1, recurse on
                    the lower half, then combine entries 0..trunc-1 with the
                    upper half.
     otherwise    : full radix-2 pass on the lower half, fix up entries
                    trunc-n..n-1, recurse on the upper half with trunc - n,
                    then butterfly entries 0..trunc-n-1.
*/
void mpir_ifft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,
                      mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)
{
    mp_size_t j;
    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;
    mp_size_t excess;
    mp_ptr * upper = ii + n;   /* upper[j] is ii[n + j] */

    if (trunc == 2*n)
    {
        mpir_ifft_radix2(ii, n, w, t1, t2);
        return;
    }

    if (trunc <= n)
    {
        for (j = trunc; j < n; j++)
        {
            mpn_add_n(ii[j], ii[j], upper[j], limbs + 1);
            mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, 1);
        }

        mpir_ifft_trunc1(ii, n/2, 2*w, t1, t2, trunc);

        for (j = 0; j < trunc; j++)
        {
#if HAVE_NATIVE_mpn_addsub_n
            mpn_addsub_n(ii[j], ii[j], ii[j], upper[j], limbs + 1);
#else
            mpn_add_n(ii[j], ii[j], ii[j], limbs + 1);
            mpn_sub_n(ii[j], ii[j], upper[j], limbs + 1);
#endif
        }

        return;
    }

    /* n < trunc < 2n */
    excess = trunc - n;

    mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);

    for (j = excess; j < n; j++)
    {
        mpn_sub_n(upper[j], ii[j], upper[j], limbs + 1);
        mpir_fft_adjust(*t1, upper[j], j, limbs, w);
        mpn_add_n(ii[j], ii[j], upper[j], limbs + 1);
        MP_PTR_SWAP(upper[j], *t1);
    }

    mpir_ifft_trunc1(upper, n/2, 2*w, t1, t2, excess);

    for (j = 0; j < excess; j++)
    {
        mpir_ifft_butterfly(*t1, *t2, ii[j], upper[j], j, limbs, w);
        MP_PTR_SWAP(ii[j], *t1);
        MP_PTR_SWAP(upper[j], *t2);
    }
}
/*
   Combination step operating in place on rp, with tp as the second operand
   (tp is overwritten).  With lo = n/2 (rounded down) and hi = n - lo:

     1. tp <- mpn_addsub_n(rp, rp + 2*lo, tp) over 2*lo limbs; the returned
        value is treated as a signed carry (compared against 1 and -1).
     2. {rp + lo, 2*lo} += tp, carry propagated from rp + 3*lo.
     3. When n is odd (lo != hi), the two limbs at rp + 3*lo additionally get
        tp[2*lo .. 2*lo+1] subtracted and the saved top two limbs of rp
        added, with the net carry/borrow propagated from rp + 3*lo + 2.

   NOTE(review): exact operand semantics of mpn_addsub_n are not visible
   here -- confirm against its definition.
*/
static void mpn_karasub(mp_ptr rp, mp_ptr tp, mp_size_t n)
{
    mp_size_t lo, hi;
    mp_limb_t borrow = 0, carry, add_cy, top[2];
    mp_ptr mid;

    lo = n >> 1;
    hi = n - lo;
    mid = rp + 3*lo;

    carry = mpn_addsub_n(tp, rp, rp + 2*lo, tp, 2*lo);
    add_cy = mpn_add_n(rp + lo, rp + lo, tp, 2*lo);

    /* Save the top two limbs of rp before carries are propagated. */
    top[1] = rp[2*lo + 2*hi - 1];
    top[0] = rp[2*lo + 2*hi - 2];

    mpn_incr_u(mid, add_cy);
    if (carry == 1)
    {
        mpn_incr_u(mid, 1);
    }
    else if (carry == -1)
    {
        mpn_decr_u(mid, 1);
    }

    if (lo == hi)
    {
        return;
    }

    /* Odd n: fix up the two extra limbs. */
    borrow = mpn_sub_n(mid, mid, tp + 2*lo, 2);
    carry = mpn_add_n(mid, mid, top, 2);

    if (carry == 1 && borrow == 0)
    {
        mpn_incr_u(mid + 2, 1);
    }
    else if (carry == 0 && borrow == 1)
    {
        mpn_decr_u(mid + 2, 1);
    }
}
void mpn_toom53_mul (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { mp_size_t n, s, t; int vm1_neg, vmh_neg; mp_limb_t cy; mp_ptr gp, hp; mp_ptr as1, asm1, as2, ash, asmh; mp_ptr bs1, bsm1, bs2, bsh, bsmh; enum toom4_flags flags; TMP_DECL; #define a0 ap #define a1 (ap + n) #define a2 (ap + 2*n) #define a3 (ap + 3*n) #define a4 (ap + 4*n) #define b0 bp #define b1 (bp + n) #define b2 (bp + 2*n) n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3); s = an - 4 * n; t = bn - 2 * n; ASSERT (0 < s && s <= n); ASSERT (0 < t && t <= n); TMP_MARK; as1 = TMP_SALLOC_LIMBS (n + 1); asm1 = TMP_SALLOC_LIMBS (n + 1); as2 = TMP_SALLOC_LIMBS (n + 1); ash = TMP_SALLOC_LIMBS (n + 1); asmh = TMP_SALLOC_LIMBS (n + 1); bs1 = TMP_SALLOC_LIMBS (n + 1); bsm1 = TMP_SALLOC_LIMBS (n + 1); bs2 = TMP_SALLOC_LIMBS (n + 1); bsh = TMP_SALLOC_LIMBS (n + 1); bsmh = TMP_SALLOC_LIMBS (n + 1); gp = pp; hp = pp + n + 1; /* Compute as1 and asm1. */ gp[n] = mpn_add_n (gp, a0, a2, n); gp[n] += mpn_add (gp, gp, n, a4, s); hp[n] = mpn_add_n (hp, a1, a3, n); #if HAVE_NATIVE_mpn_addsub_n if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_addsub_n (as1, asm1, hp, gp, n + 1); vm1_neg = 1; } else { mpn_addsub_n (as1, asm1, gp, hp, n + 1); vm1_neg = 0; } #else mpn_add_n (as1, gp, hp, n + 1); if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_sub_n (asm1, hp, gp, n + 1); vm1_neg = 1; } else { mpn_sub_n (asm1, gp, hp, n + 1); vm1_neg = 0; } #endif /* Compute as2. 
*/ #if !HAVE_NATIVE_mpn_addlsh_n ash[n] = mpn_lshift (ash, a2, n, 2); /* 4a2 */ #endif #if HAVE_NATIVE_mpn_addlsh1_n cy = mpn_addlsh1_n (as2, a3, a4, s); if (s != n) cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy); cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n); cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n); as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n); #else cy = mpn_lshift (as2, a4, s, 1); cy += mpn_add_n (as2, a3, as2, s); if (s != n) cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy); cy = 4 * cy + mpn_lshift (as2, as2, n, 2); cy += mpn_add_n (as2, a1, as2, n); cy = 2 * cy + mpn_lshift (as2, as2, n, 1); as2[n] = cy + mpn_add_n (as2, a0, as2, n); mpn_add_n (as2, ash, as2, n + 1); #endif /* Compute ash and asmh. */ #if HAVE_NATIVE_mpn_addlsh_n cy = mpn_addlsh_n (gp, a2, a0, n, 2); /* 4a0 + a2 */ cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2); /* 16a0 + 4a2 + a4 */ /* FIXME s */ gp[n] = cy; cy = mpn_addlsh_n (hp, a3, a1, n, 2); /* 4a1 + a3 */ cy = 2 * cy + mpn_lshift (hp, hp, n, 1); /* 8a1 + 2a3 */ hp[n] = cy; #else gp[n] = mpn_lshift (gp, a0, n, 4); /* 16a0 */ mpn_add (gp, gp, n + 1, a4, s); /* 16a0 + a4 */ mpn_add_n (gp, ash, gp, n+1); /* 16a0 + 4a2 + a4 */ cy = mpn_lshift (hp, a1, n, 3); /* 8a1 */ cy += mpn_lshift (ash, a3, n, 1); /* 2a3 */ cy += mpn_add_n (hp, ash, hp, n); /* 8a1 + 2a3 */ hp[n] = cy; #endif #if HAVE_NATIVE_mpn_addsub_n if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_addsub_n (ash, asmh, hp, gp, n + 1); vmh_neg = 1; } else { mpn_addsub_n (ash, asmh, gp, hp, n + 1); vmh_neg = 0; } #else mpn_add_n (ash, gp, hp, n + 1); if (mpn_cmp (gp, hp, n + 1) < 0) { mpn_sub_n (asmh, hp, gp, n + 1); vmh_neg = 1; } else { mpn_sub_n (asmh, gp, hp, n + 1); vmh_neg = 0; } #endif /* Compute bs1 and bsm1. */ bs1[n] = mpn_add (bs1, b0, n, b2, t); /* b0 + b2 */ #if HAVE_NATIVE_mpn_addsub_n if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0) { bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1; bsm1[n] = 0; vm1_neg ^= 1; }
/* We have {v0,2k} {v1,2k+1} {c+4k+1,r+r2-1} v0 v1 {-}vinf vinf0 is the first limb of vinf, which is overwritten by v1 {vm1,2k+1} {v2, 2k+1} ws is temporary space sa is the sign of vm1 rr2 is r+r2 We want to compute t1 <- (3*v0+2*vm1+v2)/6-2*vinf t2 <- (v1+vm1)/2 then the result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where c0 <- v0 c1 <- v1 - t1 c2 <- t2 - v0 - vinf c3 <- t1 - t2 c4 <- vinf */ void mpn_toom3_interpolate (mp_ptr c, mp_ptr v1, mp_ptr v2, mp_ptr vm1, mp_ptr vinf, mp_size_t k, mp_size_t rr2, int sa, mp_limb_t vinf0, mp_ptr ws) { mp_limb_t cy, saved; mp_size_t twok = k + k; mp_size_t kk1 = twok + 1; mp_ptr c1, c2, c3, c4, c5; mp_limb_t cout; /* final carry, should be zero at the end */ c1 = c + k; c2 = c1 + k; c3 = c2 + k; c4 = c3 + k; c5 = c4 + k; #define v0 (c) /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} v0 v1 {-}vinf {vm1,2k+1} {v2, 2k+1} */ /* v2 <- v2 - vm1 */ if (sa < 0) { mpn_add_n(v2, v2, vm1, kk1); } else { mpn_sub_n(v2, v2, vm1, kk1); } ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */ /* vm1 <- t2 := (v1 - sa*vm1) / 2 */ if (sa < 0) { #ifdef HAVE_NATIVE_mpn_rsh1add_n mpn_rsh1add_n (vm1, v1, vm1, kk1); #else mpn_add_n (vm1, vm1, v1, kk1); mpn_half (vm1, kk1); #endif } else { #ifdef HAVE_NATIVE_mpn_rsh1sub_n mpn_rsh1sub_n (vm1, v1, vm1, kk1); #else mpn_sub_n (vm1, v1, vm1, kk1); mpn_half (vm1, kk1); #endif } /* v1 <- v1 - v0 - vinf */ saved = c4[0]; c4[0] = vinf0; #if HAVE_NATIVE_mpn_subadd_n cy = mpn_subadd_n(v1, v1, v0, c4, rr2); #else cy = mpn_sub_n(v1, v1, v0, rr2); cy += mpn_sub_n(v1, v1, c4, rr2); #endif c4[0] = saved; if (rr2 < twok) { v1[twok] -= mpn_sub_n(v1 + rr2, v1 + rr2, v0 + rr2, twok - rr2); MPN_DECR_U(v1 + rr2, kk1 - rr2, cy); } else v1[twok] -= cy; saved = c4[0]; c4[0] = vinf0; /* subtract 5*vinf from v2, */ cy = mpn_submul_1 (v2, c4, rr2, CNST_LIMB(5)); MPN_DECR_U (v2 + rr2, kk1 - rr2, cy); c4[0] = saved; /* v2 = (v2 - v1)/2 (exact) */ #ifdef HAVE_NATIVE_mpn_rsh1sub_n mpn_rsh1sub_n (v2, v2, v1, kk1); #else mpn_sub_n (v2, 
v2, v1, kk1); mpn_half (v2, kk1); #endif /* v1 = v1 - vm1 */ mpn_sub_n(v1, v1, vm1, kk1); /* vm1 = vm1 - v2 and add vm1 in {c+k, ...} */ #if HAVE_NATIVE_mpn_addsub_n cy = mpn_addsub_n(c1, c1, vm1, v2, kk1); #else mpn_sub_n(vm1, vm1, v2, kk1); cy = mpn_add_n (c1, c1, vm1, kk1); #endif ASSERT_NOCARRY (mpn_add_1(c3 + 1, c3 + 1, rr2 + k - 1, cy)); /* 4k+rr2-(3k+1) = rr2+k-1 */ /* don't forget to add vinf0 in {c+4k, ...} */ ASSERT_NOCARRY (mpn_add_1(c4, c4, rr2, vinf0)); /* add v2 in {c+3k, ...} */ if (rr2 <= k + 1) ASSERT_NOCARRY (mpn_add_n (c3, c3, v2, k+rr2)); else { cy = mpn_add_n (c3, c3, v2, kk1); if (cy) ASSERT_NOCARRY (mpn_add_1(c5 + 1, c5 + 1, rr2 - k - 1, cy)); /* 4k+rr2-(5k+1) = rr2-k-1 */ } #undef v0 }