/*
   Inverse FFT butterfly on the pair (i1, i2), with results written to s and t.

   The twiddle exponent is i*w bits. It is split into a whole-limb count and a
   sub-limb remainder:
     - the sub-limb remainder is applied first, by passing i2 as both
       destination and source of mpn_div_2expmod_2expp1, so i2 is
       OVERWRITTEN as a side effect of this call;
     - the whole-limb count is handed to butterfly_rshB as its final argument
       (the argument before it is always 0 here).

   s, t   : output buffers
   i1, i2 : input coefficients (i2 is modified)
   i      : butterfly index
   limbs  : coefficient size passed through to both helpers
   w      : bits of twiddle per unit of index
*/
void ifft_butterfly(mp_limb_t * s, mp_limb_t * t, mp_limb_t * i1, mp_limb_t * i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)
{
   const mp_bitcnt_t total_bits = i*w;
   const mp_size_t limb_shift = total_bits/FLINT_BITS;
   const mp_bitcnt_t bit_shift = total_bits%FLINT_BITS;

   mpn_div_2expmod_2expp1(i2, i2, limbs, bit_shift);
   butterfly_rshB(s, t, i1, i2, limbs, 0, limb_shift);
}
/*
   Recursive truncated inverse FFT step over the 2n coefficients
   ii[0], ..., ii[2n-1]. Every coefficient is handled as limbs + 1 limbs,
   where limbs = (w*n)/GMP_LIMB_BITS.

   Three cases, selected by trunc:
     trunc == 2n : nothing is truncated; defer to mpir_ifft_radix2.
     trunc <= n  : only the lower half is live. Entries trunc..n-1 are
                   replaced by (ii[k] + ii[k+n]) divided by 2 (via
                   mpn_div_2expmod_2expp1 with shift 1), the lower half is
                   processed recursively with (n/2, 2w), and finally each
                   ii[k], k < trunc, becomes 2*ii[k] - ii[n+k].
     otherwise   : the lower half gets a full mpir_ifft_radix2 with (n/2, 2w);
                   entries trunc-n..n-1 are combined with their upper-half
                   partners (going through mpir_fft_adjust), the upper half
                   is processed recursively with trunc - n, and the first
                   trunc - n pairs are finished with mpir_ifft_butterfly.

   t1 and t2 point at scratch buffers. Their targets are swapped with
   entries of ii, so on return *t1, *t2 and the ii[] slots may refer to
   different buffers than on entry.
*/
void mpir_ifft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)
{
    const mp_size_t limbs = (w*n)/GMP_LIMB_BITS;
    const mp_size_t size = limbs + 1;
    mp_size_t k;

    /* Full-length transform: no truncation to deal with. */
    if (trunc == 2*n)
    {
        mpir_ifft_radix2(ii, n, w, t1, t2);
        return;
    }

    /* Truncation point lies inside the lower half. */
    if (trunc <= n)
    {
        for (k = trunc; k < n; k++)
        {
            mpn_add_n(ii[k], ii[k], ii[k + n], size);
            mpn_div_2expmod_2expp1(ii[k], ii[k], limbs, 1);
        }

        mpir_ifft_trunc1(ii, n/2, 2*w, t1, t2, trunc);

        /* ii[k] <- 2*ii[k] - ii[n + k] */
        for (k = 0; k < trunc; k++)
        {
#if HAVE_NATIVE_mpn_addsub_n
            mpn_addsub_n(ii[k], ii[k], ii[k], ii[n + k], size);
#else
            mpn_add_n(ii[k], ii[k], ii[k], size);
            mpn_sub_n(ii[k], ii[k], ii[n + k], size);
#endif
        }

        return;
    }

    /* Truncation point lies inside the upper half. */
    {
        const mp_size_t upper = trunc - n;

        mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);

        for (k = upper; k < n; k++)
        {
            mpn_sub_n(ii[k + n], ii[k], ii[k + n], size);
            mpir_fft_adjust(*t1, ii[k + n], k, limbs, w);
            mpn_add_n(ii[k], ii[k], ii[k + n], size);
            MP_PTR_SWAP(ii[k + n], *t1);
        }

        mpir_ifft_trunc1(ii + n, n/2, 2*w, t1, t2, upper);

        for (k = 0; k < upper; k++)
        {
            mpir_ifft_butterfly(*t1, *t2, ii[k], ii[n + k], k, limbs, w);

            MP_PTR_SWAP(ii[k], *t1);
            MP_PTR_SWAP(ii[n + k], *t2);
        }
    }
}
/*
   Pointwise-multiply convolution of the coefficient arrays ii and jj; the
   result is left in ii. Passing ii == jj requests a squaring, in which case
   the forward transform (and normalisation) of jj is skipped.

   With n = 2^depth and w = (limbs*FLINT_BITS)/n:

     depth <= 6 : trunc is rounded up to an even value, then
                  forward transform -> normalise -> fft_mulmod_2expp1 on each
                  of the trunc entries -> inverse transform -> divide each
                  entry by 2^(depth + 2) and normalise.
     depth  > 6 : trunc is rounded up to a multiple of 2*2^(depth/2) and the
                  work is delegated to the fft_mfa_truncate_sqrt2_outer /
                  _inner / ifft_mfa_truncate_sqrt2_outer routines. No explicit
                  scaling loop is run here in this branch.

   t1, t2, s1 and tt are scratch buffers forwarded to the helpers.
*/
void fft_convolution(mp_limb_t ** ii, mp_limb_t ** jj, long depth, long limbs, long trunc, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** s1, mp_limb_t * tt)
{
   const long n = (1L<<depth);
   const long w = (limbs*FLINT_BITS)/n;
   const long n1 = (1L<<(depth/2));
   const int distinct = (ii != jj);
   long k;

   if (depth > 6)
   {
      /* round trunc up to a multiple of 2*n1 */
      trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1));

      fft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, n1, trunc);

      if (distinct)
         fft_mfa_truncate_sqrt2_outer(jj, n, w, t1, t2, s1, n1, trunc);

      fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, t1, t2, s1, n1, trunc, tt);

      ifft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, n1, trunc);

      return;
   }

   /* round trunc up to an even value */
   trunc = 2*((trunc + 1)/2);

   fft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);

   if (distinct)
      fft_truncate_sqrt2(jj, n, w, t1, t2, s1, trunc);

   /* pointwise products */
   for (k = 0; k < trunc; k++)
   {
      mpn_normmod_2expp1(ii[k], limbs);

      if (distinct)
         mpn_normmod_2expp1(jj[k], limbs);

      fft_mulmod_2expp1(ii[k], ii[k], jj[k], n, w, tt);
   }

   ifft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);

   /* undo the scaling introduced by the transform pair */
   for (k = 0; k < trunc; k++)
   {
      mpn_div_2expmod_2expp1(ii[k], ii[k], limbs, depth + 2);
      mpn_normmod_2expp1(ii[k], limbs);
   }
}
int main(void) { mp_bitcnt_t bits; mp_size_t j, k, n, w, limbs, d; mp_limb_t * nn, * r; mpz_t p, m1, m2, mn1, mn2; gmp_randstate_t state; tests_start(); fflush(stdout); gmp_randinit_default(state); mpz_init(m1); mpz_init(m2); mpz_init(mn1); mpz_init(mn2); mpz_init(p); /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */ for (bits = GMP_LIMB_BITS; bits < 16*GMP_LIMB_BITS; bits += GMP_LIMB_BITS) { for (j = 1; j < 32; j++) { for (k = 1; k <= GMP_LIMB_BITS; k <<= 1) { for (d = 0; d < GMP_LIMB_BITS; d++) { n = bits/k; w = j*k; limbs = (n*w)/GMP_LIMB_BITS; nn = malloc((limbs + 1)*sizeof(mp_limb_t)); r = malloc((limbs + 1)*sizeof(mp_limb_t)); mpir_random_fermat(nn, state, limbs); mpir_fermat_to_mpz(mn1, nn, limbs); set_p(p, n, w); mpn_div_2expmod_2expp1(r, nn, limbs, d); mpir_fermat_to_mpz(m2, r, limbs); mpz_mod(m2, m2, p); mpz_mod(m1, mn1, p); mpz_mul_2exp(m2, m2, d); mpz_mod(m2, m2, p); if (mpz_cmp(m1, m2) != 0) { printf("FAIL:\n"); printf("mpn_div_2expmod_2expp1 error\n"); gmp_printf("want %Zx\n\n", m1); gmp_printf("got %Zx\n", m2); abort(); } } free(nn); free(r); } } } mpz_clear(mn2); mpz_clear(mn1); mpz_clear(m2); mpz_clear(m1); mpz_clear(p); gmp_randclear(state); tests_end(); return 0; }
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_mfa_truncate_sqrt2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 13; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t trunc = 2*n + n_randint(state, 2*n) + 1; mp_size_t n1 = (1UL<<(depth/2)); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, * t1, * t2, * s1; trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1)); ii = flint_malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; s1 = t2 + size; for (i = 0; i < 4*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc); ifft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc); for (i = 0; i < trunc; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < trunc; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("n = %ld, trunc = %ld\n", n, trunc); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_radix2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 12; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, *t1, *t2; ii = flint_malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; for (i = 0; i < 2*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(2*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_radix2(ii, n, w, &t1, &t2); ifft_radix2(ii, n, w, &t1, &t2); for (i = 0; i < 2*n; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < 2*n; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }
/*
   Multiply i1 by i2 and leave the (r_limbs + 1)-limb result in r1, reduced by
   mpn_normmod_2expp1(r1, r_limbs) -- by the function's name and that final
   call, this is a product modulo B^r_limbs + 1 (B = 2^GMP_LIMB_BITS).

   Outline, as visible in the body:
     1. Split each operand into up to 2n pieces of bits1 bits (n = 2^depth)
        with mpir_fft_split_bits and zero-pad to 2n coefficients.
     2. Save the low limb of every coefficient (ii0 / jj0) before transforming.
     3. Forward negacyclic transform, pointwise mpn_mulmod_2expp1_basecase,
        inverse negacyclic transform, divide by 2^(depth + 1).
     4. Convolve the saved low limbs with mpir_fft_naive_convolution_1 and use
        the result to adjust the top limb of each coefficient.
     5. Recombine with mpir_fft_combine_bits, apply per-coefficient
        corrections, and fold the last coefficient around the ends of r1.

   If i1 == i2 (same pointer) the second operand aliases the first, so only
   one split/transform is performed.

   r1      : destination, written as r_limbs + 1 limbs
   i1, i2  : operands, read by mpir_fft_split_bits with length r_limbs
   r_limbs : size parameter of the modulus
   depth   : log2 of n; the transform works on 2n coefficients
   w       : transform parameter; coefficients hold limbs + 1 limbs with
             limbs = (n*w)/GMP_LIMB_BITS

   All workspace comes from TMP_BALLOC_MP_PTRS and is released by TMP_FREE.
*/
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
   mp_size_t n = (((mp_size_t)1)<<depth);
   /* number of bits of input placed in each of the 2n coefficients */
   mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
   mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
   mp_size_t size = limbs + 1;
   mp_size_t i, j, ll;
   mp_limb_t * ptr;
   mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
   mp_limb_t c;
   TMP_DECL;

   TMP_MARK;

   /*
      Single allocation, carved up in this order:
        2n pointer slots | 2n coefficients of `size` limbs | ii0 (2n limbs) |
        t1, t2, s1 (`size` limbs each) | r (2n limbs) | tt (the remaining
        2*size limbs).
      NOTE(review): the pointer table is stepped over as if each slot were one
      limb wide, i.e. this relies on pointers and limbs having the same size.
   */
   ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
   for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size)
   {
      ii[i] = ptr;
   }
   ii0 = ptr;
   t1 = ii0 + 2*n;
   t2 = t1 + size;
   s1 = t2 + size;
   r = s1 + size;
   tt = r + 2*n;

   if (i1 != i2)
   {
      /* second operand: 2n pointer slots | 2n coefficients | jj0 (2n limbs) */
      jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
      for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size)
      {
         jj[i] = ptr;
      }
      jj0 = ptr;
   } else
   {
      /* squaring: reuse the first operand's data for the second */
      jj = ii;
      jj0 = ii0;
   }

   /* split i1 into coefficients; j is the count written, zero the rest */
   j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
   for ( ; j < 2*n; j++)
      mpn_zero(ii[j], limbs + 1);

   /* remember the low limb of each coefficient before it is transformed */
   for (i = 0; i < 2*n; i++)
      ii0[i] = ii[i][0];

   mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
   for (j = 0; j < 2*n; j++)
      mpn_normmod_2expp1(ii[j], limbs);

   if (i1 != i2)
   {
      /* same preparation for the second operand (normalised in the loop below) */
      j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
      for ( ; j < 2*n; j++)
         mpn_zero(jj[j], limbs + 1);

      for (i = 0; i < 2*n; i++)
         jj0[i] = jj[i][0];

      mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
   }

   /* pointwise products in the transform domain */
   for (j = 0; j < 2*n; j++)
   {
      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
      /* c combines the top limbs of both factors for the basecase routine */
      c = 2*ii[j][limbs] + jj[j][limbs];
      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
   }

   mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);

   /* r <- convolution of the saved low limbs (one limb per output entry) */
   mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

   for (j = 0; j < 2*n; j++)
   {
      mp_limb_t t, cy2;

      /* remove the 2^(depth + 1) scaling left by the transform pair */
      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
      mpn_normmod_2expp1(ii[j], limbs);

      /*
         Top-limb fix-up: the difference between the low-limb convolution and
         the low limb of the coefficient is added back into the coefficient
         (as a single limb), then r[j] and the top limb are rebuilt from the
         saved top limb t plus any carry out.
         NOTE(review): presumably this recovers the part of the true
         coefficient that does not fit below 2^(n*w) -- confirm against the
         algorithm description.
      */
      t = ii[j][limbs];
      ii[j][limbs] = r[j] - ii[j][0];
      cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
      add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
      if (cy2) r[j]++;
   }

   /* assemble the first 2n - 1 coefficients into r1 at bits1-bit spacing */
   mpn_zero(r1, r_limbs + 1);
   mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);

   /*
      As the negacyclic convolution has effectively done subtractions,
      some of the coefficients will be negative, so we need to subtract p.
   */
   ll = 0;
   /* whole limbs between the start of consecutive coefficients in r1 */
   limb_add = bits1/GMP_LIMB_BITS;

   for (j = 0; j < 2*n - 2; j++)
   {
      if (r[j])
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
      else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was negative */
      {
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
         mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
      }

      ll += limb_add;
   }

   /* penultimate coefficient (j == 2n - 2 here), top bit was already ignored */
   if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was negative */
      mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);

   /*
      Final coefficient wraps around: its low limb_add limbs are added at the
      top of r1, and its remaining limbs are subtracted from the bottom of r1.
   */
   if (limb_add)
      r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
   c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
   /* propagate the borrow from that subtraction into the rest of r1 */
   mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
   mpn_normmod_2expp1(r1, r_limbs);

   TMP_FREE;
}
int main(void) { mp_bitcnt_t depth, w; gmp_randstate_t state; tests_start(); fflush(stdout); gmp_randinit_default(state); for (depth = 6; depth <= 12; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (((mp_limb_t)1)<<depth); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, * t1, * t2, * s1; ii = malloc((2*(n + n*size) + 3*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) { ii[i] = ptr; mpir_random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; s1 = t2 + size; for (i = 0; i < 2*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = malloc(2*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1); mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1); for (i = 0; i < 2*n; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < 2*n; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("Error in entry %ld\n", i); abort(); } } free(ii); free(jj); } } gmp_randclear(state); tests_end(); return 0; }