/*
 * Process one row of n1 coefficients starting at index row*n1:
 * forward-transform the row of ii (and of jj when it is a distinct array)
 * with fft_radix2, normalise each entry, multiply the entries of the two
 * rows pairwise with fft_mulmod_2expp1 (result stored in ii), and finally
 * apply ifft_radix2 to the row of products.
 */
static void fft_mfa_inner_row_convolution(mp_limb_t ** ii, mp_limb_t ** jj,
    mp_size_t row, mp_size_t n, mp_bitcnt_t w, mp_size_t n1, mp_size_t n2,
    mp_size_t limbs, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t * tt)
{
    mp_size_t base = row*n1;
    mp_size_t j;

    fft_radix2(ii + base, n1/2, w*n2, t1, t2);
    if (ii != jj)
        fft_radix2(jj + base, n1/2, w*n2, t1, t2);

    for (j = 0; j < n1; j++)
    {
        mp_size_t t = base + j;

        mpn_normmod_2expp1(ii[t], limbs);
        if (ii != jj)
            mpn_normmod_2expp1(jj[t], limbs);

        fft_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt);
    }

    ifft_radix2(ii + base, n1/2, w*n2, t1, t2);
}

/*
 * Inner (row) stage of the truncated sqrt2 matrix Fourier convolution.
 *
 * ii, jj  arrays of 4n coefficient pointers; the products overwrite ii.
 *         Passing ii == jj performs a squaring and skips the duplicate
 *         work on jj.
 * n, w    transform parameters; each coefficient has (n*w)/FLINT_BITS limbs
 *         plus one extra limb.
 * t1, t2  scratch pointers handed to the row transforms.
 * temp    unused by this function (kept for interface compatibility).
 * n1      row length; there are n2 = 2n/n1 rows per half.
 * trunc   truncation length; only (trunc - 2n)/n1 rows of the upper half
 *         (entries from index 2n onwards) are processed.
 * tt      scratch space handed to fft_mulmod_2expp1.
 */
void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n, mp_bitcnt_t w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, mp_size_t n1, mp_size_t trunc, mp_limb_t * tt)
{
    mp_size_t i, s;
    mp_size_t n2 = (2*n)/n1;
    mp_size_t trunc2 = (trunc - 2*n)/n1;
    mp_size_t limbs = (n*w)/FLINT_BITS;
    mp_bitcnt_t depth = 0;
    mp_limb_t ** ii_hi = ii + 2*n;
    mp_limb_t ** jj_hi = jj + 2*n;

    /* depth = smallest exponent with 2^depth >= n2 */
    while ((UWORD(1)<<depth) < n2)
        depth++;

    /* convolutions on relevant rows of the upper half; the rows that are
       present are visited through the bit-reversal of their counter */
    for (s = 0; s < trunc2; s++)
    {
        i = n_revbin(s, depth);
        fft_mfa_inner_row_convolution(ii_hi, jj_hi, i, n, w, n1, n2,
                                      limbs, t1, t2, tt);
    }

    /* convolutions on all rows of the lower half */
    for (i = 0; i < n2; i++)
        fft_mfa_inner_row_convolution(ii, jj, i, n, w, n1, n2,
                                      limbs, t1, t2, tt);
}
/*
 * Convolve the coefficient arrays ii and jj in place, leaving the result
 * in ii.  When ii == jj the work on jj is skipped (squaring).
 *
 * depth   the transform length parameter is n = 2^depth
 * limbs   limbs per coefficient (each coefficient has one extra top limb)
 * trunc   truncation length; it is rounded up internally, to an even value
 *         on the small path and to a multiple of 2*2^(depth/2) on the
 *         matrix Fourier path
 * t1, t2, s1, tt   scratch space forwarded to the transform and
 *         multiplication routines
 *
 * For depth <= 6 the truncated sqrt2 transform is used directly and the
 * outputs are divided by 2^(depth + 2) here; for larger depths the matrix
 * Fourier outer/inner routines are called and no separate scaling step is
 * performed in this function.
 */
void fft_convolution(mp_limb_t ** ii, mp_limb_t ** jj, long depth, long limbs, long trunc, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** s1, mp_limb_t * tt)
{
    long n = (1L<<depth);
    long w = (limbs*FLINT_BITS)/n;
    long n1 = (1L<<(depth/2));
    int distinct = (ii != jj);
    long k;

    if (depth > 6)
    {
        /* matrix Fourier algorithm: round trunc up to a multiple of 2*n1 */
        trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1));

        fft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, n1, trunc);
        if (distinct)
            fft_mfa_truncate_sqrt2_outer(jj, n, w, t1, t2, s1, n1, trunc);

        fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, t1, t2, s1, n1, trunc, tt);

        ifft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, n1, trunc);
        return;
    }

    /* small transforms: round trunc up to an even value */
    trunc = 2*((trunc + 1)/2);

    fft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);
    if (distinct)
        fft_truncate_sqrt2(jj, n, w, t1, t2, s1, trunc);

    /* pointwise products */
    for (k = 0; k < trunc; k++)
    {
        mpn_normmod_2expp1(ii[k], limbs);
        if (distinct)
            mpn_normmod_2expp1(jj[k], limbs);

        fft_mulmod_2expp1(ii[k], ii[k], jj[k], n, w, tt);
    }

    ifft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);

    /* remove the factor 2^(depth + 2) and normalise */
    for (k = 0; k < trunc; k++)
    {
        mpn_div_2expmod_2expp1(ii[k], ii[k], limbs, depth + 2);
        mpn_normmod_2expp1(ii[k], limbs);
    }
}
/*
 * Multiply {i1, limbs + 1} by {i2, limbs + 1} and store the result in
 * {r, limbs + 1}; the return value is the top limb r[limbs] (0 on the
 * early-exit path).
 *
 * The top limbs of the inputs are combined as c = 2*i1[limbs] + i2[limbs].
 * If the low bit of c is set the result is the normalised negation of i1;
 * otherwise, if bit 1 is set, it is the normalised negation of i2.
 * Operands of at most FFT_MULMOD_2EXPP1_CUTOFF limbs go to the basecase
 * routine (using tt as scratch); larger ones go to mpir_fft_mulmod_2expp1
 * with a depth chosen from mulmod_2expp1_table_n.
 */
int mpn_mulmod_Bexpp1(mp_ptr r, mp_srcptr i1, mp_srcptr i2, mp_size_t limbs, mp_ptr tt)
{
    mp_size_t bits = limbs * GMP_LIMB_BITS;
    mp_bitcnt_t depth, depth1, table_index;
    mp_size_t w1, off;
    mp_limb_t c = 2 * i1[limbs] + i2[limbs];

    /* one of the inspected top bits is set: negate the other operand */
    if ((c & 3) != 0)
    {
        mp_srcptr src = (c & 1) ? i1 : i2;

        mpn_neg_n(r, src, limbs + 1);
        mpn_normmod_2expp1(r, limbs);
        return 0;
    }

    /* small operands: basecase multiplication */
    if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF)
    {
        if (bits != 0)
            r[limbs] = mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt);
        else
            r[limbs] = 0;

        return r[limbs];
    }

    /* smallest depth >= 1 with 2^depth >= bits */
    for (depth = 1; (((mp_limb_t)1)<<depth) < bits; depth++)
        ;

    /* tuning offset: the table starts at depth 12 and is clamped at its end */
    table_index = (depth < 12) ? 0 : MIN(depth, FFT_N_NUM + 11) - 12;
    off = mulmod_2expp1_table_n[table_index];

    depth1 = depth/2 - off;
    w1 = bits/(((mp_limb_t)1)<<(2*depth1));

    mpir_fft_mulmod_2expp1(r, i1, i2, limbs, depth1, w1);

    return r[limbs];
}
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_mfa_truncate_sqrt2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 13; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t trunc = 2*n + n_randint(state, 2*n) + 1; mp_size_t n1 = (1UL<<(depth/2)); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, * t1, * t2, * s1; trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1)); ii = flint_malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; s1 = t2 + size; for (i = 0; i < 4*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc); ifft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc); for (i = 0; i < trunc; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < trunc; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("n = %ld, trunc = %ld\n", n, trunc); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }
int main(void) { int i, result; flint_rand_t state; printf("get/set_fft...."); fflush(stdout); flint_randinit(state); /* convert back and forth and compare */ for (i = 0; i < 10000; i++) { fmpz * a, * b; mp_bitcnt_t bits; long len, limbs; mp_limb_t ** ii, * ptr; long i, bt; bits = n_randint(state, 300) + 1; len = n_randint(state, 300) + 1; limbs = 2*((bits - 1)/FLINT_BITS + 1); ii = flint_malloc((len + len*(limbs + 1))*sizeof(mp_limb_t)); ptr = (mp_limb_t *) ii + len; for (i = 0; i < len; i++, ptr += (limbs + 1)) ii[i] = ptr; a = _fmpz_vec_init(len); b = _fmpz_vec_init(len); _fmpz_vec_randtest(a, state, len, bits); bt = _fmpz_vec_get_fft(ii, a, limbs, len); for (i = 0; i < len; i++) mpn_normmod_2expp1(ii[i], limbs); _fmpz_vec_set_fft(b, len, ii, limbs, bt < 0); result = (_fmpz_vec_equal(a, b, len)); if (!result) { printf("FAIL:\n"); _fmpz_vec_print(a, len), printf("\n\n"); _fmpz_vec_print(b, len), printf("\n\n"); abort(); } _fmpz_vec_clear(a, len); _fmpz_vec_clear(b, len); } /* convert back and forth unsigned and compare */ for (i = 0; i < 10000; i++) { fmpz * a, * b; mp_bitcnt_t bits; long len, limbs; mp_limb_t ** ii, * ptr; long i, bt; bits = n_randint(state, 300) + 1; len = n_randint(state, 300) + 1; limbs = 2*((bits - 1)/FLINT_BITS + 1); ii = flint_malloc((len + len*(limbs + 1))*sizeof(mp_limb_t)); ptr = (mp_limb_t *) ii + len; for (i = 0; i < len; i++, ptr += (limbs + 1)) ii[i] = ptr; a = _fmpz_vec_init(len); b = _fmpz_vec_init(len); _fmpz_vec_randtest_unsigned(a, state, len, bits); bt = _fmpz_vec_get_fft(ii, a, limbs, len); _fmpz_vec_set_fft(b, len, ii, limbs, bt < 0); result = (_fmpz_vec_equal(a, b, len)); if (!result) { printf("FAIL:\n"); _fmpz_vec_print(a, len), printf("\n\n"); _fmpz_vec_print(b, len), printf("\n\n"); abort(); } _fmpz_vec_clear(a, len); _fmpz_vec_clear(b, len); } flint_randclear(state); _fmpz_cleanup(); printf("PASS\n"); return 0; }
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_radix2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 12; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, *t1, *t2; ii = flint_malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; for (i = 0; i < 2*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(2*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_radix2(ii, n, w, &t1, &t2); ifft_radix2(ii, n, w, &t1, &t2); for (i = 0; i < 2*n; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < 2*n; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }
/*
 * FFT-based multiplication of {i1, r_limbs} by {i2, r_limbs}, writing
 * r_limbs + 1 limbs to r1 and finishing with mpn_normmod_2expp1(r1, r_limbs).
 * NOTE(review): by its name and that final normalisation this presumably
 * computes the product modulo B^r_limbs + 1 -- confirm against the callers.
 *
 * depth, w   transform parameters: n = 2^depth, 2n coefficients are used,
 *            each of limbs = n*w/GMP_LIMB_BITS limbs plus one top limb.
 *
 * Outline as visible in the code:
 *   1. split each input into 2n pieces of bits1 bits;
 *   2. remember the low limb of every piece (ii0 / jj0);
 *   3. negacyclic transform, pointwise basecase products, inverse transform;
 *   4. combine the FFT result with mpir_fft_naive_convolution_1 applied to
 *      the saved low limbs to obtain an extra high limb per coefficient;
 *   5. recombine the coefficients into r1 and fix up borrows / wrap-around.
 *
 * When i1 == i2 (squaring) the second operand aliases the first and all
 * work on jj is skipped.
 */
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
    mp_size_t n = (((mp_size_t)1)<<depth);
    /* number of bits of the input placed in each of the 2n pieces */
    mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
    mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
    mp_size_t size = limbs + 1;
    mp_size_t i, j, ll;
    mp_limb_t * ptr;
    mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
    mp_limb_t c;
    TMP_DECL;

    TMP_MARK;

    /* Single scratch block, laid out as:
         2n pointers | 2n coefficients of size limbs | ii0 (2n limbs) |
         t1, t2, s1 (size limbs each) | r (2n limbs) | tt (remaining
         2*size limbs) */
    ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
    for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size)
    {
        ii[i] = ptr;
    }
    ii0 = ptr;
    t1 = ii0 + 2*n;
    t2 = t1 + size;
    s1 = t2 + size;
    r = s1 + size;
    tt = r + 2*n;

    if (i1 != i2)
    {
        /* distinct second operand: 2n pointers | 2n coefficients | jj0 */
        jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
        for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size)
        {
            jj[i] = ptr;
        }
        jj0 = ptr;
    } else
    {
        /* squaring: reuse the first operand's storage */
        jj = ii;
        jj0 = ii0;
    }

    /* split i1 into pieces; zero the coefficients not filled by the split */
    j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
    for ( ; j < 2*n; j++)
        mpn_zero(ii[j], limbs + 1);

    /* save the low limb of every coefficient before it is transformed */
    for (i = 0; i < 2*n; i++)
        ii0[i] = ii[i][0];

    mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
    for (j = 0; j < 2*n; j++)
        mpn_normmod_2expp1(ii[j], limbs);

    if (i1 != i2)
    {
        /* same preparation for the second operand; it is normalised
           later, inside the pointwise loop below */
        j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
        for ( ; j < 2*n; j++)
            mpn_zero(jj[j], limbs + 1);

        for (i = 0; i < 2*n; i++)
            jj0[i] = jj[i][0];

        mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
    }

    /* pointwise products; c packs the two top limbs as 2*ii_top + jj_top
       for the basecase routine, whose return value becomes the new top limb */
    for (j = 0; j < 2*n; j++)
    {
        if (i1 != i2)
            mpn_normmod_2expp1(jj[j], limbs);
        c = 2*ii[j][limbs] + jj[j][limbs];
        ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
    }

    mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);

    /* r[j] is computed from the saved low limbs only.
       NOTE(review): presumably the negacyclic convolution of ii0 and jj0
       modulo one limb -- confirm in mpir_fft_naive_convolution_1 */
    mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

    for (j = 0; j < 2*n; j++)
    {
        mp_limb_t t, cy2;

        /* undo the 2^(depth + 1) scaling of the transform pair */
        mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
        mpn_normmod_2expp1(ii[j], limbs);

        /* t keeps the normalised top limb; the top limb is then replaced
           by the difference between r[j] and the low limb, and that
           difference is added back into the whole coefficient */
        t = ii[j][limbs];
        ii[j][limbs] = r[j] - ii[j][0];
        cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
        /* two-limb add: (r[j], top limb) = (0, top limb) + (0, t), then
           fold in the carry out of the mpn_add_1 above */
        add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
        if (cy2) r[j]++;
    }

    mpn_zero(r1, r_limbs + 1);
    /* recombine all but the last coefficient; the last one is handled
       separately below because it wraps around */
    mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);

    /* as the negacyclic convolution has effectively done subtractions
       some of the coefficients will be negative, so need to subtract p */
    ll = 0;
    /* distance in limbs between consecutive coefficients in r1 */
    limb_add = bits1/GMP_LIMB_BITS;

    for (j = 0; j < 2*n - 2; j++)
    {
        if (r[j])
            mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
        else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
        {
            mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
            mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
        }

        ll += limb_add;
    }

    /* penultimate coefficient, top bit was already ignored
       (j == 2*n - 2 here, left over from the loop above) */
    if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
        mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);

    /* final coefficient wraps around: its low limb_add limbs are added at
       the top of r1, the remaining limbs are subtracted from the bottom */
    if (limb_add)
        r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
    c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
    /* propagate the borrow from the subtraction into the higher limbs */
    mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
    mpn_normmod_2expp1(r1, r_limbs);

    TMP_FREE;
}
int main(void) { mp_bitcnt_t bits; mp_size_t j, k, n, w, limbs; mp_limb_t * nn; mpz_t p, m1, m2; FLINT_TEST_INIT(state); flint_printf("normmod_2expp1...."); fflush(stdout); _flint_rand_init_gmp(state); mpz_init(m1); mpz_init(m2); mpz_init(p); /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */ for (bits = FLINT_BITS; bits < 32*FLINT_BITS; bits += FLINT_BITS) { for (j = 1; j < 32; j++) { for (k = 1; k <= GMP_NUMB_BITS; k <<= 1) { n = bits/k; w = j*k; limbs = (n*w)/GMP_LIMB_BITS; nn = flint_malloc((limbs + 1)*sizeof(mp_limb_t)); random_fermat(nn, state, limbs); fermat_to_mpz(m1, nn, limbs); set_p(p, n, w); mpn_normmod_2expp1(nn, limbs); fermat_to_mpz(m2, nn, limbs); mpz_mod(m1, m1, p); if (mpz_cmp(m1, m2) != 0) { flint_printf("FAIL:\n"); flint_printf("mpn_normmod_2expp1 error\n"); gmp_printf("want %Zx\n\n", m1); gmp_printf("got %Zx\n", m2); abort(); } flint_free(nn); } } } mpz_clear(m2); mpz_clear(m1); mpz_clear(p); FLINT_TEST_CLEANUP(state); flint_printf("PASS\n"); return 0; }
int main(void) { mp_bitcnt_t depth, w; gmp_randstate_t state; tests_start(); fflush(stdout); gmp_randinit_default(state); for (depth = 6; depth <= 12; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (((mp_limb_t)1)<<depth); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, * t1, * t2, * s1; ii = malloc((2*(n + n*size) + 3*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) { ii[i] = ptr; mpir_random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; s1 = t2 + size; for (i = 0; i < 2*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = malloc(2*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1); mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1); for (i = 0; i < 2*n; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < 2*n; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("Error in entry %ld\n", i); abort(); } } free(ii); free(jj); } } gmp_randclear(state); tests_end(); return 0; }
int main(void) { mp_bitcnt_t depth, w; int iters; flint_rand_t state; printf("mulmod_2expp1...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (iters = 0; iters < 100; iters++) { for (depth = 6; depth <= 18; depth++) { for (w = 1; w <= 2; w++) { mp_size_t n = (1UL<<depth); mp_bitcnt_t bits = n*w; mp_size_t int_limbs = bits/FLINT_BITS; mp_size_t j; mp_limb_t c, * i1, * i2, * r1, * r2, * tt; i1 = flint_malloc(6*(int_limbs+1)*sizeof(mp_limb_t)); i2 = i1 + int_limbs + 1; r1 = i2 + int_limbs + 1; r2 = r1 + int_limbs + 1; tt = r2 + int_limbs + 1; random_fermat(i1, state, int_limbs); random_fermat(i2, state, int_limbs); mpn_normmod_2expp1(i1, int_limbs); mpn_normmod_2expp1(i2, int_limbs); fft_mulmod_2expp1(r2, i1, i2, n, w, tt); c = i1[int_limbs] + 2*i2[int_limbs]; c = mpn_mulmod_2expp1(r1, i1, i2, c, int_limbs*FLINT_BITS, tt); for (j = 0; j < int_limbs; j++) { if (r1[j] != r2[j]) { printf("error in limb %ld, %lx != %lx\n", j, r1[j], r2[j]); abort(); } } if (c != r2[int_limbs]) { printf("error in limb %ld, %lx != %lx\n", j, c, r2[j]); abort(); } flint_free(i1); } } } /* test squaring */ for (iters = 0; iters < 100; iters++) { for (depth = 6; depth <= 18; depth++) { for (w = 1; w <= 2; w++) { mp_size_t n = (1UL<<depth); mp_bitcnt_t bits = n*w; mp_size_t int_limbs = bits/FLINT_BITS; mp_size_t j; mp_limb_t c, * i1, * r1, * r2, * tt; i1 = flint_malloc(5*(int_limbs+1)*sizeof(mp_limb_t)); r1 = i1 + int_limbs + 1; r2 = r1 + int_limbs + 1; tt = r2 + int_limbs + 1; random_fermat(i1, state, int_limbs); mpn_normmod_2expp1(i1, int_limbs); fft_mulmod_2expp1(r2, i1, i1, n, w, tt); c = i1[int_limbs] + 2*i1[int_limbs]; c = mpn_mulmod_2expp1(r1, i1, i1, c, int_limbs*FLINT_BITS, tt); for (j = 0; j < int_limbs; j++) { if (r1[j] != r2[j]) { printf("error in limb %ld, %lx != %lx\n", j, r1[j], r2[j]); abort(); } } if (c != r2[int_limbs]) { printf("error in limb %ld, %lx != %lx\n", j, c, r2[j]); abort(); } flint_free(i1); } } } flint_randclear(state); 
printf("PASS\n"); return 0; }