/*
   Recursive pass over the 2n coefficient pointers ii[0], ..., ii[2n-1].

   Unless n == 1, the lower half ii[0..n-1] and the upper half ii[n..2n-1]
   are first processed recursively with length n/2 and with w doubled.
   Afterwards each pair (ii[k], ii[n+k]) is combined by ifft_butterfly(),
   called with index k, limbs = (w*n)/FLINT_BITS and w.  The butterfly
   writes into the scratch buffers *t1 and *t2, which are then swapped
   with the array entries: on return the results are reachable through
   ii, while *t1 and *t2 point at buffers that used to belong to ii.
*/
void ifft_radix2(mp_limb_t ** ii, mp_size_t n, mp_bitcnt_t w,
                 mp_limb_t ** t1, mp_limb_t ** t2)
{
    const mp_size_t limbs = (w*n)/FLINT_BITS;
    mp_limb_t ** lo = ii;
    mp_limb_t ** hi = ii + n;
    mp_size_t k;

    /* for n == 1 there is nothing to recurse into; the single butterfly
       below (k == 0) is the whole job */
    if (n != 1)
    {
        ifft_radix2(lo, n/2, 2*w, t1, t2);
        ifft_radix2(hi, n/2, 2*w, t1, t2);
    }

    for (k = 0; k < n; k++)
    {
        ifft_butterfly(*t1, *t2, lo[k], hi[k], k, limbs, w);

        /* move the freshly computed values into the array and recycle
           the old buffers as scratch space for the next pair */
        SWAP_PTRS(lo[k], *t1);
        SWAP_PTRS(hi[k], *t2);
    }
}
/*
   Processes the 2n coefficient pointers ii[0], ..., ii[2n-1].

   Both halves of ii are first passed to ifft_radix2() with length n/2 and
   2*w.  Then every pair (ii[k], ii[n+k]) goes through ifft_butterfly()
   with index k, and each of the two results is passed through an
   "adjust" routine and negated (mpn_neg_n on limbs + 1 limbs):

     - w even: fft_adjust() with w/2 and indices 2n - k (lower half) and
       n - k (upper half);
     - w odd, k even: fft_adjust() with w and indices n - k/2 and
       n - (n + k)/2;
     - w odd, k odd: fft_adjust_sqrt2() with w, indices 2n - k and n - k,
       using *temp as its extra buffer.

   *t1 and *t2 are scratch buffers; they are repeatedly swapped with the
   entries of ii, so on return they point at buffers previously owned by
   the array.  limbs is (w*n)/FLINT_BITS.
*/
void ifft_negacyclic(mp_limb_t ** ii, mp_size_t n, mp_bitcnt_t w,
                     mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp)
{
    const mp_size_t limbs = (w*n)/FLINT_BITS;
    const mp_size_t size = limbs + 1;   /* length handed to mpn_neg_n */
    mp_limb_t ** lo = ii;
    mp_limb_t ** hi = ii + n;
    mp_size_t k;

    ifft_radix2(lo, n/2, 2*w, t1, t2);
    ifft_radix2(hi, n/2, 2*w, t1, t2);

    if ((w & 1) == 0)
    {
        const mp_bitcnt_t half_w = w/2;

        for (k = 0; k < n; k++)
        {
            ifft_butterfly(*t1, *t2, lo[k], hi[k], k, limbs, w);
            SWAP_PTRS(lo[k], *t1);
            SWAP_PTRS(hi[k], *t2);

            /* adjust out of place into the scratch buffer, negate it
               there, then swap it back into the array */
            fft_adjust(*t1, lo[k], 2*n - k, limbs, half_w);
            mpn_neg_n(*t1, *t1, size);
            SWAP_PTRS(lo[k], *t1);

            fft_adjust(*t2, hi[k], n - k, limbs, half_w);
            mpn_neg_n(*t2, *t2, size);
            SWAP_PTRS(hi[k], *t2);
        }

        return;
    }

    /* w odd: entries are handled two at a time, even index then odd */
    for (k = 0; k < n; k += 2)
    {
        const mp_size_t odd = k + 1;

        /* even index k */
        ifft_butterfly(*t1, *t2, lo[k], hi[k], k, limbs, w);
        SWAP_PTRS(lo[k], *t1);
        SWAP_PTRS(hi[k], *t2);

        fft_adjust(*t1, lo[k], n - k/2, limbs, w);
        mpn_neg_n(*t1, *t1, size);
        SWAP_PTRS(lo[k], *t1);

        fft_adjust(*t2, hi[k], n - (n + k)/2, limbs, w);
        mpn_neg_n(*t2, *t2, size);
        SWAP_PTRS(hi[k], *t2);

        /* odd index k + 1 */
        ifft_butterfly(*t1, *t2, lo[odd], hi[odd], odd, limbs, w);
        SWAP_PTRS(lo[odd], *t1);
        SWAP_PTRS(hi[odd], *t2);

        fft_adjust_sqrt2(*t1, lo[odd], 2*n - odd, limbs, w, *temp);
        mpn_neg_n(*t1, *t1, size);
        SWAP_PTRS(lo[odd], *t1);

        fft_adjust_sqrt2(*t2, hi[odd], n - odd, limbs, w, *temp);
        mpn_neg_n(*t2, *t2, size);
        SWAP_PTRS(hi[odd], *t2);
    }
}
/*
   Helper for fft_mfa_truncate_sqrt2_inner(): processes one row of n1
   consecutive entries starting at ii + row*n1 (and jj + row*n1).

   The row of ii is transformed with fft_radix2() (length n1/2, w*n2); the
   row of jj as well unless ii == jj.  Every entry is then normalised with
   mpn_normmod_2expp1() and ii[t] is replaced by the product computed by
   fft_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt).  Finally the row of
   ii is passed to ifft_radix2() with the same length and w*n2.
*/
static void
fft_mfa_inner_convolve_row(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t row,
                           mp_size_t n, mp_bitcnt_t w, mp_size_t n1,
                           mp_size_t n2, mp_size_t limbs,
                           mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t * tt)
{
    mp_limb_t ** ri = ii + row*n1;
    mp_limb_t ** rj = jj + row*n1;
    const int same = (ii == jj);   /* both operands share one array */
    mp_size_t j;

    fft_radix2(ri, n1/2, w*n2, t1, t2);
    if (!same)
        fft_radix2(rj, n1/2, w*n2, t1, t2);

    for (j = 0; j < n1; j++)
    {
        mpn_normmod_2expp1(ri[j], limbs);
        if (!same)
            mpn_normmod_2expp1(rj[j], limbs);

        fft_mulmod_2expp1(ri[j], ri[j], rj[j], n, w, tt);
    }

    ifft_radix2(ri, n1/2, w*n2, t1, t2);
}

/*
   Row-wise transform / pointwise multiply / inverse transform step.

   ii and jj are arrays of coefficient pointers that are viewed as rows of
   n1 consecutive entries, with n2 = (2*n)/n1 rows per block of 2n entries.
   Results overwrite the entries of ii; if ii == jj the second operand is
   not transformed separately.

   Two groups of rows are processed, in this order:

     1. In the part of the arrays starting at offset 2n: the
        trunc2 = (trunc - 2*n)/n1 rows with indices n_revbin(s, depth),
        s = 0, ..., trunc2 - 1, where depth is the smallest value with
        2^depth >= n2.
     2. In the first 2n entries: all n2 rows, in increasing order.

   t1 and t2 are scratch buffer pointers handed to fft_radix2() and
   ifft_radix2(); tt is handed to fft_mulmod_2expp1().  temp is accepted
   for interface compatibility but is not used by this function.
   Coefficients are treated as limbs = (n*w)/FLINT_BITS limbs by
   mpn_normmod_2expp1().
*/
void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj,
                                  mp_size_t n, mp_bitcnt_t w,
                                  mp_limb_t ** t1, mp_limb_t ** t2,
                                  mp_limb_t ** temp, mp_size_t n1,
                                  mp_size_t trunc, mp_limb_t * tt)
{
    const mp_size_t n2 = (2*n)/n1;
    const mp_size_t trunc2 = (trunc - 2*n)/n1;
    const mp_size_t limbs = (n*w)/FLINT_BITS;
    mp_limb_t ** ii_hi = ii + 2*n;
    mp_limb_t ** jj_hi = jj + 2*n;
    mp_bitcnt_t depth = 0;
    mp_size_t i, s;

    while ((UWORD(1)<<depth) < n2)
        depth++;

    /* convolutions on relevant rows */
    for (s = 0; s < trunc2; s++)
    {
        i = n_revbin(s, depth);
        fft_mfa_inner_convolve_row(ii_hi, jj_hi, i, n, w, n1, n2, limbs,
                                   t1, t2, tt);
    }

    /* convolutions on rows */
    for (i = 0; i < n2; i++)
        fft_mfa_inner_convolve_row(ii, jj, i, n, w, n1, n2, limbs,
                                   t1, t2, tt);
}
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_radix2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 12; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, *t1, *t2; ii = flint_malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; for (i = 0; i < 2*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(2*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_radix2(ii, n, w, &t1, &t2); ifft_radix2(ii, n, w, &t1, &t2); for (i = 0; i < 2*n; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < 2*n; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }