/*
   Adds in2, shifted up by one position, to in1 (i.e. in1 += x*in2), where
   both vectors hold 2^bits entries indexed through n_revbin.

   For every storage index i in [0, 2^bits - 1) the entry in2[i] is added
   to in1[j], where j = n_revbin(n_revbin(i, bits) + 1, bits). The last
   storage index, 2^bits - 1, is never read from in2, so nothing is
   carried past the end of in1.

   NOTE(review): presumably n_revbin(a, bits) reverses the low `bits` bits
   of a, which makes j the storage slot of the next "natural" position;
   confirm against its definition.
*/
void _fmpz_vec_add_rev(fmpz * in1, fmpz * in2, long bits)
{
    const long count = (1L << bits) - 1;
    long src;

    for (src = 0; src < count; src++)
    {
        /* slot in in1 that receives the entry stored at in2[src] */
        long dst = n_revbin(n_revbin(src, bits) + 1, bits);
        fmpz * target = in1 + dst;

        fmpz_add(target, target, in2 + src);
    }
}
/*
   Gathers the first len output entries from an n_revbin-indexed input:
   out[k] = in[n_revbin(k, bits)] for 0 <= k < len.

   in is read at indices produced by n_revbin(k, bits), so it must be
   valid for all of them (the original note says in has length 2^bits).
   Entries are copied by plain assignment of the fmpz value.
*/
void revbin2(fmpz * out, const fmpz * in, long len, long bits)
{
    long k = 0;

    while (k < len)
    {
        out[k] = in[n_revbin(k, bits)];
        k++;
    }
}
/*
   Scatters the len input entries into n_revbin-indexed positions of out:
   out[n_revbin(k, bits)] = in[k] for 0 <= k < len.

   out is written at indices produced by n_revbin(k, bits), so it must be
   valid for all of them (the original note says out has length 2^bits).
   Slots of out that are not hit by any k < len are left untouched.
   Entries are copied by plain assignment of the fmpz value.
*/
void revbin1(fmpz * out, const fmpz * in, slong len, slong bits)
{
    slong k = 0;

    while (k < len)
    {
        slong slot = n_revbin(k, bits);

        out[slot] = in[k];
        k++;
    }
}
/*
   Processes one row of n1 coefficients for fft_mfa_truncate_sqrt2_inner.

   The row starts at ii + row*n1 (and jj + row*n1). It is transformed with
   fft_radix2, every entry is normalised with mpn_normmod_2expp1 and
   multiplied pointwise with fft_mulmod_2expp1 (result stored in ii), and
   the row of ii is then transformed back with ifft_radix2.

   When ii == jj the transform and normalisation of jj are skipped, since
   they would repeat the work already done on ii; the product call then
   receives the same pointer for both operands.
*/
static void
_fft_mfa_truncate_sqrt2_inner_row(mp_limb_t ** ii, mp_limb_t ** jj,
                      mp_size_t row, mp_size_t n, mp_bitcnt_t w,
                      mp_size_t n1, mp_size_t n2, mp_size_t limbs,
                      mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t * tt)
{
    mp_size_t j;
    mp_limb_t ** irow = ii + row*n1;
    mp_limb_t ** jrow = jj + row*n1;

    fft_radix2(irow, n1/2, w*n2, t1, t2);
    if (ii != jj)
        fft_radix2(jrow, n1/2, w*n2, t1, t2);

    for (j = 0; j < n1; j++)
    {
        mpn_normmod_2expp1(irow[j], limbs);
        if (ii != jj)
            mpn_normmod_2expp1(jrow[j], limbs);

        fft_mulmod_2expp1(irow[j], irow[j], jrow[j], n, w, tt);
    }

    ifft_radix2(irow, n1/2, w*n2, t1, t2);
}

/*
   Inner (row-wise) stage of the truncated matrix Fourier multiplication.

   ii and jj are arrays of coefficient pointers, viewed as rows of n1
   entries. With n2 = 2n/n1 rows per half:

     - second half (entries starting at index 2n): only trunc2 =
       (trunc - 2n)/n1 rows are processed, namely the rows
       n_revbin(s, depth) for 0 <= s < trunc2, where depth is the smallest
       value with 2^depth >= n2;
     - first half (entries 0 .. 2n - 1): all n2 rows are processed.

   Each processed row is transformed, multiplied pointwise (ii[k] receives
   the product of ii[k] and jj[k]) and transformed back; see
   _fft_mfa_truncate_sqrt2_inner_row. Passing ii == jj is supported and
   skips the redundant work on jj.

   Parameters:
     ii, jj  coefficient pointer arrays; ii receives the results
     n, w    transform parameters; each coefficient is normalised over
             limbs = (n*w)/FLINT_BITS limbs
     t1, t2  scratch pointers forwarded to fft_radix2 / ifft_radix2
     temp    unused by this function; kept for interface compatibility
     n1      row length (number of columns)
     trunc   truncation length; assumed to satisfy trunc >= 2n with
             (trunc - 2n) a multiple of n1 -- TODO confirm with callers
     tt      scratch space forwarded to fft_mulmod_2expp1
*/
void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n,
                   mp_bitcnt_t w, mp_limb_t ** t1, mp_limb_t ** t2,
                  mp_limb_t ** temp, mp_size_t n1, mp_size_t trunc, mp_limb_t * tt)
{
    mp_size_t i, s;
    mp_size_t n2 = (2*n)/n1;
    mp_size_t trunc2 = (trunc - 2*n)/n1;
    mp_size_t limbs = (n*w)/FLINT_BITS;
    mp_bitcnt_t depth = 0;

    while ((UWORD(1)<<depth) < n2) depth++;

    /* convolutions on relevant rows of the second half */
    for (s = 0; s < trunc2; s++)
    {
        i = n_revbin(s, depth);
        _fft_mfa_truncate_sqrt2_inner_row(ii + 2*n, jj + 2*n, i, n, w,
                                          n1, n2, limbs, t1, t2, tt);
    }

    /* convolutions on all rows of the first half */
    for (i = 0; i < n2; i++)
    {
        _fft_mfa_truncate_sqrt2_inner_row(ii, jj, i, n, w,
                                          n1, n2, limbs, t1, t2, tt);
    }
}