/*
   Fold the top limb t[limbs] of the (limbs + 1)-limb value t back into
   the low limbs.  Each round clears t[limbs] and hands its negation to
   mpn_addmod_2expp1_1(t, limbs, .); at most three rounds are performed.
   NOTE(review): by its name this normalises t modulo B^limbs + 1 --
   confirm against the declaration of mpn_addmod_2expp1_1.
*/
void mpn_normmod_2expp1(mp_limb_t * t, mp_size_t limbs)
{
    mp_limb_signed_t top = t[limbs];

    /* nothing to do when the top limb is already clear */
    if (top == 0)
        return;

    t[limbs] = 0;
    mpn_addmod_2expp1_1(t, limbs, -top);

    /* the top limb will now be in [-1,1] */
    top = t[limbs];
    if (top == 0)
        return;

    t[limbs] = 0;
    mpn_addmod_2expp1_1(t, limbs, -top);

    /* only an all-ones top limb, i.e. -1, needs a third round (very unlikely) */
    if (t[limbs] != ~(mp_limb_signed_t) 0)
        return;

    t[limbs] = 0;
    mpn_addmod_2expp1_1(t, limbs, 1);
}
/*
   Builds r from i1 in three steps:
     1. r    = i1 shifted by 2^{i/2 + wn/4 + i*(w/2)}, where wn = limbs*FLINT_BITS
               and the exponent is reduced by wn once if it reaches wn;
     2. temp = r shifted by a further 2^{wn/2};
     3. r    = temp - r, or r - temp when the exponent in step 1 was reduced.

   r, i1 and temp are each accessed as limbs + 1 limbs (index limbs is the
   top limb).  temp is scratch space and is overwritten.
*/
void fft_adjust_sqrt2(mp_limb_t * r, mp_limb_t * i1, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_limb_t * temp)
{
    mp_bitcnt_t total_bits = limbs*FLINT_BITS;
    mp_size_t half_i = i/2, half_w = w/2;
    mp_bitcnt_t shift;
    mp_bitcnt_t bit_shift;
    mp_size_t limb_shift;
    mp_size_t half_limbs;
    mp_limb_t borrow;
    int wrapped;

    /* total exponent; remember whether it had to be brought back below wn */
    shift = half_i + total_bits/4 + i*half_w;
    wrapped = (shift >= total_bits);
    if (wrapped)
    {
        shift -= total_bits;
    }

    limb_shift = shift/FLINT_BITS;
    bit_shift = shift%FLINT_BITS;

    /* multiply by 2^{j + wn/4 + i*k}: whole limbs by hand, leftover bits by helper */
    if (limb_shift == 0)
    {
        mpn_mul_2expmod_2expp1(r, i1, limbs, bit_shift);
    }
    else
    {
        /* low part of i1 moves up, the part pushed off the top comes back negated */
        mpn_copyi(temp + limb_shift, i1, limbs - limb_shift);
        borrow = mpn_neg_n(temp, i1 + limbs - limb_shift, limb_shift);
        temp[limbs] = 0;
        mpn_addmod_2expp1_1(temp + limb_shift, limbs - limb_shift, -i1[limbs]);
        mpn_sub_1(temp + limb_shift, temp + limb_shift, limbs - limb_shift + 1, borrow);
        mpn_mul_2expmod_2expp1(r, temp, limbs, bit_shift);
    }

    /* multiply by 2^{wn/2}: same limb rotation, now applied to r */
    half_limbs = limbs/2;
    borrow = 0;

    mpn_copyi(temp + half_limbs, r, limbs - half_limbs);
    temp[limbs] = 0;
    if (half_limbs)
    {
        borrow = mpn_neg_n(temp, r + limbs - half_limbs, half_limbs);
    }
    mpn_addmod_2expp1_1(temp + half_limbs, limbs - half_limbs, -r[limbs]);
    mpn_sub_1(temp + half_limbs, temp + half_limbs, limbs - half_limbs + 1, borrow);

    /* odd limb count: shift by an additional half limb (rare) */
    if (limbs & 1)
    {
        mpn_mul_2expmod_2expp1(temp, temp, limbs, FLINT_BITS/2);
    }

    /* subtract; operand order depends on whether the exponent wrapped */
    if (wrapped)
    {
        mpn_sub_n(r, r, temp, limbs + 1);
    }
    else
    {
        mpn_sub_n(r, temp, r, limbs + 1);
    }
}
void mpir_butterfly_lshB(mp_ptr t, mp_ptr u, mp_ptr i1, mp_ptr i2, mp_size_t limbs, mp_size_t x, mp_size_t y) { mp_limb_t cy, cy1, cy2; if (x == 0) { if (y == 0) cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs + 1); else { cy = mpn_sumdiff_n(t, u + y, i1, i2, limbs - y); u[limbs] = -(cy&1); cy1 = cy>>1; cy = mpn_sumdiff_n(t + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y); t[limbs] = cy>>1; mpn_add_1(t + limbs - y, t + limbs - y, y + 1, cy1); cy1 = -(cy&1) + (i2[limbs] - i1[limbs]); mpn_addmod_2expp1_1(u + y, limbs - y, cy1); cy1 = -(i1[limbs] + i2[limbs]); mpn_addmod_2expp1_1(t, limbs, cy1); } } else if (y == 0)
/*
   Writes to r the value i1 shifted by i*w bits.  The shift is split into
   whole limbs (done here by copying the low limbs upward and bringing the
   limbs pushed off the top back negated) and a remainder of fewer than
   GMP_LIMB_BITS bits, which is handed to mpn_mul_2expmod_2expp1.

   r and i1 are accessed as limbs + 1 limbs; index limbs is the top limb.
*/
void mpir_fft_adjust(mp_ptr r, mp_ptr i1, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)
{
    mp_bitcnt_t shift = i*w;
    mp_size_t whole_limbs = shift/GMP_LIMB_BITS;
    mp_bitcnt_t extra_bits = shift%GMP_LIMB_BITS;
    mp_limb_t borrow;

    /* less than one limb of shift: the helper does all the work */
    if (whole_limbs == 0)
    {
        mpn_mul_2expmod_2expp1(r, i1, limbs, extra_bits);
        return;
    }

    mpn_copyi(r + whole_limbs, i1, limbs - whole_limbs);
    r[limbs] = 0;
    borrow = mpn_neg_n(r, i1 + limbs - whole_limbs, whole_limbs);

    /* fold in the negated top limb of i1, then the borrow from the negation */
    mpn_addmod_2expp1_1(r + whole_limbs, limbs - whole_limbs, -i1[limbs]);
    mpn_sub_1(r + whole_limbs, r + whole_limbs, limbs - whole_limbs + 1, borrow);

    /* finish with the sub-limb part of the shift, in place */
    mpn_mul_2expmod_2expp1(r, r, limbs, extra_bits);
}
/*
   Multiplies i1 by i2 through a negacyclic FFT convolution and writes the
   result to r1.  r1 is zeroed over r_limbs + 1 limbs, filled from the
   convolution coefficients, corrected, and finally passed through
   mpn_normmod_2expp1(r1, r_limbs).
   NOTE(review): the name and that final call suggest the result is the
   product reduced modulo B^r_limbs + 1 (B = 2^GMP_LIMB_BITS) -- confirm
   against the header documentation.

   r1      : output, at least r_limbs + 1 limbs.
   i1, i2  : inputs, each split by mpir_fft_split_bits using length r_limbs.
             If i1 == i2 (same pointer) only one forward transform is done
             and it is used for both operands.
   r_limbs : size parameter of the operands/result, in limbs.
   depth   : n = 2^depth; the transform operates on 2*n coefficients.
   w       : each coefficient occupies limbs = n*w/GMP_LIMB_BITS limbs plus
             one extra top limb.

   All scratch space comes from TMP_BALLOC_MP_PTRS and is released by
   TMP_FREE at the end.
*/
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2,
                            mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
    mp_size_t n = (((mp_size_t)1)<<depth);
    /* bits of input placed in each of the 2*n coefficients */
    mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
    mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
    /* a coefficient is limbs low limbs plus one top limb */
    mp_size_t size = limbs + 1;
    mp_size_t i, j, ll;
    mp_limb_t * ptr;
    mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
    mp_limb_t c;
    TMP_DECL;

    TMP_MARK;
    /*
       One allocation, carved up in this order:
         2*n coefficient pointers (ii[0..2n-1]),
         2*n coefficient buffers of size limbs each,
         ii0 (2*n limbs), t1, t2, s1 (size limbs each), r (2*n limbs),
         tt (whatever remains: 2*size limbs).
       Note the pointer table and the limb data share the same block and
       the offsets are computed in limb units via the (mp_ptr) cast.
    */
    ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
    for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size)
    {
        ii[i] = ptr;
    }
    ii0 = ptr;
    t1 = ii0 + 2*n;
    t2 = t1 + size;
    s1 = t2 + size;
    r = s1 + size;
    tt = r + 2*n;

    if (i1 != i2)
    {
        /* second operand: pointer table, coefficient buffers and jj0 only */
        jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
        for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size)
        {
            jj[i] = ptr;
        }
        jj0 = ptr;
    }
    else
    {
        /* squaring: reuse the first operand's buffers */
        jj = ii;
        jj0 = ii0;
    }

    /* split i1 into coefficients; zero any coefficients the split did not fill */
    j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
    for ( ; j < 2*n; j++)
        mpn_zero(ii[j], limbs + 1);

    /* keep the lowest limb of every untransformed coefficient */
    for (i = 0; i < 2*n; i++)
        ii0[i] = ii[i][0];

    mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
    for (j = 0; j < 2*n; j++)
        mpn_normmod_2expp1(ii[j], limbs);

    if (i1 != i2)
    {
        /* same preparation for the second operand (normalised in the loop below) */
        j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
        for ( ; j < 2*n; j++)
            mpn_zero(jj[j], limbs + 1);

        for (i = 0; i < 2*n; i++)
            jj0[i] = jj[i][0];

        mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
    }

    /* pointwise products of the transformed coefficients */
    for (j = 0; j < 2*n; j++)
    {
        if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
        /* c packs the two top limbs for the basecase routine */
        c = 2*ii[j][limbs] + jj[j][limbs];
        ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
    }

    mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);

    /* NOTE(review): presumably a direct convolution of the saved low limbs
       ii0/jj0 into r, used below to fix up each coefficient -- confirm */
    mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

    for (j = 0; j < 2*n; j++)
    {
        mp_limb_t t, cy2;

        /* undo the transform scaling by 2^(depth + 1), then normalise */
        mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
        mpn_normmod_2expp1(ii[j], limbs);

        /* add (r[j] - low limb) across all limbs + 1 limbs, then add the
           saved top limb t back; r[j] ends up holding the overflow.
           NOTE(review): assumes add_ssaaaa is the longlong.h double-word
           add (high, low, ah, al, bh, bl) -- confirm */
        t = ii[j][limbs];
        ii[j][limbs] = r[j] - ii[j][0];
        cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
        add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
        if (cy2) r[j]++;
    }

    /* assemble all but the last coefficient into r1 */
    mpn_zero(r1, r_limbs + 1);
    mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);

    /*
       as the negacyclic convolution has effectively done subtractions
       some of the coefficients will be negative, so need to subtract p
    */
    ll = 0;
    /* limb offset between consecutive coefficients inside r1 */
    limb_add = bits1/GMP_LIMB_BITS;

    for (j = 0; j < 2*n - 2; j++)
    {
        if (r[j])
            mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
        else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
        {
            mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
            mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
        }

        ll += limb_add;
    }

    /* penultimate coefficient, top bit was already ignored */
    if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
        mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);

    /* final coefficient wraps around: its low limb_add limbs are added at
       the top of r1, the remaining limbs are subtracted at the bottom */
    if (limb_add)
        r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
    c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
    /* propagate the borrow from that subtraction into the higher limbs */
    mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
    mpn_normmod_2expp1(r1, r_limbs);

    TMP_FREE;
}