/*
   Chooses the parameters (depth, w) for multiplying the n1-limb operand i1
   by the n2-limb operand i2, then hands the work to mul_truncate_sqrt2
   (depth < 11) or mul_mfa_truncate_sqrt2 (depth >= 11) with r1 as the
   output argument.

   Throughout, n = 2^depth, bits = (n*w - (depth + 1))/2, and j1, j2 are the
   number of bits-sized pieces needed to cover each operand. A parameter
   pair "fits" when j1 + j2 - 1 <= 4*n.

   NOTE(review): r1 presumably needs room for n1 + n2 limbs -- confirm
   against the two routines called at the end.
*/
void mpn_mul_fft_main(mp_limb_t * r1, mp_limb_t * i1, mp_size_t n1,
                      mp_limb_t * i2, mp_size_t n2)
{
   mp_size_t depth = 6, w = 1;
   mp_size_t n = (mp_size_t) 1 << depth;
   mp_size_t shrink, step;
   mp_size_t j1, j2;
   mp_bitcnt_t bits;
   mp_bitcnt_t bits1 = n1*FLINT_BITS;
   mp_bitcnt_t bits2 = n2*FLINT_BITS;

   /* piece counts for the starting parameters depth = 6, w = 1 */
   bits = (n*w - (depth + 1))/2;
   j1 = (bits1 - 1)/bits + 1;
   j2 = (bits2 - 1)/bits + 1;

   FLINT_ASSERT(n1 > 0);
   FLINT_ASSERT(n2 > 0);
   FLINT_ASSERT(j1 + j2 - 1 > 2*n);

   /*
      Find initial n, w: walk through (depth, 1), (depth, 2), (depth + 1, 1),
      ... until the operands fit.
   */
   while (j1 + j2 - 1 > 4*n)
   {
      if (w != 1)
      {
         depth++;
         n *= 2;
         w = 1;
      }
      else
         w = 2;

      bits = (n*w - (depth + 1))/2;
      j1 = (bits1 - 1)/bits + 1;
      j2 = (bits2 - 1)/bits + 1;
   }

   /* large sizes go straight to the other routine with no further tuning */
   if (depth >= 11)
   {
      mul_mfa_truncate_sqrt2(r1, i1, n1, i2, n2, depth, w);
      return;
   }

   /* adjust n and w: lower depth by the table entry, scale w by 4 per step */
   shrink = fft_tuning_table[depth - 6][w - 1];
   depth -= shrink;
   n = (mp_size_t) 1 << depth;
   w *= (mp_size_t) 1 << (2*shrink);

   /* below depth 6, w is only moved in multiples of 2^(6 - depth) */
   step = (depth < 6) ? ((mp_size_t) 1 << (6 - depth)) : 1;

   if (w > step)
   {
      /*
         See if a smaller w will work: keep lowering w by step until the
         operands no longer fit or w cannot be lowered again, then go back
         up by one step.
      */
      for (;;)
      {
         w -= step;
         bits = (n*w - (depth + 1))/2;
         j1 = (bits1 - 1)/bits + 1;
         j2 = (bits2 - 1)/bits + 1;

         if (j1 + j2 - 1 > 4*n || w <= step)
            break;
      }

      w += step;
   }

   mul_truncate_sqrt2(r1, i1, n1, i2, n2, depth, w);
}
int bernoulli_mod_p_mpz(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root(p); if(!g) { return FALSE; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) mpz_poly_t F, G; mpz_poly_init2(F, poly_size); mpz_poly_init2(G, poly_size); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i mpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); mpz_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } mpz_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... mpz_poly_t product; mpz_poly_init(product); mpz_poly_mul(product, G, F); // step 3: assemble the result... 
unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; mpz_t value_coeff; mpz_init(value_coeff); unsigned long value_coeff_ui; for(i = 1; i < poly_size; i++) { mpz_poly_get_coeff(value_coeff, product, i + poly_size); value = mpz_fdiv_ui(value_coeff, p); value = z_mod_precomp(mpz_poly_get_coeff_ui(product, i + poly_size), p, p_inv); mpz_poly_get_coeff(value_coeff, product, i); if(is_odd) { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); } else { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } mpz_clear(value_coeff); mpz_poly_clear(F); mpz_poly_clear(G); mpz_poly_clear(product); return TRUE; }