/*-------------------------------------------------------------------*/
static void multiply_relations(relation_prod_t *prodinfo,
			uint32 index1, uint32 index2,
			mpz_poly_t *prod) {

	/* Compute the product of the relations numbered index1
	   through index2 (both ends included) and leave it in prod.

	   The range is split in half and each half is handled by a
	   recursive call, so that the two operands of every
	   multiplication have coefficients of roughly the same size.
	   Recursing also means that only a few polynomials are live
	   at any moment, instead of allocating large buffers for
	   every relation up front while the values are still small */

	uint32 k;
	mpz_poly_t lo_half, hi_half;

	/* a range holding one relation needs no multiplication */

	if (index1 == index2) {
		relation_to_poly(prodinfo->rlist + index1,
				prodinfo->c, prod);
		return;
	}

	mpz_poly_init(&lo_half);
	mpz_poly_init(&hi_half);

	if (index1 == index2 - 1) {

		/* a range holding two relations: convert
		   each of them directly */

		relation_to_poly(prodinfo->rlist + index1,
				prodinfo->c, &lo_half);
		relation_to_poly(prodinfo->rlist + index2,
				prodinfo->c, &hi_half);
	}
	else {
		/* larger range: build the product of the lower
		   half and the product of the upper half */

		uint32 mid = (index1 + index2) / 2;

		multiply_relations(prodinfo, index1, mid, &lo_half);
		multiply_relations(prodinfo, mid + 1, index2, &hi_half);
	}

	/* combine the halves; the result is read back out of
	   lo_half below. The multiply is handed
	   prodinfo->monic_poly, presumably as the polynomial to
	   reduce by */

	mpz_poly_mul(&lo_half, &hi_half, prodinfo->monic_poly, 1);

	/* hand the coefficients to the caller by swapping
	   rather than copying */

	for (k = 0; k <= lo_half.degree; k++)
		mpz_swap(prod->coeff[k], lo_half.coeff[k]);
	prod->degree = lo_half.degree;

	mpz_poly_free(&lo_half);
	mpz_poly_free(&hi_half);
}
int bernoulli_mod_p_mpz(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root(p); if(!g) { return FALSE; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) mpz_poly_t F, G; mpz_poly_init2(F, poly_size); mpz_poly_init2(G, poly_size); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i mpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); mpz_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } mpz_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... mpz_poly_t product; mpz_poly_init(product); mpz_poly_mul(product, G, F); // step 3: assemble the result... 
unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; mpz_t value_coeff; mpz_init(value_coeff); unsigned long value_coeff_ui; for(i = 1; i < poly_size; i++) { mpz_poly_get_coeff(value_coeff, product, i + poly_size); value = mpz_fdiv_ui(value_coeff, p); value = z_mod_precomp(mpz_poly_get_coeff_ui(product, i + poly_size), p, p_inv); mpz_poly_get_coeff(value_coeff, product, i); if(is_odd) { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); } else { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } mpz_clear(value_coeff); mpz_poly_clear(F); mpz_poly_clear(G); mpz_poly_clear(product); return TRUE; }
/*-------------------------------------------------------------------*/ void alg_square_root(msieve_obj *obj, mpz_poly_t *alg_poly, mpz_t n, mpz_t c, mpz_t m1, mpz_t m0, abpair_t *rlist, uint32 num_relations, uint32 check_q, mpz_t sqrt_a) { /* external interface for computing the algebraic square root */ uint32 i; mpz_poly_t d_alg_poly; mpz_poly_t prod; mpz_poly_t alg_sqrt; relation_prod_t prodinfo; double log2_prodsize; mpz_t q; /* initialize */ mpz_init(q); mpz_poly_init(&d_alg_poly); mpz_poly_init(&prod); mpz_poly_init(&alg_sqrt); if (mpz_cmp_ui(alg_poly->coeff[alg_poly->degree], 1) != 0) { printf("error: sqrt requires input poly to be monic\n"); exit(-1); } alg_poly->degree--; /* multiply all the relations together */ prodinfo.monic_poly = alg_poly; prodinfo.rlist = rlist; mpz_init_set(prodinfo.c, c); logprintf(obj, "multiplying %u relations\n", num_relations); multiply_relations(&prodinfo, 0, num_relations - 1, &prod); logprintf(obj, "multiply complete, coefficients have about " "%3.2lf million bits\n", (double)mpz_sizeinbase(prod.coeff[0], 2) / 1e6); /* perform a sanity check on the result */ i = verify_product(&prod, rlist, num_relations, check_q, c, alg_poly); free(rlist); mpz_clear(prodinfo.c); if (i == 0) { logprintf(obj, "error: relation product is incorrect\n"); goto finished; } /* multiply by the square of the derivative of alg_poly; this will guarantee that the square root of prod actually is an element of the number field defined by alg_poly. If we didn't do this, we run the risk of the main Newton iteration not converging */ mpz_poly_monic_derivative(alg_poly, &d_alg_poly); mpz_poly_mul(&d_alg_poly, &d_alg_poly, alg_poly, 0); mpz_poly_mul(&prod, &d_alg_poly, alg_poly, 1); /* pick the initial small prime to start the Newton iteration. To save both time and memory, choose an initial prime such that squaring it a large number of times will produce a value just a little larger than we need to calculate the square root. 
Note that contrary to what some authors write, pretty much any starting prime is okay. The Newton iteration has a division by 2, so that 2 must be invertible mod the prime (this is guaranteed for odd primes). Also, the Newton iteration will fail if both square roots have the same value mod the prime; however, even a 16-bit prime makes this very unlikely */ i = mpz_size(prod.coeff[0]); log2_prodsize = (double)GMP_LIMB_BITS * (i - 2) + log(mpz_getlimbn(prod.coeff[0], (mp_size_t)(i-1)) * pow(2.0, (double)GMP_LIMB_BITS) + mpz_getlimbn(prod.coeff[0], (mp_size_t)(i-2))) / M_LN2 + 10000; while (log2_prodsize > 31.5) log2_prodsize *= 0.5; mpz_set_d(q, (uint32)pow(2.0, log2_prodsize) + 1); /* get the initial inverse square root */ if (!get_initial_inv_sqrt(obj, alg_poly, &prod, &alg_sqrt, q)) { goto finished; } /* compute the actual square root */ if (get_final_sqrt(obj, alg_poly, &prod, &alg_sqrt, q)) convert_to_integer(&alg_sqrt, n, c, m1, m0, sqrt_a); finished: mpz_poly_free(&prod); mpz_poly_free(&alg_sqrt); mpz_poly_free(&d_alg_poly); mpz_clear(q); alg_poly->degree++; }
/*-------------------------------------------------------------------*/ static uint32 get_final_sqrt(msieve_obj *obj, mpz_poly_t *alg_poly, mpz_poly_t *prod, mpz_poly_t *isqrt_mod_q, mpz_t q) { /* the main q-adic Newton iteration. On input, isqrt_mod_q contains the starting value of the reciprocal square root R[0](x) of the polynomial prod(x). The iteration is R[k](x) = R[k-1](x) * (3 - prod(x)*R[k-1](x)^2) / 2 mod (q^(2^k)) and at the end of iteration k, prod(x)*R[k-1](x)^2 mod (q^(2^k)) is 1. We keep iterating until q^(2^k) is larger than the size of the coefficients of the square root (i.e. about half the size of the coefficients of prod(x)). Then the square root to use is R[k](x) * prod(x) mod (q^(2^k)), which is written to isqrt_mod_q */ uint32 i, j; uint32 prod_bits, prod_max_bits; uint32 num_iter; /* initialize */ mpz_poly_bits(prod, &prod_bits, &prod_max_bits); /* since prod(x) only matters mod q^(2^(final_k)), we can cut the memory use in half by changing prod(x) to this. Remember final_k as well */ i = mpz_get_ui(q); for (num_iter = 0; mpz_sizeinbase(q, 2) < prod_max_bits / 2 + 4000; num_iter++) { mpz_mul(q, q, q); } mpz_poly_mod_q(prod, q, prod); mpz_set_ui(q, (unsigned long)i); mpz_realloc2(q, 33); /* do the main iteration */ for (i = 0; i < num_iter; i++) { mpz_poly_t tmp_poly; /* square the previous modulus */ mpz_mul(q, q, q); /* compute prod(x) * (previous R)^2 */ mpz_poly_init(&tmp_poly); mpz_poly_mod_q(prod, q, &tmp_poly); mpz_poly_mul(&tmp_poly, isqrt_mod_q, alg_poly, 0); mpz_poly_mod_q(&tmp_poly, q, &tmp_poly); mpz_poly_mul(&tmp_poly, isqrt_mod_q, alg_poly, 0); mpz_poly_mod_q(&tmp_poly, q, &tmp_poly); /* compute ( (3 - that) / 2 ) mod q */ mpz_sub_ui(tmp_poly.coeff[0], tmp_poly.coeff[0], (unsigned long)3); for (j = 0; j <= tmp_poly.degree; j++) { mpz_t *c = tmp_poly.coeff + j; if (mpz_sgn(*c) != 0) { mpz_neg(*c, *c); if (mpz_tstbit(*c, (unsigned long)0)) mpz_add(*c, *c, q); mpz_tdiv_q_2exp(*c, *c, (unsigned long)1); } } /* finally, compute the 
new R(x) by multiplying the result above by the old R(x) */ mpz_poly_mul(&tmp_poly, isqrt_mod_q, alg_poly, 1); mpz_poly_mod_q(&tmp_poly, q, isqrt_mod_q); mpz_poly_free(&tmp_poly); } /* attempt to compute the square root. First multiply R(x) by prod(x), deleting prod(x) since we won't need it beyond this point */ mpz_poly_mul(isqrt_mod_q, prod, alg_poly, 1); mpz_poly_mod_q(isqrt_mod_q, q, isqrt_mod_q); /* this is a little tricky. Up until now we've been working modulo big numbers, but the coef- ficients of the square root are just integers, and may be negative. Negative numbers mod q have a numerical value near that of +q, but we want the square root to have a negative coef- ficient in that case. Hence, if the top few words of any coefficent of the square root match the top few words of q, we assume this coefficient is negative and subtract q from it. Theoretically we could be wrong, and the coefficient really is supposed to be a big positive number near q in size. However, if q is thousands of bits larger than the size we expect for the square root coefficients, this is so unlikely that it's not worth worrying about */ for (i = 0; i <= isqrt_mod_q->degree; i++) { mpz_t *c = isqrt_mod_q->coeff + i; size_t limbs = mpz_size(*c); if (limbs == mpz_size(q) && mpz_getlimbn(*c, (mp_size_t)(limbs-1)) == mpz_getlimbn(q, (mp_size_t)(limbs-1)) && mpz_getlimbn(*c, (mp_size_t)(limbs-2)) == mpz_getlimbn(q, (mp_size_t)(limbs-2)) && mpz_getlimbn(*c, (mp_size_t)(limbs-3)) == mpz_getlimbn(q, (mp_size_t)(limbs-3))) { mpz_sub(*c, *c, q); } } /* another heuristic: we will assume the Newton iteration has converged if, after applying the correction above for negative square root coefficients, the total number of bits in the coefficients of the resulting polynomial is much smaller than we would expect from random polynomials modulo q */ mpz_poly_bits(isqrt_mod_q, &prod_bits, &i); if (prod_bits >= (isqrt_mod_q->degree + 1) * mpz_sizeinbase(q, 2) - 100) { logprintf(obj, "Newton iteration 
failed to converge\n"); return 0; } return 1; }