/*------------------------------------------------------------------------*/
static uint32
lift_root_32(uint32 n, uint32 r, uint32 old_power,
		uint32 p, uint32 d)
{
	/* One step of Hensel lifting. r is taken to be a d-th root
	   of n modulo old_power; the value returned is
	   r + old_power * t, with t chosen modulo p so that the
	   result is a d-th root of n modulo old_power * p.

	   old_power * p is formed in 32-bit arithmetic with no
	   overflow check here, so the caller has to guarantee
	   that it fits. */

	uint32 new_power = old_power * p;
	uint32 residual;
	uint32 deriv;
	uint32 step;

	/* (n - r^d) mod new_power, then divided by old_power */

	residual = mp_modsub_1(n % new_power,
				mp_expo_1(r, d, new_power),
				new_power) / old_power;

	/* d * r^(d-1) mod p, the derivative of x^d at x = r */

	deriv = mp_modmul_1(d, mp_expo_1(r % p, d - 1, p), p);

	/* t = residual / deriv (mod p) */

	step = mp_modmul_1(residual, mp_modinv_1(deriv, p), p);

	/* the sum is formed in 64 bits and truncated on return */

	return (uint64)r + old_power * step;
}
/*------------------------------------------------------------------------*/
static uint32
lift_root_32(uint32 n, uint32 r, uint32 old_power,
		uint32 p, uint32 d)
{
	/* Hensel lifting: r is a d-th root of n modulo old_power,
	   and the return value is the corresponding d-th root
	   modulo old_power * p. The lifted root has the form
	   r + old_power * digit with 0 <= digit < p.

	   The product old_power * p is computed in 32 bits without
	   an overflow check; callers must make sure it fits. */

	const uint32 modulus = old_power * p;

	/* how far r^d is from n modulo the larger modulus, scaled
	   down by the old modulus */

	const uint32 r_to_d = mp_expo_1(r, d, modulus);
	const uint32 error = mp_modsub_1(n % modulus, r_to_d,
					modulus) / old_power;

	/* derivative of x^d evaluated at r, reduced mod p */

	const uint32 slope = mp_modmul_1(d,
				mp_expo_1(r % p, d - 1, p), p);

	/* the correction is error / slope (mod p) */

	const uint32 digit = mp_modmul_1(error,
				mp_modinv_1(slope, p), p);

	/* added in 64 bits, truncated to 32 bits on return */

	return (uint64)r + old_power * digit;
}
static inline void
poly_make_monic(poly_t res, poly_t a, u_int32_t p)
{
	/* Store in res the polynomial a scaled so that its leading
	   coefficient is 1, with coefficients taken mod p. When a
	   is already monic it is simply copied into res */

	u_int32_t top = a->degree;
	u_int32_t lead = a->coef[top];
	u_int32_t scale;
	u_int32_t k;

	if (lead == 1) {
		poly_cp(res, a);
		return;
	}

	/* multiply every lower coefficient by the inverse of the
	   leading one; the leading coefficient itself becomes 1 */

	scale = mp_modinv_1(lead, p);
	res->degree = top;
	res->coef[top] = 1;

	for (k = 0; k < top; k++)
		res->coef[k] = mp_modmul_1(scale, a->coef[k], p);
}
/*------------------------------------------------------------------------*/
static uint32
get_composite_roots(sieve_fb_t *s, curr_poly_t *c,
			uint32 which_poly, uint64 p,
			uint32 num_factors,
			uint32 *factors,
			uint32 num_roots_min,
			uint32 num_roots_max)
{
	/* Build the roots modulo the composite p out of the roots
	   stored for its individual factors.

	   factors[] holds num_factors indices into the aprog table
	   of s; an index repeated in adjacent slots stands for a
	   higher power of that entry's p (this relies on repeats
	   being adjacent -- presumably the caller sorts the list).
	   p is used as the product of all the resulting prime
	   powers (assumption; it is not verified here).

	   Returns
	     - 0 if some factor has no roots for which_poly, if a
	       prime power would not fit in 32 bits, or if there are
	       more than MAX_P_FACTORS distinct entries
	     - INVALID_NUM_ROOTS if the total number of combined
	       roots is outside [num_roots_min, num_roots_max]
	     - otherwise the number of roots written to s->roots[]

	   s->p and s->accum[] are overwritten as scratch space when
	   more than one distinct factor is present. */

	uint32 i, j, k, i0, i1, i2, i3, i4, i5, i6;
	uint32 crt_p[MAX_P_FACTORS];
	uint32 num_roots[MAX_P_FACTORS];
	uint64 prod[MAX_P_FACTORS];
	uint32 roots[MAX_P_FACTORS][MAX_POLYSELECT_DEGREE];
	aprog_t *aprogs = s->aprog_data.aprogs;
	uint32 degree = s->degree;

	/* count the combined roots: the product of the root counts
	   of the distinct entries (a repeated entry contributes
	   only once) */

	for (i = 0, j = 1; i < num_factors; i++) {
		aprog_t *a;

		if (i > 0 && factors[i] == factors[i-1])
			continue;

		a = aprogs + factors[i];
		if (a->num_roots[which_poly] == 0)
			return 0;

		j *= a->num_roots[which_poly];
	}
	if (j < num_roots_min || j > num_roots_max)
		return INVALID_NUM_ROOTS;

	/* collapse runs of equal entries: slot j receives the prime
	   power crt_p[j] and the roots modulo that prime power.
	   i walks the input list, j the distinct entries */

	for (i = j = 0; j < MAX_P_FACTORS && i < num_factors; i++, j++) {
		aprog_t *a = aprogs + factors[i];
		uint32 power_limit;

		num_roots[j] = a->num_roots[which_poly];
		crt_p[j] = a->p;

		/* largest value that can still be multiplied by
		   a->p without exceeding 32 bits */

		power_limit = (uint32)(-1) / a->p;
		for (k = 0; k < num_roots[j]; k++) {
			roots[j][k] = a->roots[which_poly][k];
		}

		/* every extra copy of this entry raises the power
		   by one; each root is Hensel-lifted from crt_p[j]
		   to crt_p[j] * a->p */

		while (i < num_factors - 1 && factors[i] == factors[i+1]) {
			uint32 nmodp, new_power;

			if (crt_p[j] > power_limit)
				return 0;

			new_power = crt_p[j] * a->p;

			/* despite the name, this is trans_N reduced
			   modulo the new prime power */

			nmodp = mpz_tdiv_ui(c->trans_N, (mp_limb_t)new_power);

			for (k = 0; k < num_roots[j]; k++) {
				roots[j][k] = lift_root_32(nmodp, roots[j][k], crt_p[j], a->p, degree);
			}
			crt_p[j] = new_power;
			i++;
		}
	}

	/* the loop above stopped early only if the MAX_P_FACTORS
	   slots filled up before the factor list was consumed */

	if (i < num_factors)
		return 0;

	/* from here on num_factors counts distinct prime powers */

	num_factors = j;

	if (num_factors == 1) {
		/* a single prime power: its roots are the answer */

		for (i = 0; i < num_roots[0]; i++)
			mpz_set_ui(s->roots[i], (mp_limb_t)roots[0][i]);

		return num_roots[0];
	}

	/* CRT multipliers: prod[i] is p / crt_p[i] times the inverse
	   of that quotient modulo crt_p[i] */

	for (i = 0; i < num_factors; i++) {
		prod[i] = p / crt_p[i];
		prod[i] = prod[i] * mp_modinv_1((uint32)(prod[i] % crt_p[i]), crt_p[i]);
	}

	/* i == num_factors here; this zeroes the accumulator one
	   level above the outermost active loop, which that loop
	   adds in. NOTE(review): this needs s->accum[] to have at
	   least MAX_P_FACTORS + 1 entries -- confirm at its
	   declaration */

	mpz_set_ui(s->accum[i], (mp_limb_t)0);
	uint64_2gmp(p, s->p);

	/* Enumerate every combination of one root per prime power.
	   The switch jumps straight into the body of the loop
	   nest at the level matching num_factors, skipping the
	   initializers of the enclosing loops. All the loop
	   counters are zeroed first so that each skipped outer
	   loop, once control falls out of the inner loops into
	   its decrement, wraps to a negative (int32) value and
	   terminates after that single pass.

	   At level k, accum[k] = prod[k] * roots[k][ik] + accum[k+1];
	   the innermost level reduces the sum mod p and stores it.
	   The counter i doubles as the output index */

	i0 = i1 = i2 = i3 = i4 = i5 = i6 = i = 0;
	switch (num_factors) {
	case 7:
		for (i6 = num_roots[6] - 1; (int32)i6 >= 0; i6--) {
			uint64_2gmp(prod[6], s->accum[6]);
			mpz_mul_ui(s->accum[6], s->accum[6], 
						(mp_limb_t)roots[6][i6]);
			mpz_add(s->accum[6], s->accum[6], s->accum[7]);
	case 6:
		for (i5 = num_roots[5] - 1; (int32)i5 >= 0; i5--) {
			uint64_2gmp(prod[5], s->accum[5]);
			mpz_mul_ui(s->accum[5], s->accum[5], (mp_limb_t)roots[5][i5]);
			mpz_add(s->accum[5], s->accum[5], s->accum[6]);
	case 5:
		for (i4 = num_roots[4] - 1; (int32)i4 >= 0; i4--) {
			uint64_2gmp(prod[4], s->accum[4]);
			mpz_mul_ui(s->accum[4], s->accum[4], (mp_limb_t)roots[4][i4]);
			mpz_add(s->accum[4], s->accum[4], s->accum[5]);
	case 4:
		for (i3 = num_roots[3] - 1; (int32)i3 >= 0; i3--) {
			uint64_2gmp(prod[3], s->accum[3]);
			mpz_mul_ui(s->accum[3], s->accum[3], (mp_limb_t)roots[3][i3]);
			mpz_add(s->accum[3], s->accum[3], s->accum[4]);
	case 3:
		for (i2 = num_roots[2] - 1; (int32)i2 >= 0; i2--) {
			uint64_2gmp(prod[2], s->accum[2]);
			mpz_mul_ui(s->accum[2], s->accum[2], (mp_limb_t)roots[2][i2]);
			mpz_add(s->accum[2], s->accum[2], s->accum[3]);
	case 2:
		for (i1 = num_roots[1] - 1; (int32)i1 >= 0; i1--) {
			uint64_2gmp(prod[1], s->accum[1]);
			mpz_mul_ui(s->accum[1], s->accum[1], (mp_limb_t)roots[1][i1]);
			mpz_add(s->accum[1], s->accum[1], s->accum[2]);

		for (i0 = num_roots[0] - 1; (int32)i0 >= 0; i0--) {
			uint64_2gmp(prod[0], s->accum[0]);
			mpz_mul_ui(s->accum[0], s->accum[0], (mp_limb_t)roots[0][i0]);
			mpz_add(s->accum[0], s->accum[0], s->accum[1]);

			/* reduce the finished CRT sum and emit it */

			mpz_tdiv_r(s->accum[0], s->accum[0], s->p);
			mpz_set(s->roots[i++], s->accum[0]);
		}}}}}}}
	}

	return i;
}
/*------------------------------------------------------------------*/
u_int32_t
poly_get_zeros(u_int32_t *zeros, mpzpoly_t _f,
		u_int32_t p, u_int32_t count_only)
{
	/* Find the roots of _f after its coefficients have been
	   reduced mod p. Each root is reported once. The roots
	   are written to zeros[] and their number is returned.

	   A nonzero count_only asks only for the number of roots,
	   which avoids the expensive root-isolation step. Even
	   then zeros[] can still receive the root 0 (and the
	   root 1 when p = 2), so it must always be writable */

	poly_t f, g;
	u_int32_t src, drop, found;

	poly_reduce_mod_p(f, _f, p);

	/* a polynomial that reduces to a constant has nothing
	   to report */

	if (f->degree == 0)
		return 0;

	/* A zero constant term means x divides f: record the root
	   0 once and divide out the full power of x. Doing this
	   first keeps the later code from ever seeing a root
	   at zero */

	found = 0;
	if (f->coef[0] == 0) {
		src = 1;
		while (src <= f->degree && f->coef[src] == 0)
			src++;

		drop = src;
		while (src <= f->degree) {
			f->coef[src - drop] = f->coef[src];
			src++;
		}
		f->degree = src - drop - 1;
		zeros[found++] = 0;
	}

	/* constants and linear polynomials are settled directly */

	if (f->degree == 0)
		return found;

	if (f->degree == 1) {
		u_int32_t lead = f->coef[1];
		u_int32_t root;

		if (count_only)
			return found + 1;

		/* the root of lead * x + c is -c / lead */

		if (lead == 1)
			root = (f->coef[0] == 0) ? 0 : p - f->coef[0];
		else
			root = mp_modmul_1(p - f->coef[0],
					mp_modinv_1(lead, p), p);

		zeros[found++] = root;
		return found;
	}

	/* Everything below needs an odd p. For p = 2 the only
	   candidate left is 1 (0 was handled above), and 1 is a
	   root exactly when the coefficients XOR to zero */

	if (p == 2) {
		u_int32_t acc = 0;

		for (src = 0; src <= f->degree; src++)
			acc ^= f->coef[src];

		if (acc == 0)
			zeros[found++] = 1;
		return found;
	}

	/* g = gcd(f, x^(p-1) - 1) is the product of the linear
	   factors of f, each appearing once, so its degree is
	   the number of remaining roots */

	poly_xpow(g, 0, p-1, f, p);
	g->coef[0] = mp_modsub_1(g->coef[0], 1, p);
	poly_fix_degree(g);
	poly_gcd(g, f, p);

	/* stop here if there is nothing to split or only the
	   count was requested */

	if (count_only || g->degree < 1)
		return found + g->degree;

	/* otherwise split g into its linear factors */

	get_zeros_rec(zeros, 0, &found, g, p);
	return found;
}
/*------------------------------------------------------------------*/
static void
get_zeros_rec(u_int32_t *zeros, u_int32_t shift,
		u_int32_t *num_zeros, poly_t f, u_int32_t p)
{
	/* Append to zeros[] the roots of f, a polynomial known to
	   split into distinct linear factors over Z/pZ, advancing
	   *num_zeros for each one. shift is the first trial value
	   for the splitting step; values below it were already
	   used further up the recursion. This is Cantor-Zassenhaus
	   splitting (implementation credited to Bob Silverman) */

	poly_t factor, half_pow;
	u_int32_t deg_minus, deg_plus;

	/* linear f: the single root is -c0 / c1 */

	if (f->degree == 1) {
		u_int32_t lead = f->coef[1];
		u_int32_t root;

		if (lead == 1)
			root = (f->coef[0] == 0) ? 0 : p - f->coef[0];
		else
			root = mp_modmul_1(p - f->coef[0],
					mp_modinv_1(lead, p), p);

		zeros[(*num_zeros)++] = root;
		return;
	}

	/* quadratic f: it has two distinct nonzero roots (or we
	   would not be here), given by the quadratic formula
	   (-b +- sqrt(b^2 - 4ac)) / 2a evaluated mod p */

	if (f->degree == 2) {
		u_int32_t ac = mp_modmul_1(f->coef[0], f->coef[2], p);
		u_int32_t neg_b = p - f->coef[1];
		u_int32_t inv_2a = mp_modinv_1(
					mp_modadd_1(f->coef[2],
						f->coef[2], p), p);
		u_int32_t disc;
		u_int32_t sq;

		disc = mp_modsub_1(
				mp_modmul_1(f->coef[1], f->coef[1], p),
				mp_modmul_1(4, ac, p), p);
		sq = mp_modsqrt_1(disc, p);

		zeros[(*num_zeros)++] = mp_modmul_1(
					mp_modadd_1(neg_b, sq, p),
					inv_2a, p);
		zeros[(*num_zeros)++] = mp_modmul_1(
					mp_modsub_1(neg_b, sq, p),
					inv_2a, p);
		return;
	}

	/* Try successive shifts s, forming the gcd of f with the
	   shifted ((p-1)/2)-th power minus 1, until the gcd is
	   not constant. The literature picks s at random, but
	   any s splits f about half the time, and stepping
	   through 0 <= s < p never repeats a value */

	for (; shift < p; shift++) {
		poly_xpow(half_pow, shift, (p-1)/2, f, p);
		poly_cp(factor, half_pow);
		factor->coef[0] = mp_modsub_1(factor->coef[0], 1, p);
		poly_fix_degree(factor);
		poly_gcd(factor, f, p);

		if (factor->degree > 0)
			break;
	}

	/* Each linear factor of f divides the power minus 1,
	   divides the power plus 1, or corresponds to the shift
	   itself. Recurse on the first piece, with later shifts
	   only */

	deg_minus = factor->degree;
	get_zeros_rec(zeros, shift + 1, num_zeros, factor, p);

	/* the second piece is the gcd with the power plus 1 */

	poly_cp(factor, half_pow);
	factor->coef[0] = mp_modadd_1(factor->coef[0], 1, p);
	poly_fix_degree(factor);
	poly_gcd(factor, f, p);

	deg_plus = factor->degree;
	if (deg_plus > 0)
		get_zeros_rec(zeros, shift + 1, num_zeros, factor, p);

	/* a factor found in neither piece comes from the shift
	   itself, giving the root -shift mod p */

	if (deg_minus + deg_plus < f->degree)
		zeros[(*num_zeros)++] = (shift == 0 ? 0 : p - shift);
}
/*------------------------------------------------------------------------*/
static uint32
combine_roots(sieve_fb_t *s, uint32 p,
		uint32 num_factors, uint32 p_i[MAX_P_FACTORS],
		uint32 num_roots[MAX_P_FACTORS],
		uint32 roots[MAX_P_FACTORS][MAX_POLYSELECT_DEGREE])
{
	/* given a composite p and its factors p_i, combine the
	   roots mod p_i into roots mod p using the Chinese
	   Remainder Theorem

	   roots[k][] holds num_roots[k] roots modulo p_i[k]. The
	   combined roots are written to s->roots[] and their
	   number is returned; at most s->num_roots_max roots are
	   produced.

	   NOTE(review): in the multi-factor path the cap is only
	   tested with '==' after a root has been stored, so a cap
	   of zero would not stop the enumeration -- confirm that
	   callers never pass s->num_roots_max == 0 */

	uint32 i, i0, i1, i2, i3, i4, i5, i6;
	uint32 prod[MAX_P_FACTORS];
	uint64 accum[MAX_P_FACTORS + 1];

	if (num_factors == 1) {
		/* no CRT needed; copy no more roots than are
		   reported back, so that the cap on s->roots[]
		   is honored here as it is in the general case */

		uint32 count = MIN(num_roots[0], s->num_roots_max);

		for (i = 0; i < count; i++)
			s->roots[i] = roots[0][i];

		return count;
	}

	/* fill in auxiliary CRT quantities: prod[i] is p / p_i[i]
	   times the inverse of that quotient modulo p_i[i] */

	for (i = 0; i < num_factors; i++) {
		prod[i] = p / p_i[i];
		prod[i] *= mp_modinv_1(prod[i] % p_i[i], p_i[i]);
	}

	/* i == num_factors here: zero the accumulator one level
	   above the outermost active loop */

	accum[i] = 0;

#if MAX_P_FACTORS > 7
#error "MAX_P_FACTORS exceeds 7"
#endif
	/* loop over all combinations of roots, changing one root
	   at a time. The accumulator value in the innermost loop
	   will exceed p by a few bits, so we need the accum array
	   to have wide integers.

	   The switch jumps directly into the loop nest at the
	   level matching num_factors, skipping the initializers
	   of the enclosing loops. The counters are zeroed first so
	   that each skipped outer loop, when control falls out of
	   the inner loops into its decrement, wraps negative (as
	   int32) and exits after that single pass */

	i0 = i1 = i2 = i3 = i4 = i5 = i6 = i = 0;
	switch (num_factors) {
	case 7:
		for (i6 = num_roots[6] - 1; (int32)i6 >= 0; i6--) {
			accum[6] = accum[7] + (uint64)prod[6] * roots[6][i6];
	case 6:
		for (i5 = num_roots[5] - 1; (int32)i5 >= 0; i5--) {
			accum[5] = accum[6] + (uint64)prod[5] * roots[5][i5];
	case 5:
		for (i4 = num_roots[4] - 1; (int32)i4 >= 0; i4--) {
			accum[4] = accum[5] + (uint64)prod[4] * roots[4][i4];
	case 4:
		for (i3 = num_roots[3] - 1; (int32)i3 >= 0; i3--) {
			accum[3] = accum[4] + (uint64)prod[3] * roots[3][i3];
	case 3:
		for (i2 = num_roots[2] - 1; (int32)i2 >= 0; i2--) {
			accum[2] = accum[3] + (uint64)prod[2] * roots[2][i2];
	case 2:
		for (i1 = num_roots[1] - 1; (int32)i1 >= 0; i1--) {
			accum[1] = accum[2] + (uint64)prod[1] * roots[1][i1];

		for (i0 = num_roots[0] - 1; (int32)i0 >= 0; i0--) {
			accum[0] = accum[1] + (uint64)prod[0] * roots[0][i0];

			/* reduce the finished CRT sum and emit it,
			   stopping once the cap is reached */

			s->roots[i++] = accum[0] % p;

			if (i == s->num_roots_max)
				goto finished;
		}}}}}}}
	}

finished:
	return i;
}