/*-------------------------------------------------------------------------*/
static void
do_sieving(xydata_t *curr_xydata)
{
	/* Sieve with the first entry of curr_xydata->powers only.
	   The sieve is walked as 'modulus' consecutive rows of
	   'modulus' entries each. For every root, exactly one entry
	   per row is incremented by the power's contribution; its
	   column is the root's current start minus the per-row
	   offset from invtable_y, taken modulo 'modulus'. Once all
	   rows are done the root's start is moved back by stride_z
	   (mod modulus) and stored for the next call */

	uint16 *const table = curr_xydata->sieve;
	xypower_t *pw = &curr_xydata->powers[0];
	const uint32 modulus = pw->power;
	const uint16 weight = pw->contrib;
	const uint32 root_count = pw->num_roots;
	xyprog_t *progs = pw->roots;
	uint32 r;

	for (r = 0; r < root_count; r++) {
		xyprog_t *prog = &progs[r];
		const uint32 base = prog->start;
		uint8 *y_offsets = prog->invtable_y;
		uint16 *line = table;
		uint32 y = 0;

		while (y < modulus) {
			uint32 col = mp_modsub_1(base, y_offsets[y], modulus);

			line[col] += weight;
			line += modulus;
			y++;
		}

		/* step this progression for the next invocation */

		prog->start = mp_modsub_1(base, prog->stride_z, modulus);
	}
}
/*-------------------------------------------------------------------------*/
static void
xydata_init(xydata_t *xydata, uint32 num_lattice_primes,
		lattice_t *lattice_xyz, int64 z_base)
{
	/* Prepare every root of every prime power of every lattice
	   prime for sieving. With q the prime power and rc the
	   root's residue class, each root receives

	     stride_z = rc^2 * latsize_mod                   (mod q)
	     start    = base_start - rc * y - rc^2 * z       (mod q)

	   where y is lattice_xyz->y mod q and z is
	   (z_base + lattice_xyz->z) mod q. Its invtable_y is filled
	   by walking y, y + latsize_mod, y + 2*latsize_mod, ...
	   (mod q) for q steps and storing at each of those indices
	   the matching multiple of rc * latsize_mod (mod q) */

	uint32 prime_idx;

	for (prime_idx = 0; prime_idx < num_lattice_primes; prime_idx++) {
		xydata_t *entry = &xydata[prime_idx];
		uint32 power_count = entry->num_powers;
		uint32 power_idx;

		for (power_idx = 0; power_idx < power_count; power_idx++) {
			xypower_t *pw = &entry->powers[power_idx];
			uint32 root_count = pw->num_roots;
			uint32 q = pw->power;
			uint32 lat_mod_q = pw->latsize_mod;
			uint32 y_mod_q = lattice_xyz->y % q;
			int64 z_abs = z_base + lattice_xyz->z;
			int32 z_rem = z_abs % q;
			uint32 z_mod_q;
			uint32 root_idx;

			/* the C remainder can be negative; move it
			   into [0, q) */

			if (z_rem < 0)
				z_mod_q = z_rem + (int32)q;
			else
				z_mod_q = z_rem;

			for (root_idx = 0; root_idx < root_count; root_idx++) {
				xyprog_t *prog = &pw->roots[root_idx];
				uint8 *table = prog->invtable_y;
				uint32 offset = prog->base_start;
				uint32 rc = prog->resclass;
				uint32 rc_sq = mp_modmul_1(rc, rc, q);
				uint32 y_pos = y_mod_q;
				uint32 y_step = mp_modmul_1(rc, lat_mod_q, q);
				uint32 value = 0;
				uint32 count = 0;

				prog->stride_z = mp_modmul_1(rc_sq, lat_mod_q, q);

				offset = mp_modsub_1(offset,
						mp_modmul_1(rc, y_mod_q, q), q);
				prog->start = mp_modsub_1(offset,
						mp_modmul_1(rc_sq, z_mod_q, q), q);

				while (count < q) {
					table[y_pos] = value;
					y_pos = mp_modadd_1(y_pos, lat_mod_q, q);
					value = mp_modadd_1(value, y_step, q);
					count++;
				}
			}
		}
	}
}
/*-------------------------------------------------------------------------*/
static void
do_sieving(sieve_root_t *r, uint16 *sieve,
		uint32 contrib, uint32 dim)
{
	/* Update a dim x dim sieve for one root. In each row, every
	   'step'-th entry beginning at the current offset is
	   incremented by contrib; at least one entry is always
	   touched per row. Between rows the offset is decreased by
	   the root's residue class, modulo step. The root structure
	   itself is only read, never modified */

	const uint32 stride = r->step;
	uint32 offset = r->start;
	uint32 delta = r->resclass;
	uint16 *row = sieve;
	uint32 rows_left;

	/* bring the residue class below the step size */

	if (!(delta < stride))
		delta %= stride;

	for (rows_left = dim; rows_left != 0; rows_left--) {
		uint32 col = offset;

		do {
			row[col] += contrib;
			col += stride;
		} while (col < dim);

		row += dim;
		offset = mp_modsub_1(offset, delta, stride);
	}
}
/*-------------------------------------------------------------------------*/
static void
do_sieving_powers(xydata_t *curr_xydata)
{
	/* Add the contribution of every root of every prime power
	   in curr_xydata->powers to the sieve, then advance each
	   root's starting offset by its stride_z.

	   The sieve is walked as consecutive rows of p entries
	   (p = curr_xydata->p). Rows are handled in blocks of
	   'power' rows; invtable_y is indexed by the position of
	   the row inside its block. Within a row, every power-th
	   entry starting at (start - invtable_y[m]) mod power is
	   incremented by the power's contribution.

	   NOTE(review): the row pattern only lines up if each power
	   divides p and is nonzero -- confirm against the code that
	   builds the powers[] array */

	uint32 i, j, k, m;
	uint16 *sieve = curr_xydata->sieve;
	uint32 p = curr_xydata->p;
	uint32 num_powers = curr_xydata->num_powers;
	xypower_t *powers = curr_xydata->powers;

	for (i = 0; i < num_powers; i++) {
		xypower_t *curr_xypower = powers + i;
		uint32 power = curr_xypower->power;
		uint16 contrib = curr_xypower->contrib;
		uint32 num_roots = curr_xypower->num_roots;
		xyprog_t *roots = curr_xypower->roots;

		for (j = 0; j < num_roots; j++) {
			uint16 *row = sieve;
			xyprog_t *curr_prog = roots + j;
			uint32 start = curr_prog->start;
			uint8 *invtable_y = curr_prog->invtable_y;

			for (k = 0; k < p; k += power) {
				for (m = 0; m < power; m++) {
					uint32 curr_start = mp_modsub_1(start,
							invtable_y[m], power);

					do {
						row[curr_start] += contrib;
						curr_start += power;
					} while (curr_start < p);

					row += p;
				}
			}

			/* start and stride_z are residues modulo
			   'power' (that is how xydata_init computes
			   them), so the update must be reduced modulo
			   'power' as well. Reducing modulo p would,
			   whenever power < p and the subtraction wraps,
			   leave start in [p - power, p); the next call
			   would then begin each row near its end and
			   skip most of the entries it should mark */

			curr_prog->start = mp_modsub_1(start,
						curr_prog->stride_z, power);
		}
	}
}
/*------------------------------------------------------------------------*/
static uint32
lift_root_32(uint32 n, uint32 r, uint32 old_power,
		uint32 p, uint32 d)
{
	/* One lifting step for a root of x^d = n. With
	   new_power = old_power * p, compute

	     residual = (n - r^d) mod new_power
	     deriv    = d * r^(d-1) mod p
	     digit    = (residual / old_power) * deriv^(-1) mod p

	   and return r + old_power * digit. All arithmetic is
	   32-bit, so old_power * p is assumed not to overflow */

	const uint32 new_power = old_power * p;
	const uint32 base_root = r;
	uint32 residual;
	uint32 deriv;
	uint32 digit;

	residual = mp_modsub_1(n % new_power,
			mp_expo_1(r, d, new_power), new_power);
	deriv = mp_modmul_1(d, mp_expo_1(r % p, d - 1, p), p);
	digit = mp_modmul_1(residual / old_power,
			mp_modinv_1(deriv, p), p);

	return base_root + old_power * digit;
}
u_int32_t
is_irreducible(mpzpoly_t poly, u_int32_t p)
{
	/* Return 1 if 'poly', with coefficients reduced mod p and
	   made monic, passes the irreducibility test below, and 0
	   otherwise.

	   This uses Proposition 3.4.4 of H. Cohen, "A Course
	   in Computational Algebraic Number Theory": f of degree d
	   is irreducible mod p iff x^(p^d) == x (mod f) and
	   gcd(x^(p^(d/i)) - x, f) == 1 for every prime i dividing d.
	   The tests below are much simpler than trying to
	   factor 'poly' */

	u_int32_t i;
	poly_t f, tmp;

	poly_reduce_mod_p(f, poly, p);
	poly_make_monic(f, f, p);

	/* in practice, the degree of f will be 8 or less,
	   and we want to compute GCDs for all prime numbers
	   that divide the degree. For this limited range
	   the loop below simply tries every divisor i >= 2 of
	   the degree; the extra composite divisors only add
	   redundant (but still valid) tests.

	   The loop must include i == degree: when the degree is
	   itself prime that is the only prime divisor, and the
	   corresponding test gcd(x^p - x, f) == 1 is what rejects
	   polynomials with linear factors. Stopping at degree - 1
	   would run no GCD test at all for degrees 2, 3, 5 and 7,
	   and a squarefree product of linear factors would then
	   pass the final test and be reported as irreducible */

	for (i = 2; i <= f->degree; i++) {
		if (f->degree % i)
			continue;

		/* for degree d, compute x^(p^(d/i)) - x */

		poly_xpow_pd(tmp, p, f->degree / i, f);
		if (tmp->degree == 0) {
			/* the power reduced to a constant c; the
			   difference is c - x */
			tmp->degree = 1;
			tmp->coef[1] = p - 1;
		}
		else {
			tmp->coef[1] = mp_modsub_1(tmp->coef[1],
						(u_int32_t)1, p);
			poly_fix_degree(tmp);
		}

		/* this must be relatively prime to f */

		poly_gcd(tmp, f, p);
		if (tmp->degree > 0 || tmp->coef[0] != 1) {
			return 0;
		}
	}

	/* final test: x^(p^d) mod f must equal x */

	poly_xpow_pd(tmp, p, f->degree, f);
	if (tmp->degree == 1 && tmp->coef[0] == 0 && tmp->coef[1] == 1)
		return 1;
	return 0;
}
/*------------------------------------------------------------------------*/
static uint32
lift_root_32(uint32 n, uint32 r, uint32 old_power,
		uint32 p, uint32 d)
{
	/* Hensel lifting: r is a d_th root of n modulo old_power;
	   the return value is the matching root modulo
	   old_power * p. The correction digit is
	   ((n - r^d) / old_power) / (d * r^(d-1)), evaluated mod p,
	   and it is added to r after scaling by old_power */

	const uint32 lifted_mod = old_power * p;
	const uint32 root_in = r;
	uint32 error_term;
	uint32 slope;
	uint32 correction;

	error_term = mp_modsub_1(n % lifted_mod,
			mp_expo_1(r, d, lifted_mod), lifted_mod);
	slope = mp_modmul_1(d, mp_expo_1(r % p, d - 1, p), p);
	correction = mp_modmul_1(error_term / old_power,
			mp_modinv_1(slope, p), p);

	return root_in + old_power * correction;
}
static void
poly_mod(poly_t res, poly_t op, poly_t _mod, u_int32_t p)
{
	/* Polynomial remainder over Z/pZ: res = op mod _mod.
	   The divisor is made monic first, so each reduction step
	   only has to cancel the leading term of the running
	   remainder. A divisor of degree 0 yields the zero
	   polynomial */

	poly_t rem, divisor;

	if (_mod->degree == 0) {
		memset(res, 0, sizeof(res[0]));
		return;
	}

	poly_cp(rem, op);
	poly_make_monic(divisor, _mod, p);

	while (rem->degree >= divisor->degree) {

		/* rem <-- rem - lead * divisor * x^offset, where
		   offset = deg(rem) - deg(divisor). The leading
		   coefficient cancels exactly because divisor is
		   monic, so it is simply cleared */

		const u_int32_t lead = rem->coef[rem->degree];
		const u_int32_t offset = rem->degree - divisor->degree;
		u_int32_t k;

		rem->coef[rem->degree] = 0;

		for (k = divisor->degree; k-- > 0; ) {
			u_int32_t term = mp_modmul_1(lead,
						divisor->coef[k], p);

			rem->coef[offset + k] = mp_modsub_1(
						rem->coef[offset + k],
						term, p);
		}
		poly_fix_degree(rem);
	}

	poly_cp(res, rem);
}
/*-------------------------------------------------------------------*/ static uint32 verify_product(gmp_poly_t *gmp_prod, abpair_t *abpairs, uint32 num_relations, uint32 q, mp_t *c, mp_poly_t *alg_poly) { /* a sanity check on the computed value of S(x): for a small prime q for which alg_poly is irreducible, verify that gmp_prod mod q equals the product mod q of the relations in abpairs[]. The latter can be computed very quickly */ uint32 i, j; uint32 c_mod_q = mp_mod_1(c, q); uint32 d = alg_poly->degree; uint32 ref_prod[MAX_POLY_DEGREE]; uint32 prod[MAX_POLY_DEGREE]; uint32 mod[MAX_POLY_DEGREE]; uint32 accum[MAX_POLY_DEGREE + 1]; /* compute the product mod q directly. First initialize and reduce the coefficients of alg_poly and gmp_prod mod q */ for (i = 0; i < d; i++) { prod[i] = 0; ref_prod[i] = mpz_fdiv_ui(gmp_prod->coeff[i], (unsigned long)q); mod[i] = mp_mod_1(&alg_poly->coeff[i].num, q); if (alg_poly->coeff[i].sign == NEGATIVE && mod[i] > 0) { mod[i] = q - mod[i]; } } prod[0] = 1; /* multiply the product by each relation in turn, modulo q */ for (i = 0; i < num_relations; i++) { int64 a = abpairs[i].a; uint32 b = q - (abpairs[i].b % q); uint32 ac; a = a % (int64)q; if (a < 0) a += q; ac = mp_modmul_1((uint32)a, c_mod_q, q); for (j = accum[0] = 0; j < d; j++) { accum[j+1] = mp_modmul_1(prod[j], b, q); accum[j] = mp_modadd_1(accum[j], mp_modmul_1(ac, prod[j], q), q); } for (j = 0; j < d; j++) { prod[j] = mp_modsub_1(accum[j], mp_modmul_1(accum[d], mod[j], q), q); } } /* do the polynomial compare */ for (i = 0; i < d; i++) { if (ref_prod[i] != prod[i]) break; } if (i == d) return 1; return 0; }
/*------------------------------------------------------------------*/
u_int32_t
poly_get_zeros(u_int32_t *zeros, mpzpoly_t _f, u_int32_t p,
		u_int32_t count_only)
{
	/* Find the roots of polynomial _f when the coefficients
	   of _f are reduced mod p. Each distinct root is reported
	   once. The return value is the number of roots found.

	   Make count_only nonzero if only the number of roots
	   and not their identity matters; this is much faster.
	   In that mode 'zeros' is never written, so nothing is
	   required of the buffer. Otherwise the roots are stored
	   in zeros[0 .. return value - 1] */

	poly_t g, f;
	u_int32_t i, j, num_zeros;

	/* reduce the coefficients mod p */

	poly_reduce_mod_p(f, _f, p);

	/* bail out if the polynomial has degree zero */

	if (f->degree == 0)
		return 0;

	/* pull out roots of zero. We do this early to
	   avoid having to handle degree-1 polynomials
	   in later code */

	num_zeros = 0;
	if (f->coef[0] == 0) {

		/* find the lowest nonzero coefficient and divide
		   out that power of x */

		for (i = 1; i <= f->degree; i++) {
			if (f->coef[i])
				break;
		}
		for (j = i; i <= f->degree; i++) {
			f->coef[i - j] = f->coef[i];
		}
		f->degree = i - j - 1;

		/* count the root always, but only record it when
		   the caller wants the roots themselves; every
		   other path below already skips the store in
		   count_only mode */

		if (!count_only)
			zeros[num_zeros] = 0;
		num_zeros++;
	}

	/* handle trivial cases */

	if (f->degree == 0) {
		return num_zeros;
	}
	else if (f->degree == 1) {
		u_int32_t w = f->coef[1];

		if (count_only)
			return num_zeros + 1;

		/* root of w*x + c is -c / w */

		if (w != 1) {
			w = mp_modinv_1(w, p);
			zeros[num_zeros++] = mp_modmul_1(p - f->coef[0],
						w, p);
		}
		else {
			zeros[num_zeros++] = (f->coef[0] == 0 ? 0 :
							p - f->coef[0]);
		}
		return num_zeros;
	}

	/* the rest of the algorithm assumes p is odd, which
	   will not work for p=2. Fortunately, in that case
	   there are only two possible roots, 0 and 1. The above
	   already tried 0, so try 1 here: f(1) mod 2 is the
	   parity of the sum of the coefficients */

	if (p == 2) {
		u_int32_t parity = 0;

		for (i = 0; i <= f->degree; i++)
			parity ^= f->coef[i];

		if (parity == 0) {
			if (!count_only)
				zeros[num_zeros] = 1;
			num_zeros++;
		}
		return num_zeros;
	}

	/* Compute g = gcd(f, x^(p-1) - 1). The result is
	   a polynomial that is the product of all the linear
	   factors of f. A given factor only occurs once in
	   this polynomial */

	poly_xpow(g, 0, p-1, f, p);
	g->coef[0] = mp_modsub_1(g->coef[0], 1, p);
	poly_fix_degree(g);
	poly_gcd(g, f, p);

	/* no linear factors, no service */

	if (g->degree < 1 || count_only)
		return num_zeros + g->degree;

	/* isolate the linear factors */

	get_zeros_rec(zeros, 0, &num_zeros, g, p);
	return num_zeros;
}
/*------------------------------------------------------------------*/
static void
get_zeros_rec(u_int32_t *zeros, u_int32_t shift,
		u_int32_t *num_zeros, poly_t f, u_int32_t p)
{
	/* Append to zeros[] the roots of f, a polynomial known
	   to split completely over Z/pZ, bumping *num_zeros for
	   each root stored. 'shift' is the first splitting
	   offset to try. The splitting scheme is Cantor-Zassenhaus
	   style, following an implementation credited to
	   Bob Silverman */

	poly_t factor, power;
	u_int32_t deg_minus, deg_plus;

	/* linear polynomial: the root of w*x + c is -c / w */

	if (f->degree == 1) {
		u_int32_t lead = f->coef[1];
		u_int32_t root;

		if (lead == 1) {
			root = (f->coef[0] == 0 ? 0 : p - f->coef[0]);
		}
		else {
			lead = mp_modinv_1(lead, p);
			root = mp_modmul_1(p - f->coef[0], lead, p);
		}
		zeros[(*num_zeros)++] = root;
		return;
	}

	/* quadratic polynomial: callers only get here with two
	   distinct nonzero roots, so the quadratic formula mod p
	   yields both of them directly */

	if (f->degree == 2) {
		u_int32_t ac = mp_modmul_1(f->coef[0], f->coef[2], p);
		u_int32_t neg_b = p - f->coef[1];
		u_int32_t inv_2a = mp_modinv_1(
				mp_modadd_1(f->coef[2], f->coef[2], p), p);
		u_int32_t disc = mp_modsub_1(
				mp_modmul_1(f->coef[1], f->coef[1], p),
				mp_modmul_1(4, ac, p), p);
		u_int32_t root_plus, root_minus;

		disc = mp_modsqrt_1(disc, p);
		root_plus = mp_modadd_1(neg_b, disc, p);
		root_minus = mp_modsub_1(neg_b, disc, p);

		zeros[(*num_zeros)++] = mp_modmul_1(root_plus, inv_2a, p);
		zeros[(*num_zeros)++] = mp_modmul_1(root_minus, inv_2a, p);
		return;
	}

	/* Try successive offsets until gcd(power - 1, f) is
	   nonconstant, where power is the ((p-1)/2)-th power
	   produced by poly_xpow for the current offset
	   (presumably (x + shift)^((p-1)/2) mod f, consistent
	   with -shift being reported as a root below). Offsets
	   are taken in increasing order rather than at random so
	   that no offset is ever tried twice */

	for (; shift < p; shift++) {
		poly_xpow(power, shift, (p-1)/2, f, p);

		poly_cp(factor, power);
		factor->coef[0] = mp_modsub_1(factor->coef[0], 1, p);
		poly_fix_degree(factor);
		poly_gcd(factor, f, p);

		if (factor->degree > 0)
			break;
	}

	/* Every root of f now lies in gcd(power - 1, f), in
	   gcd(power + 1, f), or is -shift itself. Recurse on the
	   first piece, then on the second when it is nonconstant,
	   starting each from the next untried offset */

	deg_minus = factor->degree;
	get_zeros_rec(zeros, shift + 1, num_zeros, factor, p);

	poly_cp(factor, power);
	factor->coef[0] = mp_modadd_1(factor->coef[0], 1, p);
	poly_fix_degree(factor);
	poly_gcd(factor, f, p);

	deg_plus = factor->degree;
	if (deg_plus > 0)
		get_zeros_rec(zeros, shift + 1, num_zeros, factor, p);

	/* a degree shortfall means one root was in neither piece;
	   that root is -shift mod p */

	if (deg_minus + deg_plus < f->degree)
		zeros[(*num_zeros)++] = (shift == 0 ? 0 : p - shift);
}
/* Modular addition written as a subtraction of the negated
   operand: the value returned is mp_modsub_1(a, n - b, n) */
static inline u_int32_t
mp_modadd_1(u_int32_t a, u_int32_t b, u_int32_t n)
{
	const u_int32_t neg_b = n - b;

	return mp_modsub_1(a, neg_b, n);
}