/*-------------------------------------------------------------------------*/
static void
xydata_init(xydata_t *xydata, uint32 num_lattice_primes,
            lattice_t *lattice_xyz, int64 z_base)
{
    /* Walk every lattice prime, every power stored for it and every
       root stored for that power. For each root, derive the z stride
       and the start offset that correspond to the lattice point
       (lattice_xyz->y, z_base + lattice_xyz->z), then refill the
       root's invtable_y table. All residues are taken modulo the
       current power p. */

    uint32 prime_idx;

    for (prime_idx = 0; prime_idx < num_lattice_primes; prime_idx++) {
        xydata_t *prime_data = &xydata[prime_idx];
        const uint32 power_count = prime_data->num_powers;
        uint32 power_idx;

        for (power_idx = 0; power_idx < power_count; power_idx++) {
            xypower_t *power_data = &prime_data->powers[power_idx];
            const uint32 root_count = power_data->num_roots;
            const uint32 p = power_data->power;
            const uint32 latsize_mod = power_data->latsize_mod;
            const uint32 y_mod_p = lattice_xyz->y % p;
            const int64 z_start = z_base + lattice_xyz->z;
            int32 z_rem = z_start % p;
            uint32 z_mod_p;
            uint32 root_idx;

            /* the remainder of a negative z_start is negative;
               fold it back into [0, p) */
            if (z_rem < 0)
                z_rem += (int32)p;
            z_mod_p = z_rem;

            for (root_idx = 0; root_idx < root_count; root_idx++) {
                xyprog_t *prog = &power_data->roots[root_idx];
                uint8 *invtable_y = prog->invtable_y;
                const uint32 resclass = prog->resclass;
                const uint32 resclass2 = mp_modmul_1(resclass,
                                                     resclass, p);
                const uint32 stride_y = mp_modmul_1(resclass,
                                                    latsize_mod, p);
                uint32 start = prog->base_start;
                uint32 y_slot = y_mod_p;
                uint32 offset = 0;
                uint32 step;

                prog->stride_z = mp_modmul_1(resclass2, latsize_mod, p);

                /* start = base_start - resclass * y
                                      - resclass^2 * z  (mod p) */
                start = mp_modsub_1(start,
                                    mp_modmul_1(resclass, y_mod_p, p), p);
                start = mp_modsub_1(start,
                                    mp_modmul_1(resclass2, z_mod_p, p), p);
                prog->start = start;

                /* after 'step' lattice steps in y, the y residue is
                   y_slot and the accumulated offset is
                   step * stride_y (mod p); record that offset,
                   stored as a uint8, under the y residue */
                for (step = 0; step < p; step++) {
                    invtable_y[y_slot] = offset;
                    y_slot = mp_modadd_1(y_slot, latsize_mod, p);
                    offset = mp_modadd_1(offset, stride_y, p);
                }
            }
        }
    }
}
static inline u_int32_t
mp_expo_1(u_int32_t a, u_int32_t b, u_int32_t n)
{
    /* Compute a^b modulo n by binary exponentiation, scanning the
       exponent from its least significant bit upward. All products
       go through mp_modmul_1. Returns 1 when b is zero. */

    u_int32_t result = 1;
    u_int32_t square = a;
    u_int32_t bits;

    for (bits = b; bits != 0; bits >>= 1) {
        if (bits & 1)
            result = mp_modmul_1(result, square, n);
        square = mp_modmul_1(square, square, n);
    }
    return result;
}
/*------------------------------------------------------------------------*/
static uint32
lift_root_32(uint32 n, uint32 r, uint32 old_power, uint32 p, uint32 d)
{
    /* One lifting step for a root of x^d = n: starting from r
       (taken modulo old_power), return r + old_power * k where
       k = ((n - r^d) / old_power) * (d * r^(d-1))^(-1) mod p.
       The new modulus is old_power * p. */

    const uint32 new_power = old_power * p;
    const uint64 old_root = r;
    uint32 residual;
    uint32 deriv;
    uint32 k;

    /* (n - r^d) mod new_power, with the factor old_power divided out */
    residual = mp_modsub_1(n % new_power,
                           mp_expo_1(r, d, new_power),
                           new_power) / old_power;

    /* d * r^(d-1) mod p */
    deriv = mp_modmul_1(d, mp_expo_1(r % p, d - 1, p), p);

    k = mp_modmul_1(residual, mp_modinv_1(deriv, p), p);

    return old_root + old_power * k;
}
/*------------------------------------------------------------------*/
u_int32_t
poly_get_zeros_and_mult(u_int32_t *zeros, u_int32_t *mult,
                        mpzpoly_t _f, u_int32_t p)
{
    /* Find the roots of _f mod p with poly_get_zeros and flag the
       repeated ones: for each root zeros[i], mult[i] is set to 1
       when the remainder of f modulo (x - zeros[i])^2 has degree 0,
       and to 0 otherwise. Returns the number of roots, which is
       also the number of entries written to zeros[] and mult[]. */

    u_int32_t i;
    u_int32_t num_roots;
    poly_t f;

    num_roots = poly_get_zeros(zeros, _f, p, 0);
    if (num_roots == 0)
        return num_roots;

    poly_reduce_mod_p(f, _f, p);
    for (i = 0; i < num_roots; i++)
        mult[i] = 0;

    /* as many roots as the degree of f: no multiplicity test is
       needed and every mult[] entry stays 0 */
    if (f->degree == num_roots)
        return num_roots;

    for (i = 0; i < num_roots; i++) {
        poly_t g, r;
        u_int32_t root = zeros[i];
        u_int32_t twice_root = mp_modadd_1(root, root, p);

        /* g = (x - root)^2 = x^2 - 2*root*x + root^2.
           Negate 2*root with an explicit zero check: a plain
           'p - twice_root' would store the unreduced value p
           whenever 2*root == 0 mod p (e.g. root == 0) */
        g->degree = 2;
        g->coef[0] = mp_modmul_1(root, root, p);
        g->coef[1] = (twice_root == 0 ? 0 : p - twice_root);
        g->coef[2] = 1;

        /* NOTE(review): a degree-0 remainder is taken to mean
           'g divides f'; this presumes the remainder is never a
           nonzero constant, which should hold because root is
           already a zero of f -- confirm */
        poly_mod(r, f, g, p);
        if (r->degree == 0)
            mult[i] = 1;
    }
    return num_roots;
}
/*------------------------------------------------------------------------*/
static uint32
lift_root_32(uint32 n, uint32 r, uint32 old_power, uint32 p, uint32 d)
{
    /* Hensel lifting: r is a d-th root of n modulo old_power;
       produce the matching d-th root of n modulo old_power * p */

    const uint32 p2 = old_power * p;
    const uint64 base_root = r;
    uint32 quotient;
    uint32 slope;
    uint32 digit;

    /* n - r^d (mod p2) is expected to be a multiple of
       old_power; divide that factor out */
    quotient = mp_modsub_1(n % p2, mp_expo_1(r, d, p2), p2) / old_power;

    /* derivative of x^d at r, reduced mod p: d * r^(d-1) */
    slope = mp_modmul_1(d, mp_expo_1(r % p, d - 1, p), p);

    /* multiple of old_power to add to the old root */
    digit = mp_modmul_1(quotient, mp_modinv_1(slope, p), p);

    return base_root + old_power * digit;
}
static void
poly_modmul(poly_t res, poly_t a, poly_t b, poly_t mod, u_int32_t p)
{
    /* res = (a * b) reduced modulo the polynomial 'mod', with all
       coefficient arithmetic done modulo p. The full product is
       built in a local temporary before the reduction. */

    const u_int32_t deg_a = a->degree;
    const u_int32_t deg_b = b->degree;
    u_int32_t row, col;
    poly_t prod;

    /* row 0: a scaled by the constant term of b initialises the
       low coefficients of the product */
    for (col = 0; col <= deg_a; col++)
        prod->coef[col] = mp_modmul_1(a->coef[col], b->coef[0], p);

    /* row i adds a * b[i] * x^i. Every coefficient it touches has
       been written by an earlier row except the topmost one, which
       is therefore stored instead of accumulated */
    for (row = 1; row <= deg_b; row++) {
        for (col = 0; col < deg_a; col++) {
            u_int32_t term = mp_modmul_1(a->coef[col], b->coef[row], p);

            prod->coef[row + col] = mp_modadd_1(prod->coef[row + col],
                                                term, p);
        }
        prod->coef[row + deg_a] = mp_modmul_1(a->coef[deg_a],
                                              b->coef[row], p);
    }

    prod->degree = deg_a + deg_b;
    poly_fix_degree(prod);
    poly_mod(res, prod, mod, p);
}
static inline void
poly_make_monic(poly_t res, poly_t a, u_int32_t p)
{
    /* Write to 'res' the polynomial 'a' scaled so that its leading
       coefficient is 1 (coefficients modulo p). When 'a' is already
       monic it is simply copied. */

    const u_int32_t top = a->degree;
    u_int32_t lead = a->coef[top];
    u_int32_t k;

    if (lead == 1) {
        poly_cp(res, a);
        return;
    }

    /* multiply every lower coefficient by the inverse of the
       leading one; the leading coefficient itself becomes 1 */
    lead = mp_modinv_1(lead, p);
    res->degree = top;
    res->coef[top] = 1;
    for (k = 0; k < top; k++)
        res->coef[k] = mp_modmul_1(lead, a->coef[k], p);
}
static void
poly_mod(poly_t res, poly_t op, poly_t _mod, u_int32_t p)
{
    /* Polynomial remainder: res = op modulo _mod, with every
       coefficient reduced modulo p. A divisor of degree 0 yields
       an all-zero result. */

    u_int32_t k;
    poly_t rem, divisor;

    if (_mod->degree == 0) {
        memset(res, 0, sizeof(res[0]));
        return;
    }

    poly_cp(rem, op);
    poly_make_monic(divisor, _mod, p);

    while (rem->degree >= divisor->degree) {

        /* rem <-- rem - lead * divisor * x^(deg(rem) - deg(divisor)).
           The divisor is monic, so the leading term cancels
           exactly and is just cleared */

        const u_int32_t lead = rem->coef[rem->degree];
        const u_int32_t shift = rem->degree - divisor->degree;

        rem->coef[rem->degree] = 0;
        for (k = divisor->degree; k-- > 0; ) {
            u_int32_t c = mp_modmul_1(lead, divisor->coef[k], p);

            rem->coef[shift + k] = mp_modsub_1(rem->coef[shift + k],
                                               c, p);
        }
        poly_fix_degree(rem);
    }
    poly_cp(res, rem);
}
static uint8
choose_multiplier(mp_t *n, prime_list_t *prime_list, uint32 fb_size)
{
    /* Pick the entry k of mult_list[] for which k*n gets the best
       (lowest) score. scores[m] is the correction implicitly
       applied to the size of sieve values when multiplier m is
       used; a more negative score means smaller sieve values.
       Returns 1 if no multiplier qualifies. */

    uint32 m, pi;
    uint32 num_primes = MIN(2 * fb_size, NUM_TEST_PRIMES);
    uint32 num_multipliers;
    double scores[NUM_MULTIPLIERS];
    double best_score;
    uint8 best_mult;
    double log2n = mp_log(n);

    num_primes = MIN(num_primes, prime_list->num_primes);

    /* start each score with the cost of the multiplier itself
       and the contribution of 2 as a factor of sieve values */

    for (m = 0; m < NUM_MULTIPLIERS; m++) {
        uint8 curr_mult = mult_list[m];
        uint8 knmod8 = (curr_mult * n->val[0]) % 8;
        double logmult = log((double)curr_mult);

        /* only consider multipliers k such that k*n will
           not overflow an mp_t; the scan stops at the first
           multiplier that is too large */

        if (log2n + logmult > (32 * MAX_MP_WORDS - 2) * M_LN2)
            break;

        scores[m] = 0.5 * logmult;

        /* the bonus depends on k*n mod 8; even values of k*n
           get none and so start with a handicap */

        if (knmod8 == 1)
            scores[m] -= 2 * M_LN2;
        else if (knmod8 == 5)
            scores[m] -= M_LN2;
        else if (knmod8 == 3 || knmod8 == 7)
            scores[m] -= 0.5 * M_LN2;
    }
    num_multipliers = m;

    /* now the remaining small primes (entry 0 of the list is
       skipped; 2 was handled above) */

    for (pi = 1; pi < num_primes; pi++) {
        uint32 prime = prime_list->list[pi];
        double contrib = log((double)prime) / (prime - 1);
        uint32 modp = mp_mod_1(n, prime);

        for (m = 0; m < num_multipliers; m++) {
            uint8 curr_mult = mult_list[m];
            uint32 knmodp = mp_modmul_1(modp, curr_mult, prime);

            /* A prime p in the factor base of k*n contributes
               log(p) to 1 in p sieve values, plus log(p) to
               1 in p^2 sieve values, etc. Its average
               contribution to a random sieve value is thus

                  log(p) * (1/p + 1/p^2 + 1/p^3 + ...)
                  = (log(p) / p) * 1 / (1 - (1/p))
                  = log(p) / (p-1)

               and this occurs once per square root used for
               sieving: one root when p divides k*n, two
               roots when k*n is a nonzero quadratic residue
               mod p. Primes for which k*n is a non-residue
               are not in the factor base and add nothing */

            if (knmodp == 0)
                scores[m] -= contrib;
            else if (mp_legendre_1(knmodp, prime) == 1)
                scores[m] -= 2 * contrib;
        }
    }

    /* the lowest score wins; ties keep the earlier multiplier */

    best_score = 1000.0;
    best_mult = 1;
    for (m = 0; m < num_multipliers; m++) {
        if (scores[m] < best_score) {
            best_score = scores[m];
            best_mult = mult_list[m];
        }
    }
    return best_mult;
}
/*-------------------------------------------------------------------*/ static uint32 verify_product(gmp_poly_t *gmp_prod, abpair_t *abpairs, uint32 num_relations, uint32 q, mp_t *c, mp_poly_t *alg_poly) { /* a sanity check on the computed value of S(x): for a small prime q for which alg_poly is irreducible, verify that gmp_prod mod q equals the product mod q of the relations in abpairs[]. The latter can be computed very quickly */ uint32 i, j; uint32 c_mod_q = mp_mod_1(c, q); uint32 d = alg_poly->degree; uint32 ref_prod[MAX_POLY_DEGREE]; uint32 prod[MAX_POLY_DEGREE]; uint32 mod[MAX_POLY_DEGREE]; uint32 accum[MAX_POLY_DEGREE + 1]; /* compute the product mod q directly. First initialize and reduce the coefficients of alg_poly and gmp_prod mod q */ for (i = 0; i < d; i++) { prod[i] = 0; ref_prod[i] = mpz_fdiv_ui(gmp_prod->coeff[i], (unsigned long)q); mod[i] = mp_mod_1(&alg_poly->coeff[i].num, q); if (alg_poly->coeff[i].sign == NEGATIVE && mod[i] > 0) { mod[i] = q - mod[i]; } } prod[0] = 1; /* multiply the product by each relation in turn, modulo q */ for (i = 0; i < num_relations; i++) { int64 a = abpairs[i].a; uint32 b = q - (abpairs[i].b % q); uint32 ac; a = a % (int64)q; if (a < 0) a += q; ac = mp_modmul_1((uint32)a, c_mod_q, q); for (j = accum[0] = 0; j < d; j++) { accum[j+1] = mp_modmul_1(prod[j], b, q); accum[j] = mp_modadd_1(accum[j], mp_modmul_1(ac, prod[j], q), q); } for (j = 0; j < d; j++) { prod[j] = mp_modsub_1(accum[j], mp_modmul_1(accum[d], mod[j], q), q); } } /* do the polynomial compare */ for (i = 0; i < d; i++) { if (ref_prod[i] != prod[i]) break; } if (i == d) return 1; return 0; }
/*------------------------------------------------------------------*/
u_int32_t
poly_get_zeros(u_int32_t *zeros, mpzpoly_t _f,
               u_int32_t p, u_int32_t count_only)
{
    /* Find the roots of _f when its coefficients are reduced
       mod p; each distinct root is reported once. The return
       value is the number of roots found.

       Make count_only nonzero if only the number of roots and
       not their identity matters; this is much faster. Note that
       zeros[] can still be written in that mode (a root of zero,
       or the root 1 when p is 2, is stored before returning), so
       it must point to valid storage either way */

    poly_t g, f;
    u_int32_t k, num_zeros;

    /* reduce the coefficients mod p */

    poly_reduce_mod_p(f, _f, p);

    /* nothing to do for a constant (or zero) polynomial */

    if (f->degree == 0)
        return 0;

    /* pull out roots of zero: when the constant term vanishes,
       divide by the largest power of x that divides f and
       record the root 0 once. We do this early to avoid having
       to handle degree-1 polynomials in later code */

    num_zeros = 0;
    if (f->coef[0] == 0) {
        u_int32_t low = 1;

        while (low <= f->degree && f->coef[low] == 0)
            low++;
        for (k = low; k <= f->degree; k++)
            f->coef[k - low] = f->coef[k];
        f->degree -= low;
        zeros[num_zeros++] = 0;
    }

    /* handle trivial cases */

    if (f->degree == 0)
        return num_zeros;

    if (f->degree == 1) {
        u_int32_t lead = f->coef[1];

        if (count_only)
            return num_zeros + 1;

        /* root of lead*x + c is -c / lead */

        if (lead == 1) {
            zeros[num_zeros++] = (f->coef[0] == 0 ?
                                  0 : p - f->coef[0]);
        } else {
            lead = mp_modinv_1(lead, p);
            zeros[num_zeros++] = mp_modmul_1(p - f->coef[0],
                                             lead, p);
        }
        return num_zeros;
    }

    /* the rest of the algorithm assumes p is odd, which
       will not work for p=2. Fortunately, in that case
       there are only two possible roots, 0 and 1. The above
       already tried 0, so try 1 here: f(1) is the sum of the
       coefficients, so 1 is a root when their parity is even */

    if (p == 2) {
        u_int32_t parity = 0;

        for (k = 0; k <= f->degree; k++)
            parity ^= f->coef[k];
        if (parity == 0)
            zeros[num_zeros++] = 1;
        return num_zeros;
    }

    /* Compute g = gcd(f, x^(p-1) - 1). The result is
       a polynomial that is the product of all the linear
       factors of f. A given factor only occurs once in
       this polynomial */

    poly_xpow(g, 0, p-1, f, p);
    g->coef[0] = mp_modsub_1(g->coef[0], 1, p);
    poly_fix_degree(g);
    poly_gcd(g, f, p);

    /* the degree of g is the number of remaining roots; stop
       here if there are none or if only the count is wanted */

    if (count_only || g->degree < 1)
        return num_zeros + g->degree;

    /* isolate the linear factors */

    get_zeros_rec(zeros, 0, &num_zeros, g, p);
    return num_zeros;
}
/*------------------------------------------------------------------*/
static void
get_zeros_rec(u_int32_t *zeros, u_int32_t shift,
              u_int32_t *num_zeros, poly_t f, u_int32_t p)
{
    /* get the zeros of a poly, f, that is known to split
       completely over Z/pZ. Roots are appended to zeros[] and
       *num_zeros is advanced accordingly; 'shift' is the first
       splitting value to try. Many thanks to Bob Silverman for
       a neat implementation of Cantor-Zassenhaus splitting */

    poly_t g, xpow;
    u_int32_t degree1, degree2;

    /* base cases of the recursion: we can find the roots
       of linear and quadratic polynomials immediately */

    if (f->degree == 1) {
        u_int32_t lead = f->coef[1];
        u_int32_t root;

        /* root of lead*x + c is -c / lead */

        if (lead == 1) {
            root = (f->coef[0] == 0 ? 0 : p - f->coef[0]);
        } else {
            lead = mp_modinv_1(lead, p);
            root = mp_modmul_1(p - f->coef[0], lead, p);
        }
        zeros[(*num_zeros)++] = root;
        return;
    }

    if (f->degree == 2) {

        /* if f is a quadratic polynomial, then it will
           always have two distinct nonzero roots or else
           we wouldn't have gotten to this point. With
           f = a*x^2 + b*x + c the two roots are
           (-b +- sqrt(b^2 - 4*a*c)) / (2*a), mod p */

        u_int32_t ac = mp_modmul_1(f->coef[0], f->coef[2], p);
        u_int32_t neg_b = p - f->coef[1];
        u_int32_t inv_2a = mp_modinv_1(
                    mp_modadd_1(f->coef[2], f->coef[2], p), p);
        u_int32_t disc;
        u_int32_t plus_root, minus_root;

        disc = mp_modsub_1(mp_modmul_1(f->coef[1], f->coef[1], p),
                           mp_modmul_1(4, ac, p), p);
        disc = mp_modsqrt_1(disc, p);

        plus_root = mp_modadd_1(neg_b, disc, p);
        minus_root = mp_modsub_1(neg_b, disc, p);
        zeros[(*num_zeros)++] = mp_modmul_1(plus_root, inv_2a, p);
        zeros[(*num_zeros)++] = mp_modmul_1(minus_root, inv_2a, p);
        return;
    }

    /* For an increasing sequence of integers 's', compute
       the polynomial gcd((x-s)^(p-1)/2 - 1, f). If the result is
       not g = 1 or g = f, this is a nontrivial splitting
       of f. References require choosing s randomly, but however
       s is chosen there is a 50% chance that it will split f.
       Since only 0 <= s < p is valid, we choose each s in turn;
       choosing random s allows the possibility that the same
       s gets chosen twice (mod p), which would waste time.

       The scan stops at the first s whose gcd has positive
       degree; 'shift' then holds that s */

    for (; shift < p; shift++) {
        poly_xpow(xpow, shift, (p-1)/2, f, p);

        poly_cp(g, xpow);
        g->coef[0] = mp_modsub_1(g->coef[0], 1, p);
        poly_fix_degree(g);
        poly_gcd(g, f, p);

        if (g->degree > 0)
            break;
    }

    /* f was split; repeat the splitting process on the two
       halves of f. The linear factors of f are either somewhere
       in x^((p-1)/2) - 1, in x^((p-1)/2) + 1, or 'shift' itself
       is a linear factor. Test each of these possibilities in
       turn. In the first two cases, begin trying values of s
       strictly greater than have been tried thus far */

    degree1 = g->degree;
    get_zeros_rec(zeros, shift + 1, num_zeros, g, p);

    /* xpow still holds the power computed for this shift, so
       the '+ 1' half needs no new exponentiation */

    poly_cp(g, xpow);
    g->coef[0] = mp_modadd_1(g->coef[0], 1, p);
    poly_fix_degree(g);
    poly_gcd(g, f, p);
    degree2 = g->degree;

    if (degree2 > 0)
        get_zeros_rec(zeros, shift + 1, num_zeros, g, p);

    /* a factor accounted for by neither half must be the one
       whose root is -shift mod p */

    if (degree1 + degree2 < f->degree)
        zeros[(*num_zeros)++] = (shift == 0 ? 0 : p - shift);
}
static inline u_int32_t
mp_modsqrt_1(u_int32_t a, u_int32_t p)
{
    /* Square root of a modulo p, choosing the method from the
       residue of p mod 8. NOTE(review): presumably p is an odd
       prime and a is a quadratic residue mod p -- neither is
       checked here. If no quadratic non-residue can be found in
       the general case, a message is printed and the process
       exits. */

    u_int32_t a0 = a;
    u_int32_t d0, d1, a1, s, t, m;
    u_int32_t i;

    switch (p & 7) {
    case 3:
    case 7:
        /* p = 3 mod 4: the root is a^((p+1)/4) */
        return mp_expo_1(a0, (p+1)/4, p);

    case 5: {
        /* p = 5 mod 8: try x = a^((p+3)/8); if x^2 is not a,
           multiply x by 2^((p-1)/4) */
        u_int32_t x, y;

        if (a0 >= p)
            a0 = a0 % p;
        x = mp_expo_1(a0, (p+3)/8, p);
        if (mp_modmul_1(x, x, p) == a0)
            return x;

        y = mp_expo_1(2, (p-1)/4, p);
        return mp_modmul_1(x, y, p);
    }

    default:
        break;
    }

    /* general case */

    if (a0 == 1)
        return 1;

    /* search upward from 2 for a value d0 whose Legendre
       symbol mod p is -1 */

    for (d0 = 2; d0 < p; d0++) {
        if (mp_legendre_1(d0, p) == -1)
            break;
    }
    if (d0 >= p) {
        printf("modsqrt_1 failed\n");
        exit(-1);
        return 0;
    }

    /* write p - 1 = 2^s * t with t odd */

    t = p - 1;
    s = 0;
    while ((t & 1) == 0) {
        s++;
        t >>= 1;
    }

    a1 = mp_expo_1(a0, t, p);
    d1 = mp_expo_1(d0, t, p);

    /* build the exponent m one bit at a time, lowest bit first:
       bit i is set when (d1^m * a1)^(2^(s-1-i)) equals -1 mod p */

    m = 0;
    for (i = 0; i < s; i++) {
        u_int32_t ad = mp_modmul_1(mp_expo_1(d1, m, p), a1, p);

        ad = mp_expo_1(ad, (u_int32_t)(1) << (s-1-i), p);
        if (ad == (p - 1))
            m += (1 << i);
    }

    /* the root is a^((t+1)/2) * d1^(m/2) */

    a1 = mp_expo_1(a0, (t+1)/2, p);
    d1 = mp_expo_1(d1, m/2, p);
    return mp_modmul_1(a1, d1, p);
}