// A quadratic sieve implementation for integers up to 100 bits. N must be composite.
//
// Outline:
//   1. Pick a smoothness bound B and collect the primes p <= B for which
//      mpz_legendre(N, p) == 1 (the factor base).
//   2. Sieve the values (sqrt_N + x)^2 - N in chunks of SIEVE_CHUNK offsets x,
//      using bit-length log estimates, and confirm likely candidates by trial
//      division until factor_base.size() + 20 smooth values are collected.
//   3. Build the GF(2) matrix of the factorisations (one row per factor-base
//      prime, one bit per smooth value), bring it to row echelon form and
//      back-substitute to select a subset of smooth values.
//   4. Form a and b from that subset and return gcd(b - a, N); the selection is
//      repeated until a is not congruent to +/- b (mod N).
//
// All working buffers are owned by std::vector, so nothing has to be released
// by hand and nothing leaks if an allocation throws part-way through.
mpz_class quadratic_sieve(mpz_class &N) {
    std::vector<uint32_t> factor_base;

    mpz_class sqrt_N = sqrt(N);

    // Set the smoothness bound.
    uint32_t B;
    {
        // Approximation of the natural logarithm of N.
        float log_N = mpz_sizeinbase(N.get_mpz_t(), 2) * log(2);

        // The optimal smoothness bound is exp((0.5 + o(1)) * sqrt(log(n)*log(log(n)))).
        B = static_cast<uint32_t>(ceil(exp(0.56 * sqrt(log_N * log(log_N))))) + 300;
    }

    // Generate the factor base using a sieve of Eratosthenes over [2, B].
    {
        std::vector<char> is_candidate(static_cast<std::size_t>(B) + 1, 1);
        for(unsigned long p = 2; p <= B; ++p) {
            if(!is_candidate[p])
                continue;

            // Keep only primes whose symbol (N/p) is 1.
            if(mpz_legendre(N.get_mpz_t(), mpz_class(p).get_mpz_t()) == 1)
                factor_base.push_back(static_cast<uint32_t>(p));

            for(unsigned long i = p; i <= B; i += p)
                is_candidate[i] = 0;
        }
    }

    // X[k] is the sieve offset of the k-th smooth value; smooth[k] lists the
    // factor-base indexes (with multiplicity) of its prime factors.
    std::vector<uint32_t> X;
    std::vector<std::vector<uint32_t> > smooth;

    // Log estimates for the current chunk.
    std::vector<float> Y(SIEVE_CHUNK);

    // The sieve boundary: the current chunk covers offsets [min_x, max_x).
    uint32_t min_x = 0;
    uint32_t max_x = SIEVE_CHUNK;

    // Calculate sieve index (where to start the sieve) for each factor base number.
    std::vector<uint32_t> fb_indexes[2] = {
        std::vector<uint32_t>(factor_base.size()),
        std::vector<uint32_t>(factor_base.size())
    };
    for(uint32_t p = 0; p < factor_base.size(); ++p) {
        // At what indexes do we start this sieve? Solve the congruence x^2 = n (mod p) to find out.
        // Results in two solutions, so we do two sieve iterations for each prime in the factor base.
        uint32_t idxs[2];
        mpz_class temp = N % mpz_class(factor_base[p]);
        tonelli_shanks(temp.get_ui(), factor_base[p], idxs);

        for(uint32_t r = 0; r < 2; ++r) {
            // Shift the root by sqrt_N and normalise it into [0, p).
            temp = idxs[r] - sqrt_N;
            temp = ((temp % factor_base[p]) + factor_base[p]) % factor_base[p];
            fb_indexes[r][p] = temp.get_ui();
        }
    }

    float last_estimate = 0;
    uint32_t next_estimate = 1;

    // Sieve new chunks until we have enough smooth numbers.
    while(smooth.size() < (factor_base.size() + 20)) {
        // Generate our Y vector for the sieve, containing log approximations that fit in machine words.
        // The loop starts at t = 0 so that every slot of the chunk is (re)initialised;
        // starting at 1 left Y[0] uninitialised in the first chunk and stale afterwards.
        for(uint32_t t = 0; t < SIEVE_CHUNK; ++t) {
            // Calculating a log estimate is expensive, so don't do it for every Y[t].
            if(next_estimate <= (t + min_x)) {
                mpz_class y = (sqrt_N + t + min_x) * (sqrt_N + t + min_x) - N;

                // To estimate the 2 logarithm, just count the number of bits that y takes up.
                last_estimate = mpz_sizeinbase(y.get_mpz_t(), 2);

                // The higher t gets, the less the logarithm of Y[t] changes.
                next_estimate = next_estimate * 1.8 + 1;
            }

            Y[t] = last_estimate;
        }

        // Perform the actual sieve.
        for(uint32_t p = 0; p < factor_base.size(); ++p) {
            float lg = log(factor_base[p]) / log(2);

            for(uint32_t t = 0; t < 2; ++t) {
                while(fb_indexes[t][p] < max_x) {
                    Y[fb_indexes[t][p] - min_x] -= lg;
                    fb_indexes[t][p] += factor_base[p];
                }

                // p = 2 only has one modular root.
                if(factor_base[p] == 2)
                    break;
            }
        }

        // Factor all values whose logarithms were reduced to approximately zero using trial division.
        {
            float threshold = log(factor_base.back()) / log(2);
            for(uint32_t i = 0; i < SIEVE_CHUNK; ++i) {
                if(!(fabs(Y[i]) < threshold))
                    continue;

                // Offset 0 yields sqrt_N^2 - N, which is <= 0 because sqrt_N is the
                // integer square root: it can never reduce to 1, and when it is
                // exactly 0 the divisibility loop below would never terminate.
                if(i + min_x == 0)
                    continue;

                mpz_class y = (sqrt_N + i + min_x) * (sqrt_N + i + min_x) - N;

                // Factor-base indexes (with multiplicity) dividing y.
                std::vector<uint32_t> factors;
                for(uint32_t p = 0; p < factor_base.size(); ++p) {
                    while(mpz_divisible_ui_p(y.get_mpz_t(), factor_base[p])) {
                        mpz_divexact_ui(y.get_mpz_t(), y.get_mpz_t(), factor_base[p]);
                        factors.push_back(p);
                    }
                }

                // Values that do not reduce to 1 were not B-smooth and are dropped.
                if(y == 1) {
                    smooth.push_back(factors);
                    X.push_back(i + min_x);

                    // Break out of trial division loop if we've found enough smooth numbers.
                    if(smooth.size() >= (factor_base.size() + 20))
                        break;
                }
            }
        }

        min_x += SIEVE_CHUNK;
        max_x += SIEVE_CHUNK;
    }

    // The amount of words needed to accomodate a row in the augmented matrix.
    // NOTE(review): this divides by sizeof(uint64_t) (bytes) rather than by the
    // number of bits in a word, so it looks like it reserves more words than
    // smooth.size() + 1 bits need. Left unchanged because the bit helpers are
    // not visible here - confirm before shrinking.
    int row_words = (smooth.size() + sizeof(uint64_t)) / sizeof(uint64_t);

    // Row storage lives in one flat, zero-initialised vector; `matrix` holds a
    // pointer to each row so that rows can be swapped cheaply and passed to the
    // bit helpers as plain uint64_t pointers.
    std::vector<uint64_t> matrix_storage(factor_base.size() * row_words, 0);
    std::vector<uint64_t *> matrix(factor_base.size());
    {
        uint64_t *base = matrix_storage.empty() ? 0 : &matrix_storage[0];
        for(uint32_t i = 0; i < factor_base.size(); ++i)
            matrix[i] = base + static_cast<std::size_t>(i) * row_words;
    }

    for(uint32_t s = 0; s < smooth.size(); ++s) {
        // For each factor in the smooth number, add the factor to the corresponding element in the matrix.
        for(uint32_t p = 0; p < smooth[s].size(); ++p)
            toggle_bit(s, matrix[smooth[s][p]]);
    }

    // Gauss elimination. The dimension of the augmented matrix is factor_base.size() x (smooth.size() + 1).
    {
        uint32_t i = 0, j = 0;
        while(i < factor_base.size() && j < (smooth.size() + 1)) {
            uint32_t maxi = i;

            // Find pivot element.
            for(uint32_t k = i + 1; k < factor_base.size(); ++k) {
                if(get_bit(j, matrix[k]) == 1) {
                    maxi = k;
                    break;
                }
            }

            if(get_bit(j, matrix[maxi]) == 1) {
                std::swap(matrix[i], matrix[maxi]);

                // Clear column j in every row below the pivot row.
                for(uint32_t u = i + 1; u < factor_base.size(); ++u) {
                    if(get_bit(j, matrix[u]) == 1) {
                        for(int32_t w = 0; w < row_words; ++w)
                            matrix[u][w] ^= matrix[i][w];
                    }
                }
                ++i;
            }
            ++j;
        }
    }

    mpz_class a;
    mpz_class b;

    // A copy of matrix that we'll perform back-substitution on.
    std::vector<uint64_t> back_storage(factor_base.size() * row_words, 0);
    std::vector<uint64_t *> back_matrix(factor_base.size());
    {
        uint64_t *base = back_storage.empty() ? 0 : &back_storage[0];
        for(uint32_t i = 0; i < factor_base.size(); ++i)
            back_matrix[i] = base + static_cast<std::size_t>(i) * row_words;
    }

    std::vector<uint32_t> x(smooth.size());
    std::vector<uint32_t> combination(factor_base.size());

    // Loop until we've found a non-trivial factor.
    do {
        // Copy the gauss eliminated matrix.
        for(uint32_t i = 0; i < factor_base.size(); ++i)
            memcpy(back_matrix[i], matrix[i], row_words * sizeof(uint64_t));

        // Clear the x vector.
        x.assign(smooth.size(), 0);

        // Perform back-substitution on our matrix that's now in row echelon form to get x.
        {
            int32_t i = factor_base.size() - 1;

            while(i >= 0) {
                // Count non-zero elements in current row.
                int32_t count = 0;
                int32_t current = -1;
                for(uint32_t c = 0; c < smooth.size(); ++c) {
                    count += get_bit(c, back_matrix[i]);
                    current = get_bit(c, back_matrix[i]) ? c : current;
                }

                // Empty row, advance to next.
                if(count == 0) {
                    --i;
                    continue;
                }

                // The system is underdetermined and we can choose x[current] freely.
                // To avoid the trivial solution we avoid always setting it to 0.
                uint32_t val = count > 1 ? rand() % 2 : get_bit(smooth.size(), back_matrix[i]);

                x[current] = val;

                // Substitute the chosen value into this row and every row above it.
                for(int32_t u = 0; u <= i; ++u) {
                    if(get_bit(current, back_matrix[u]) == 1) {
                        if(val == 1)
                            toggle_bit(smooth.size(), back_matrix[u]);
                        unset_bit(current, back_matrix[u]);
                    }
                }

                if(count == 1)
                    --i;
            }
        }

        a = 1;
        b = 1;

        // The way to combine the factor base to get our square.
        combination.assign(factor_base.size(), 0);
        for(uint32_t i = 0; i < smooth.size(); ++i) {
            if(x[i] == 1) {
                for(uint32_t p = 0; p < smooth[i].size(); ++p)
                    ++combination[smooth[i][p]];
                b *= (X[i] + sqrt_N);
            }
        }

        // a is the product of each factor-base prime raised to half its total exponent.
        for(uint32_t p = 0; p < factor_base.size(); ++p) {
            for(uint32_t i = 0; i < (combination[p] / 2); ++i)
                a *= factor_base[p];
        }

        // If a = +/- b (mod N) we found a trivial factor, run the loop again to find a new a and b.
    } while(a % N == b % N || a % N == (- b) % N + N);

    b -= a;

    mpz_class factor;
    mpz_gcd(factor.get_mpz_t(), b.get_mpz_t(), N.get_mpz_t());

    return factor;
}
// Sieve the series (root + k)^2 - num, k = 0, 1, 2, ..., for values whose
// remaining log drops to LIMIT or below after subtracting the logs of the
// primes that divide them, until more than B such hits have been recorded.
//
// num      - the number being factored.
// gmp_root - starting root; it is incremented by one in place before use.
//
// Reads the globals primes, log_primes, B and LIMIT, fills first_cands with
// exact values of the start of the series and leaves the number of recorded
// hits in num_cands. If the sieve table cannot be allocated the function
// reports it on stderr and returns with num_cands == 0.
//
// NOTE(review): a table slot is recorded every time a prime hit leaves it at or
// below LIMIT, so the same value can be recorded more than once, and
// cand_offsets is a local array that is never read back - confirm intent.
void find_candidates(mpz_t num, mpz_t gmp_root) {
    num_cands = 0;
    mpz_add_ui(gmp_root, gmp_root, 1);

    size_t sieve_size = primes[B-1]*100;
    double *table = malloc(sizeof(double)*sieve_size);
    if(table == NULL) {
        fprintf(stderr, "find_candidates: out of memory\n");
        return;
    }

    uint64_t root = mpz_get_ui(gmp_root);
    double approx = mpz_get_d(num);

    mpz_t tmp, tmp2;
    mpz_init(tmp);
    mpz_init(tmp2);

    // first_cands[0] = root^2 - num, the first value of the series.
    mpz_set_ui(tmp, root);
    mpz_pow_ui(tmp, tmp, 2);
    mpz_sub(tmp, tmp, num);
    mpz_set(first_cands[0], tmp);

    unsigned int used_primes[B];
    double used_log[B];
    used_primes[0] = 2;
    used_log[0] = log_primes[0];
    size_t next_prime = 1;

    // offsets[p][0..1]: the two series indexes in [0, prime) divisible by used_primes[p].
    size_t offsets[B][2];
    uint64_t cand_offsets[B+1];
    size_t next_cand = 1;

    // Find prime numbers that appear in the candidate series
    for(size_t p = 1; p < B; ++p) {
        unsigned int prime = primes[p];

        // Euler's criterion: num^((prime-1)/2) mod prime is 1 for quadratic residues.
        mpz_set_ui(tmp2, prime);
        mpz_powm_ui(tmp, num, (prime-1)/2, tmp2);
        if(mpz_cmp_ui(tmp, 1)) // Skip non-quadratic residues
            continue;

        used_primes[next_prime] = prime;
        used_log[next_prime] = log_primes[p];

        // Generate more exact candidates
        for(size_t i = next_cand; i < prime; ++i) {
            mpz_set_ui(tmp, root + i);
            mpz_pow_ui(tmp, tmp, 2);
            mpz_sub(tmp, tmp, num);
            mpz_set(first_cands[i], tmp);
        }
        next_cand = prime;

        // find offsets for them
        // TODO Shanks-tonelli
        unsigned int foo = tonelli_shanks(num, prime);
        printf("root for %u is %u and %u\n", prime, foo, prime-foo);

        // Until the Tonelli-Shanks roots are used, locate the two offsets by scanning.
        size_t idx = 0;
        for(unsigned int i = 0; i < prime; ++i) {
            if(mpz_divisible_ui_p(first_cands[i], prime)) {
                offsets[next_prime][idx++] = i;
                if(idx == 2)
                    break;
            }
        }
        assert(idx == 2);
        ++next_prime;
    }

    // (root + k)^2 - num has the parity of first_cands[0] + k, so the even values
    // sit at the indexes k whose parity equals that of first_cands[0].
    const size_t first_is_odd = mpz_odd_p(first_cands[0]) ? 1 : 0;

    // sieve until we find more than B candidates, guarantees linear dependence
    size_t sieve_offset = 0;
    while(num_cands <= B) {
        // table[i] approximates log of the series value at index i + sieve_offset.
        for(size_t i = 0; i < sieve_size; ++i) {
            double d = root + i;
            d += sieve_offset;
            table[i] = log(d*d-approx);
        }

        // cross out even ones
        for(size_t i = first_is_odd ^ (sieve_offset & 1); i < sieve_size; i += 2)
            table[i] -= log_primes[0];

        for(size_t p = 1; p < next_prime && num_cands <= B; ++p) {
            unsigned int prime = used_primes[p];
            double log_prime = used_log[p];
            // fprintf(stderr, "offsets[%d] are %d and %d\n", used_primes[p], offsets[p][0], offsets[p][1]);

            // The bound check on x keeps the second root from writing past
            // cand_offsets once the first root has already filled it.
            for(int x = 0; x < 2 && num_cands <= B; ++x) {
                // Table index a maps to series index a + sieve_offset, which is
                // divisible by prime exactly when it is congruent to offsets[p][x].
                size_t off = (offsets[p][x] + prime - sieve_offset % prime) % prime;
                for(size_t a = off; a < sieve_size; a += prime) {
                    table[a] -= log_prime;
                    if(table[a] > LIMIT)
                        continue;

                    cand_offsets[num_cands++] = root + a + sieve_offset;
                    if(num_cands > B)
                        break;
                }
            }
        }
        sieve_offset += sieve_size;
    }

    for(size_t i = 0; i < num_cands; ++i) {
        //fprintf(stderr, "%llu\n", cand_offsets[i]);
        // TODO REMOVE VERY SLOW STUFF
        mpz_set_ui(tmp, root + i);
        mpz_pow_ui(tmp, tmp, 2);
        mpz_sub(tmp, tmp, num);
        // mpz_out_str(stderr, 10, tmp);
        // fputc('\n', stderr);
    }

    fprintf(stderr, "%d used candidates\n", num_cands);

    free(table);
    mpz_clear(tmp);
    mpz_clear(tmp2);
}