/// P3(x, a) counts the numbers <= x that have exactly 3 /// prime factors each exceeding the a-th prime. /// Space complexity: O(pi(sqrt(x))). /// int64_t P3(int64_t x, int64_t a, int threads) { print(""); print("=== P3(x, a) ==="); print("Computation of the 3rd partial sieve function"); double time = get_wtime(); vector<int32_t> primes = generate_primes(isqrt(x)); int64_t y = iroot<3>(x); int64_t pi_y = pi_bsearch(primes, y); int64_t sum = 0; threads = ideal_num_threads(threads, pi_y, 100); #pragma omp parallel for num_threads(threads) schedule(dynamic) reduction(+: sum) for (int64_t i = a + 1; i <= pi_y; i++) { int64_t xi = x / primes[i]; int64_t bi = pi_bsearch(primes, isqrt(xi)); for (int64_t j = i; j <= bi; j++) sum += pi_bsearch(primes, xi / primes[j]) - (j - 1); } print("P3", sum, time); return sum; }
/// Factor numbers <= y FactorTable(int64_t y, int threads) { if (y > max()) throw primesum_error("y must be <= FactorTable::max()"); y = std::max<int64_t>(8, y); T T_MAX = std::numeric_limits<T>::max(); factor_.resize(get_index(y) + 1, T_MAX); int64_t sqrty = isqrt(y); int64_t thread_threshold = ipow(10, 7); threads = ideal_num_threads(threads, y, thread_threshold); int64_t thread_distance = ceil_div(y, threads); #pragma omp parallel for num_threads(threads) for (int t = 0; t < threads; t++) { int64_t low = 1; low += thread_distance * t; int64_t high = std::min(low + thread_distance, y); primesieve::iterator it(get_number(1) - 1); while (true) { int64_t i = 1; int64_t prime = it.next_prime(); int64_t multiple = next_multiple(prime, low, &i); int64_t min_m = prime * get_number(1); if (min_m > high) break; for (; multiple <= high; multiple = prime * get_number(i++)) { int64_t mi = get_index(multiple); // prime is smallest factor of multiple if (factor_[mi] == T_MAX) factor_[mi] = (T) prime; // the least significant bit indicates // whether multiple has an even (0) or odd (1) // number of prime factors else if (factor_[mi] != 0) factor_[mi] ^= 1; } if (prime <= sqrty) { int64_t j = 0; int64_t square = prime * prime; multiple = next_multiple(square, low, &j); // moebius(n) = 0 for (; multiple <= high; multiple = square * get_number(j++)) factor_[get_index(multiple)] = 0; } } } }
/// Partial sieve function (a.k.a. Legendre-sum). /// phi(x, a) counts the numbers <= x that are not divisible /// by any of the first a primes. /// int64_t phi(int64_t x, int64_t a, int threads) { if (x < 1) return 0; if (a > x) return 1; if (a < 1) return x; print(""); print("=== phi(x, a) ==="); print("Count the numbers <= x coprime to the first a primes"); double time = get_wtime(); int64_t sum = 0; if (is_phi_tiny(a)) sum = phi_tiny(x, a); else { vector<int32_t> primes = generate_n_primes(a); if (primes.at(a) >= x) sum = 1; else { // use a large pi(x) lookup table for speed int64_t sqrtx = isqrt(x); PiTable pi(max(sqrtx, primes[a])); PhiCache cache(primes, pi); int64_t pi_sqrtx = min(pi[sqrtx], a); sum = x - a + pi_sqrtx; int64_t p14 = ipow((int64_t) 10, 14); int64_t thread_threshold = p14 / primes[a]; threads = ideal_num_threads(threads, x, thread_threshold); // this loop scales only up to about 8 CPU cores threads = min(8, threads); #pragma omp parallel for schedule(dynamic, 16) \ num_threads(threads) firstprivate(cache) reduction(+: sum) for (int64_t a2 = 0; a2 < pi_sqrtx; a2++) sum += cache.phi<-1>(x / primes[a2 + 1], a2); } } print("phi", sum, time); return sum; }