C++ (Cpp) ideal_num_threads示例

编程语言: C++ (Cpp)

方法/功能: ideal_num_threads

hotexamples.com的示例: 3

C++ (Cpp) ideal_num_threads - 已找到3个示例。这些是从开源项目中提取的最受好评的ideal_num_threads现实C++ (Cpp)示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

/// P3(x, a) counts the numbers <= x that have exactly 3
/// prime factors each exceeding the a-th prime.
/// Space complexity: O(pi(sqrt(x))).
///
int64_t P3(int64_t x, int64_t a, int threads)
{
  print("");
  print("=== P3(x, a) ===");
  print("Computation of the 3rd partial sieve function");

  double time = get_wtime();
  vector<int32_t> primes = generate_primes(isqrt(x));

  int64_t y = iroot<3>(x);
  int64_t pi_y = pi_bsearch(primes, y);
  int64_t sum = 0;

  threads = ideal_num_threads(threads, pi_y, 100);

  #pragma omp parallel for num_threads(threads) schedule(dynamic) reduction(+: sum)
  for (int64_t i = a + 1; i <= pi_y; i++)
  {
    int64_t xi = x / primes[i];
    int64_t bi = pi_bsearch(primes, isqrt(xi));

    for (int64_t j = i; j <= bi; j++)
      sum += pi_bsearch(primes, xi / primes[j]) - (j - 1);
  }

  print("P3", sum, time);
  return sum;
}

示例#2

显示文件

文件： FactorTable.hpp 项目： kimwalisch/primesum

  /// Factor numbers <= y
  FactorTable(int64_t y, int threads)
  {
    if (y > max())
      throw primesum_error("y must be <= FactorTable::max()");

    y = std::max<int64_t>(8, y);
    T T_MAX = std::numeric_limits<T>::max();
    factor_.resize(get_index(y) + 1, T_MAX);

    int64_t sqrty = isqrt(y);
    int64_t thread_threshold = ipow(10, 7);
    threads = ideal_num_threads(threads, y, thread_threshold);
    int64_t thread_distance = ceil_div(y, threads);

    #pragma omp parallel for num_threads(threads)
    for (int t = 0; t < threads; t++)
    {
      int64_t low = 1;
      low += thread_distance * t;
      int64_t high = std::min(low + thread_distance, y);
      primesieve::iterator it(get_number(1) - 1);

      while (true)
      {
        int64_t i = 1;
        int64_t prime = it.next_prime();
        int64_t multiple = next_multiple(prime, low, &i);
        int64_t min_m = prime * get_number(1);

        if (min_m > high)
          break;

        for (; multiple <= high; multiple = prime * get_number(i++))
        {
          int64_t mi = get_index(multiple);
          // prime is smallest factor of multiple
          if (factor_[mi] == T_MAX)
            factor_[mi] = (T) prime;
          // the least significant bit indicates
          // whether multiple has an even (0) or odd (1)
          // number of prime factors
          else if (factor_[mi] != 0)
            factor_[mi] ^= 1;
        }

        if (prime <= sqrty)
        {
          int64_t j = 0;
          int64_t square = prime * prime;
          multiple = next_multiple(square, low, &j);

          // moebius(n) = 0
          for (; multiple <= high; multiple = square * get_number(j++))
            factor_[get_index(multiple)] = 0;
        }
      }
    }
  }

示例#3

显示文件

/// Partial sieve function (a.k.a. Legendre-sum).
/// phi(x, a) counts the numbers <= x that are not divisible
/// by any of the first a primes.
///
int64_t phi(int64_t x, int64_t a, int threads)
{
  if (x < 1) return 0;
  if (a > x) return 1;
  if (a < 1) return x;

  print("");
  print("=== phi(x, a) ===");
  print("Count the numbers <= x coprime to the first a primes");

  double time = get_wtime();
  int64_t sum = 0;

  if (is_phi_tiny(a))
    sum = phi_tiny(x, a);
  else
  {
    vector<int32_t> primes = generate_n_primes(a);

    if (primes.at(a) >= x)
      sum = 1;
    else
    {
      // use a large pi(x) lookup table for speed
      int64_t sqrtx = isqrt(x);
      PiTable pi(max(sqrtx, primes[a]));
      PhiCache cache(primes, pi);

      int64_t pi_sqrtx = min(pi[sqrtx], a); 
      sum = x - a + pi_sqrtx;

      int64_t p14 = ipow((int64_t) 10, 14);
      int64_t thread_threshold = p14 / primes[a];
      threads = ideal_num_threads(threads, x, thread_threshold);

      // this loop scales only up to about 8 CPU cores
      threads = min(8, threads);

      #pragma omp parallel for schedule(dynamic, 16) \
          num_threads(threads) firstprivate(cache) reduction(+: sum)
      for (int64_t a2 = 0; a2 < pi_sqrtx; a2++)
        sum += cache.phi<-1>(x / primes[a2 + 1], a2);
    }
  }

  print("phi", sum, time);
  return sum;
}