int hpx_main(variables_map& vm)
{
    // Benchmark driver: spreads `tasks` timed worker invocations round-robin
    // across a set of local_priority_queue_executor instances and reports the
    // elapsed wall time. Throws std::invalid_argument on bad command-line
    // options; returns hpx::finalize()'s result.
    if (vm.count("no-header"))
        header = false;

    // delay in seconds (the `delay` option is specified in microseconds)
    delay_sec = delay * 1.0E-6;

    std::size_t const num_os_threads = hpx::get_os_thread_count();

    int const num_executors = vm["executors"].as<int>();
    if (num_executors <= 0)
        throw std::invalid_argument(
            "number of executors to use must be larger than 0");

    // Compare in the unsigned domain only after the <= 0 check above has
    // guaranteed the cast is safe.
    if (std::size_t(num_executors) > num_os_threads)
        throw std::invalid_argument(
            "number of executors to use must be smaller than number of OS threads");

    // Validate `cores` while it is still signed: a non-positive int widened
    // straight to std::size_t would wrap to a huge value and silently defeat
    // the oversubscription check below.
    int const cores_option = vm["cores"].as<int>();
    if (cores_option <= 0)
        throw std::invalid_argument(
            "number of cores per executor must be larger than 0");
    std::size_t num_cores_per_executor = std::size_t(cores_option);

    if ((std::size_t(num_executors) - 1) * num_cores_per_executor > num_os_threads)
        throw std::invalid_argument(
            "number of cores per executor should not cause oversubscription");

    if (0 == tasks)
        throw std::invalid_argument("count of 0 tasks specified\n");

    // Reset performance counters (if specified on command line)
    reset_active_counters();

    // Start the clock.
    high_resolution_timer t;

    // create the executor instances
    using hpx::threads::executors::local_priority_queue_executor;

    {
        std::vector<local_priority_queue_executor> executors;
        for (std::size_t i = 0; i != std::size_t(num_executors); ++i)
        {
            // make sure we don't oversubscribe the cores, the last executor
            // will be bound to the remaining number of cores
            if ((i + 1) * num_cores_per_executor > num_os_threads)
            {
                HPX_ASSERT(i == std::size_t(num_executors) - 1);
                num_cores_per_executor =
                    num_os_threads - i * num_cores_per_executor;
            }
            executors.push_back(
                local_priority_queue_executor(num_cores_per_executor));
        }

        // Time only the scheduling/execution phase, not executor construction.
        t.restart();

        for (boost::uint64_t i = 0; i < tasks; ++i)
            executors[i % std::size_t(num_executors)].add(
                HPX_STD_BIND(&invoke_worker_timed, delay_sec));

        // destructors of executors will wait for all tasks to finish executing
    }

    // Stop the clock
    double const time_elapsed = t.elapsed();

    // Stop Performance Counters
    stop_active_counters();

    print_results(get_os_thread_count(), time_elapsed);

    return finalize();
}