/// HPX entry point of the benchmark.
///
/// Registers `tasks` work items that each invoke `invoke_worker` with the
/// configured `delay`, then keeps suspending itself until at most one
/// normal-priority HPX-thread is left, and finally reports the elapsed time.
///
/// \param vm  Parsed command line; only the presence of "no-header" is read.
/// \returns   The value returned by finalize().
/// \throws std::invalid_argument if `tasks` is 0.
int hpx_main(
    variables_map& vm
    )
{
    if (vm.count("no-header"))
        header = false;

    {
        if (tasks == 0)
            throw std::invalid_argument("count of 0 tasks specified\n");

        // The measurement covers queueing as well as execution of the work.
        high_resolution_timer t;

        boost::uint64_t queued = 0;
        while (queued < tasks)
        {
            register_work(HPX_STD_BIND(&invoke_worker, delay));
            ++queued;
        }

        // Yield until every other HPX-thread has finished. We expect to be
        // resumed once most of the null HPX-threads have run; if some are
        // still around we simply go back to sleep.
        for (;;)
        {
            suspend();

            if (get_thread_count(hpx::threads::thread_priority_normal) <= 1)
                break;
        }

        print_results(get_os_thread_count(), t.elapsed());
    }

    return finalize();
}
/// HPX entry point of the benchmark.
///
/// Registers `feeders` work items, each of which creates a share of the
/// requested `tasks` (via `create_tasks`, or `create_stackless_tasks` when
/// "no-stack" was given), then keeps suspending itself until at most one
/// normal-priority HPX-thread is left and reports the elapsed time.
///
/// The shares always add up to exactly `tasks`: when `tasks` is not a
/// multiple of `feeders`, the first `tasks % feeders` feeders create one
/// additional task each, so no requested task is dropped.
///
/// \param vm  Parsed command line; "no-header" and "no-stack" are read.
/// \returns   The value returned by finalize().
/// \throws std::invalid_argument if `tasks` is 0, or if `feeders` is 0 or
///         larger than the number of OS threads.
int hpx_main(
    variables_map& vm
    )
{
    if (vm.count("no-header"))
        header = false;

    {
        if (0 == tasks)
            throw std::invalid_argument("count of 0 tasks specified\n");

        if (0 == feeders || feeders > get_os_thread_count())
            throw std::invalid_argument("number of feeders must be between 1 and OS-thread-count\n");

        // Split the work before the clock starts. A plain tasks/feeders would
        // lose the remainder (and run nothing at all when tasks < feeders).
        boost::uint64_t const base_share = tasks / feeders;
        boost::uint64_t const leftover = tasks % feeders;

        // Start the clock.
        thread_aware_timer t;

        bool const use_stack = (0 == vm.count("no-stack"));

        for (boost::uint64_t i = 0; i != feeders; ++i)
        {
            // The first `leftover` feeders absorb the remainder.
            boost::uint64_t const share = base_share + (i < leftover ? 1 : 0);

            if (use_stack)
                register_work(HPX_STD_BIND(&create_tasks, share));
            else
                register_work(HPX_STD_BIND(&create_stackless_tasks, share));
        }

        // Reschedule hpx_main until all other HPX-threads have finished. We
        // should be resumed after most of the null HPX-threads have been
        // executed. If we haven't, we just reschedule ourselves again.
        do {
            suspend();
        } while (get_thread_count(hpx::threads::thread_priority_normal) > 1);

        print_results(get_os_thread_count(), t.elapsed());
    }

    return finalize();
}
/// HPX entry point of the heterogeneous-workload benchmark.
///
/// Generates `tasks` random payloads, each within [min_delay, max_delay] and
/// summing to exactly `total_delay`, shuffles them, registers one
/// `invoke_worker` work item per payload, waits until at most one
/// normal-priority HPX-thread is left, and reports the elapsed time.
///
/// \param vm  Parsed command line; only the presence of "no-header" is read.
/// \returns   The value returned by finalize().
/// \throws std::invalid_argument if the task count or the delay bounds are
///         inconsistent with each other.
/// \throws std::logic_error if the generated payloads violate the requested
///         bounds, count or total (internal consistency checks).
int hpx_main(
    variables_map& vm
    )
{
    if (vm.count("no-header"))
        header = false;

    ///////////////////////////////////////////////////////////////////////////
    // Initialize the PRNG seed (a seed of 0 means "derive one from the clock").
    if (!seed)
        seed = boost::uint64_t(std::time(0));

    {
        ///////////////////////////////////////////////////////////////////////
        // Validate command-line arguments.
        if (0 == tasks)
            throw std::invalid_argument("count of 0 tasks specified\n");

        if (min_delay > max_delay)
            throw std::invalid_argument("minimum delay cannot be larger than "
                                        "maximum delay\n");

        if (min_delay > total_delay)
            throw std::invalid_argument("minimum delay cannot be larger than "
                                        "total delay\n");

        if (max_delay > total_delay)
            throw std::invalid_argument("maximum delay cannot be larger than "
                                        "total delay\n");

        // Even if every task takes the minimum, the total would be exceeded.
        if ((min_delay * tasks) > total_delay)
            throw std::invalid_argument("minimum delay is too large for the "
                                        "specified total delay and number of "
                                        "tasks\n");

        // Even if every task takes the maximum, the total cannot be reached.
        if ((max_delay * tasks) < total_delay)
            throw std::invalid_argument("maximum delay is too small for the "
                                        "specified total delay and number of "
                                        "tasks\n");

        ///////////////////////////////////////////////////////////////////////
        // Randomly generate a description of the heterogeneous workload.
        std::vector<boost::uint64_t> payloads;
        payloads.reserve(tasks);

        // For random numbers, we use a 64-bit specialization of Boost.Random's
        // mersenne twister engine (good uniform distribution up to 311
        // dimensions, cycle length 2 ^ 19937 - 1)
        boost::random::mt19937_64 prng(seed);

        boost::uint64_t current_sum = 0;

        for (boost::uint64_t i = 0; i < tasks; ++i)
        {
            // Credit to Spencer Ruport for putting this algorithm on
            // stackoverflow.

            // Delay still to be handed out, and tasks still to come after
            // this one.
            boost::uint64_t const remaining = total_delay - current_sum;
            boost::uint64_t const later_tasks = tasks - 1 - i;

            // Lower bound: what this task must take at least so that the
            // later tasks can absorb the rest at max_delay each. The
            // subtraction is unsigned, so detect the would-be-negative case
            // explicitly instead of trusting the wrapped value.
            boost::uint64_t const low_calc =
                remaining - (max_delay * later_tasks);

            bool const negative = remaining < (max_delay * later_tasks);

            boost::uint64_t const low =
                (negative || (low_calc < min_delay)) ? min_delay : low_calc;

            // Upper bound: what this task may take at most so that the later
            // tasks still get at least min_delay each.
            boost::uint64_t const high_calc =
                remaining - (min_delay * later_tasks);

            boost::uint64_t const high =
                (high_calc > max_delay) ? max_delay : high_calc;

            // Our range is [low, high].
            boost::random::uniform_int_distribution<boost::uint64_t>
                dist(low, high);

            boost::uint64_t const payload = dist(prng);

            if (payload < min_delay)
                throw std::logic_error("task delay is below minimum");

            if (payload > max_delay)
                throw std::logic_error("task delay is above maximum");

            current_sum += payload;
            payloads.push_back(payload);
        }

        // Randomly shuffle the entire sequence to deal with drift.
        boost::function<boost::uint64_t(boost::uint64_t)> shuffler_f =
            boost::bind(&shuffler, boost::ref(prng), _1);
        std::random_shuffle(payloads.begin(), payloads.end(), shuffler_f);

        ///////////////////////////////////////////////////////////////////////
        // Validate the payloads.
        if (payloads.size() != tasks)
            throw std::logic_error("incorrect number of tasks generated");

        boost::uint64_t const payloads_sum =
            std::accumulate(payloads.begin(), payloads.end(), 0ULL);
        if (payloads_sum != total_delay)
            throw std::logic_error("incorrect total delay generated");

        ///////////////////////////////////////////////////////////////////////
        // Start the clock.
        high_resolution_timer t;

        ///////////////////////////////////////////////////////////////////////
        // Queue the tasks in a serial loop.
        for (boost::uint64_t i = 0; i < tasks; ++i)
            register_work(HPX_STD_BIND(&invoke_worker, payloads[i]));

        ///////////////////////////////////////////////////////////////////////
        // Wait for the work to finish.
        do {
            // Reschedule hpx_main until all other HPX-threads have finished.
            // We should be resumed after most of the null HPX-threads have
            // been executed. If we haven't, we just reschedule ourselves
            // again.
            suspend();
        } while (get_thread_count(hpx::threads::thread_priority_normal) > 1);

        ///////////////////////////////////////////////////////////////////////
        // Print the results.
        print_results(get_os_thread_count(), t.elapsed());
    }

    // Propagate the shutdown status instead of discarding it.
    return finalize();
}
/// HPX entry point of the staged-worker benchmark.
///
/// Validates the command-line driven globals, selects the staging function
/// from `distribution`, derives the per-feeder task counts from `scaling`,
/// optionally activates the performance counters named via "counter", runs
/// `stage_workers` once per OS thread (remotely scheduled for all but the
/// current worker thread, directly for the current one), waits on a barrier
/// shared with a low-priority `wait_for_tasks` thread, and prints the results.
///
/// \param vm  Parsed command line; "no-header", "csv-header" and "counter"
///            are read.
/// \returns   The value returned by finalize().
/// \throws std::invalid_argument for a zero task count, more suspended tasks
///         than tasks, an unknown distribution or scaling type, task counts
///         that do not divide evenly under strong scaling, or a malformed
///         "counter" entry.
int hpx_main(
    variables_map& vm
    )
{
    {
        if (vm.count("no-header"))
            header = false;

        if (vm.count("csv-header"))
            csv_header = true;

        if (0 == tasks)
            throw std::invalid_argument("count of 0 tasks specified\n");

        if (suspended_tasks > tasks)
            throw std::invalid_argument(
                "suspended tasks must be smaller than tasks\n");

        std::uint64_t const os_thread_count = get_os_thread_count();

        ///////////////////////////////////////////////////////////////////////
        // Select the staging strategy.
        stage_worker_function stage_worker;

        if ("static-balanced-stackbased" == distribution)
            stage_worker = &stage_worker_static_balanced_stackbased;
        else if ("static-imbalanced" == distribution)
            stage_worker = &stage_worker_static_imbalanced;
        else if ("round-robin" == distribution)
            stage_worker = &stage_worker_round_robin;
        else
            // The message must name the spellings that are actually accepted
            // above.
            throw std::invalid_argument(
                "invalid distribution type specified (valid options are "
                "\"static-balanced-stackbased\", \"static-imbalanced\" or "
                "\"round-robin\")"
                );

        ///////////////////////////////////////////////////////////////////////
        // Derive the per-feeder workload from the scaling mode.
        std::uint64_t tasks_per_feeder = 0;
        //std::uint64_t total_tasks = 0;
        std::uint64_t suspended_tasks_per_feeder = 0;
        std::uint64_t total_suspended_tasks = 0;

        if ("strong" == scaling)
        {
            // Strong scaling: the requested totals are split evenly across
            // the OS threads.
            if (tasks % os_thread_count)
                throw std::invalid_argument(
                    "tasks must be cleanly divisable by OS-thread count\n");

            if (suspended_tasks % os_thread_count)
                throw std::invalid_argument(
                    "suspended tasks must be cleanly divisable by OS-thread "
                    "count\n");

            tasks_per_feeder = tasks / os_thread_count;
            //total_tasks = tasks;
            suspended_tasks_per_feeder = suspended_tasks / os_thread_count;
            total_suspended_tasks = suspended_tasks;
        }
        else if ("weak" == scaling)
        {
            // Weak scaling: every feeder gets the full requested amount.
            tasks_per_feeder = tasks;
            //total_tasks = tasks * os_thread_count;
            suspended_tasks_per_feeder = suspended_tasks;
            total_suspended_tasks = suspended_tasks * os_thread_count;
        }
        else
            throw std::invalid_argument(
                "invalid scaling type specified (valid options are \"strong\" "
                "or \"weak\")");

        ///////////////////////////////////////////////////////////////////////
        // Reduce the suspended : non-suspended ratio to its smallest terms.
        if (suspended_tasks != 0)
        {
            std::uint64_t gcd = boost::math::gcd(tasks_per_feeder
                                               , suspended_tasks_per_feeder);

            suspend_step = suspended_tasks_per_feeder / gcd;
            // We check earlier to make sure that there are never more
            // suspended tasks than tasks requested.
            no_suspend_step = (tasks_per_feeder / gcd) - suspend_step;
        }

        ///////////////////////////////////////////////////////////////////////
        // Parse the "<shortname>,<countername>" pairs given via "counter".
        std::vector<std::string> counter_shortnames;
        std::vector<std::string> counters;
        if (vm.count("counter"))
        {
            std::vector<std::string> raw_counters =
                vm["counter"].as<std::vector<std::string> >();

            for (std::uint64_t i = 0; i < raw_counters.size(); ++i)
            {
                std::vector<std::string> entry;
                boost::algorithm::split(entry, raw_counters[i],
                    boost::algorithm::is_any_of(","),
                    boost::algorithm::token_compress_on);

                // This is user input: reject it with an exception rather
                // than an assertion, so a malformed value can never lead to
                // an out-of-range access of entry[1] below.
                if (entry.size() != 2)
                    throw std::invalid_argument(
                        "invalid counter specification (expected "
                        "\"<shortname>,<countername>\"): " + raw_counters[i]);

                counter_shortnames.push_back(entry[0]);
                counters.push_back(entry[1]);
            }
        }

        std::shared_ptr<hpx::util::activate_counters> ac;
        if (!counters.empty())
            ac.reset(new hpx::util::activate_counters(counters));

        ///////////////////////////////////////////////////////////////////////
        // Start the clock.
        high_resolution_timer t;

        if (ac)
            ac->reset_counters();

        // This needs to stay here; we may have suspended as recently as the
        // performance counter reset (which is called just before the staging
        // function).
        std::uint64_t const num_thread = hpx::get_worker_thread_num();

        // Schedule a stager for every other OS thread; the trailing `i` is
        // passed to register_work after the priority.
        for (std::uint64_t i = 0; i < os_thread_count; ++i)
        {
            if (num_thread == i) continue;

            register_work(hpx::util::bind(&stage_workers
                                        , i
                                        , tasks_per_feeder
                                        , stage_worker
                                          )
                , "stage_workers"
                , hpx::threads::pending
                , hpx::threads::thread_priority_normal
                , i
                  );
        }

        // Stage the current worker thread's share directly.
        stage_workers(num_thread, tasks_per_feeder, stage_worker);

        double warmup_estimate = t.elapsed();

        // Schedule a low-priority thread; when it is executed, it checks to
        // make sure all the tasks (which are normal priority) have been
        // executed, and then it presumably arrives at the `finished` barrier
        // this thread waits on below (wait_for_tasks is defined elsewhere --
        // TODO confirm).
        hpx::lcos::local::barrier finished(2);

        register_work(hpx::util::bind(&wait_for_tasks
                                    , std::ref(finished)
                                    , total_suspended_tasks
                                     )
            , "wait_for_tasks", hpx::threads::pending
            , hpx::threads::thread_priority_low);

        finished.wait();

        // Stop the clock
        double time_elapsed = t.elapsed();

        print_results(os_thread_count, time_elapsed, warmup_estimate
                    , counter_shortnames, ac);
    }

    if (suspended_tasks != 0)
        // Force termination of all suspended tasks.
        hpx::get_runtime().get_thread_manager().abort_all_suspended_threads();

    return finalize();
}
/// HPX entry point of the executor-based benchmark.
///
/// Creates "executors" instances of local_priority_queue_executor, each
/// constructed with "cores" cores (the last one is shrunk to whatever is
/// left so the OS threads are not oversubscribed), hands `tasks` invocations
/// of `invoke_worker_timed` to them in round-robin order, and reports the
/// time from the first add() until all executors have been destroyed.
///
/// \param vm  Parsed command line; "no-header", "executors" and "cores" are
///            read.
/// \returns   The value returned by finalize().
/// \throws std::invalid_argument if the executor count is not in
///         [1, OS-thread count], if the requested cores would oversubscribe
///         the OS threads, or if `tasks` is 0.
int hpx_main(
    variables_map& vm
    )
{
    if (vm.count("no-header"))
        header = false;

    // `delay` scaled by 1e-6 gives the delay in seconds.
    delay_sec = 1.0E-6 * delay;

    std::size_t os_threads = hpx::get_os_thread_count();

    int num_executors = vm["executors"].as<int>();
    if (num_executors <= 0)
    {
        throw std::invalid_argument(
            "number of executors to use must be larger than 0");
    }

    if (num_executors > std::size_t(os_threads))
    {
        throw std::invalid_argument(
            "number of executors to use must be smaller than number of OS threads");
    }

    std::size_t cores_per_executor = vm["cores"].as<int>();
    if ((num_executors - 1) * cores_per_executor > os_threads)
    {
        throw std::invalid_argument(
            "number of cores per executor should not cause oversubscription");
    }

    if (tasks == 0)
        throw std::invalid_argument("count of 0 tasks specified\n");

    // Reset performance counters (if specified on command line)
    reset_active_counters();

    // The clock is created here but restarted once the executors exist, so
    // executor construction is not part of the measurement.
    high_resolution_timer t;

    using hpx::threads::executors::local_priority_queue_executor;

    {
        std::vector<local_priority_queue_executor> executors;

        for (std::size_t i = 0; i != std::size_t(num_executors); ++i)
        {
            // Never hand out more cores than there are OS threads: the last
            // executor is bound to whatever number of cores remains.
            bool const would_oversubscribe =
                (i + 1) * cores_per_executor > os_threads;

            if (would_oversubscribe)
            {
                HPX_ASSERT(i == num_executors - 1);
                cores_per_executor = os_threads - i * cores_per_executor;
            }

            executors.push_back(
                local_priority_queue_executor(cores_per_executor));
        }

        t.restart();

        // Distribute the work round-robin across the executors.
        boost::uint64_t submitted = 0;
        while (submitted < tasks)
        {
            executors[submitted % num_executors].add(
                HPX_STD_BIND(&invoke_worker_timed, delay_sec));
            ++submitted;
        }

        // Leaving this scope destroys the executors, which waits for all
        // tasks to finish executing.
    }

    // Stop the clock
    double elapsed = t.elapsed();

    // Stop Performance Counters
    stop_active_counters();

    print_results(get_os_thread_count(), elapsed);

    return finalize();
}