TEST_F(MPIProfileTest, progress)
{
    struct geopm_prof_c *prof;
    uint64_t region_id[3];
    struct geopm_time_s start, curr;
    double timeout = 0.0;
    int rank;
    int num_node = 0;

    (void) geopm_comm_num_node(MPI_COMM_WORLD, &num_node);
    ASSERT_TRUE(num_node > 1);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    ASSERT_EQ(0, geopm_prof_create("progress_test", m_shm_key, MPI_COMM_WORLD, &prof));

    ASSERT_EQ(0, geopm_prof_region(prof, "loop_one", GEOPM_POLICY_HINT_UNKNOWN, &region_id[0]));
    ASSERT_EQ(0, geopm_prof_enter(prof, region_id[0]));
    ASSERT_EQ(0, geopm_time(&start));
    while (timeout < 1.0) {
        ASSERT_EQ(0, geopm_time(&curr));
        timeout = geopm_time_diff(&start, &curr);
        geopm_prof_progress(prof, region_id[0], timeout / 1.0);
    }
    ASSERT_EQ(0, geopm_prof_exit(prof, region_id[0]));

    timeout = 0.0;
    ASSERT_EQ(0, geopm_prof_region(prof, "loop_two", GEOPM_POLICY_HINT_UNKNOWN, &region_id[1]));
    ASSERT_EQ(0, geopm_prof_enter(prof, region_id[1]));
    ASSERT_EQ(0, geopm_time(&start));
    while (timeout < 2.0) {
        ASSERT_EQ(0, geopm_time(&curr));
        timeout = geopm_time_diff(&start, &curr);
        geopm_prof_progress(prof, region_id[1], timeout / 2.0);
    }
    ASSERT_EQ(0, geopm_prof_exit(prof, region_id[1]));

    timeout = 0.0;
    ASSERT_EQ(0, geopm_prof_region(prof, "loop_three", GEOPM_POLICY_HINT_UNKNOWN, &region_id[2]));
    ASSERT_EQ(0, geopm_prof_enter(prof, region_id[2]));
    ASSERT_EQ(0, geopm_time(&start));
    while (timeout < 3.0) {
        ASSERT_EQ(0, geopm_time(&curr));
        timeout = geopm_time_diff(&start, &curr);
        geopm_prof_progress(prof, region_id[2], timeout / 3.0);
    }
    ASSERT_EQ(0, geopm_prof_exit(prof, region_id[2]));

    ASSERT_EQ(0, geopm_prof_print(prof, m_log_file.c_str(), 0));
    if (m_is_node_root) {
        ASSERT_EQ(0, parse_log(false));
    }
    ASSERT_EQ(0, geopm_prof_destroy(prof));
}
int tutorial_stream_profiled(double big_o, int do_report)
{
    int err = 0;
    if (big_o != 0.0) {
        size_t cline_size = 64;
        size_t num_stream = (size_t)(big_o * 500000000);
        size_t mem_size = sizeof(double) * num_stream;
        double *a = NULL;
        double *b = NULL;
        double *c = NULL;
        double scalar = 3.0;
        uint64_t stream_rid;

        err = geopm_prof_region("tutorial_stream", GEOPM_POLICY_HINT_MEMORY, &stream_rid);
        if (!err) {
            err = posix_memalign((void **)&a, cline_size, mem_size);
        }
        if (!err) {
            err = posix_memalign((void **)&b, cline_size, mem_size);
        }
        if (!err) {
            err = posix_memalign((void **)&c, cline_size, mem_size);
        }
        if (!err) {
            /* Initialize the three STREAM vectors in parallel. */
#pragma omp parallel for
            for (size_t i = 0; i < num_stream; i++) {
                a[i] = 0.0;
                b[i] = 1.0;
                c[i] = 2.0;
            }
            if (do_report) {
                printf("Executing profiled STREAM triad on length %zu vectors.\n", num_stream);
                fflush(stdout);
            }
            err = geopm_prof_enter(stream_rid);
        }
        if (!err) {
#ifdef _OPENMP
            err = stream_profiled_omp(stream_rid, num_stream, scalar, a, b, c);
#else
            err = stream_profiled_serial(stream_rid, num_stream, scalar, a, b, c);
#endif
        }
        if (!err) {
            err = geopm_prof_exit(stream_rid);
        }
        if (!err) {
            free(c);
            free(b);
            free(a);
        }
    }
    return err;
}
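/*
 * stream_profiled_omp() and stream_profiled_serial(), called above, are not
 * shown in this section.  The following is a minimal sketch (not the
 * tutorial's actual implementation) of what the serial variant could look
 * like, assuming the two-argument geopm_prof_progress() API used elsewhere
 * in this section to report fractional progress for a region.
 */
static int stream_profiled_serial_sketch(uint64_t stream_rid, size_t num_stream,
                                         double scalar, double *a, double *b, double *c)
{
    /* Report progress roughly 100 times over the course of the triad. */
    size_t report_stride = num_stream / 100 ? num_stream / 100 : 1;
    for (size_t i = 0; i < num_stream; ++i) {
        a[i] = b[i] + scalar * c[i];
        if (i % report_stride == 0) {
            geopm_prof_progress(stream_rid, (double)i / (double)num_stream);
        }
    }
    return 0;
}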
int main(int argc, char **argv)
{
    uint64_t region_id[3];
    struct geopm_time_s start, curr;
    double timeout = 0.0;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    geopm_prof_region("loop_one", GEOPM_REGION_HINT_UNKNOWN, &region_id[0]);
    geopm_prof_enter(region_id[0]);
    geopm_time(&start);
    while (timeout < 1.0) {
        geopm_time(&curr);
        timeout = geopm_time_diff(&start, &curr);
        geopm_prof_progress(region_id[0], timeout / 1.0);
    }
    geopm_prof_exit(region_id[0]);

    timeout = 0.0;
    geopm_prof_region("loop_two", GEOPM_REGION_HINT_UNKNOWN, &region_id[1]);
    geopm_prof_enter(region_id[1]);
    geopm_time(&start);
    while (timeout < 2.0) {
        geopm_time(&curr);
        timeout = geopm_time_diff(&start, &curr);
        geopm_prof_progress(region_id[1], timeout / 2.0);
    }
    geopm_prof_exit(region_id[1]);

    timeout = 0.0;
    geopm_prof_region("loop_three", GEOPM_REGION_HINT_UNKNOWN, &region_id[2]);
    geopm_prof_enter(region_id[2]);
    geopm_time(&start);
    while (timeout < 3.0) {
        geopm_time(&curr);
        timeout = geopm_time_diff(&start, &curr);
        geopm_prof_progress(region_id[2], timeout / 3.0);
    }
    geopm_prof_exit(region_id[2]);

    MPI_Finalize();
    return 0;
}
TEST_F(MPIProfileTest, outer_sync)
{
    struct geopm_prof_c *prof;
    uint64_t region_id[4];
    int rank;
    int num_node = 0;

    (void) geopm_comm_num_node(MPI_COMM_WORLD, &num_node);
    ASSERT_LT(1, num_node);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    ASSERT_EQ(0, geopm_prof_create("outer_sync_test", m_shm_key, MPI_COMM_WORLD, &prof));

    for (int i = 0; i < 3; i++) {
        ASSERT_EQ(0, geopm_prof_outer_sync(prof));

        ASSERT_EQ(0, geopm_prof_region(prof, "loop_one", GEOPM_POLICY_HINT_UNKNOWN, &region_id[0]));
        ASSERT_EQ(0, geopm_prof_enter(prof, region_id[0]));
        sleep_exact(1.0);
        ASSERT_EQ(0, geopm_prof_exit(prof, region_id[0]));

        ASSERT_EQ(0, geopm_prof_region(prof, "loop_two", GEOPM_POLICY_HINT_UNKNOWN, &region_id[1]));
        ASSERT_EQ(0, geopm_prof_enter(prof, region_id[1]));
        sleep_exact(2.0);
        ASSERT_EQ(0, geopm_prof_exit(prof, region_id[1]));

        ASSERT_EQ(0, geopm_prof_region(prof, "loop_three", GEOPM_POLICY_HINT_UNKNOWN, &region_id[2]));
        ASSERT_EQ(0, geopm_prof_enter(prof, region_id[2]));
        sleep_exact(3.0);
        ASSERT_EQ(0, geopm_prof_exit(prof, region_id[2]));

        MPI_Barrier(MPI_COMM_WORLD);
    }

    ASSERT_EQ(0, geopm_prof_print(prof, m_log_file.c_str(), 0));
    if (m_is_node_root) {
        ASSERT_EQ(0, parse_log_loop());
    }
    ASSERT_EQ(0, geopm_prof_destroy(prof));
}
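/*
 * sleep_exact() is a test helper that is not shown in this section.  A
 * minimal sketch of one possible implementation is given below, using only
 * the geopm_time()/geopm_time_diff() calls already seen above; the real
 * helper may instead be based on clock_nanosleep().
 */
static void sleep_exact_sketch(double duration)
{
    struct geopm_time_s start, curr;
    double elapsed = 0.0;
    geopm_time(&start);
    /* Spin until the requested wall-clock duration has elapsed. */
    while (elapsed < duration) {
        geopm_time(&curr);
        elapsed = geopm_time_diff(&start, &curr);
    }
}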
int main(int argc, char **argv)
{
    int err = 0;
    int rank;
    int verbosity = 0;
    uint64_t init_rid;
    char *config_path = NULL;
    const char *usage =
        "\n"
        "%s -h | --help\n"
        "    Print this help message.\n"
        "\n"
        "%s [--verbose] [config_file]\n"
        "\n"
        "    --verbose: Print output from rank zero as every region executes.\n"
        "\n"
        "    config_file: Path to json file containing loop count and sequence\n"
        "                 of regions in each loop.\n"
        "\n"
        "                 Example configuration json string:\n"
        "\n"
        "                 {\"loop-count\": 10,\n"
        "                  \"region\": [\"sleep\", \"stream\", \"dgemm\", \"stream\", \"all2all\"],\n"
        "                  \"big-o\": [1.0, 1.0, 1.0, 1.0, 1.0]}\n"
        "\n"
        "                 The \"loop-count\" value is an integer that sets the\n"
        "                 number of loops executed.  Each time through the loop\n"
        "                 the regions listed in the \"region\" array are\n"
        "                 executed.  The \"big-o\" array gives double precision\n"
        "                 values for each region.  Region names can be one of\n"
        "                 the following options:\n"
        "\n"
        "                 sleep: Executes clock_nanosleep() for big-o seconds.\n"
        "\n"
        "                 spin: Executes a spin loop for big-o seconds.\n"
        "\n"
        "                 stream: Executes stream \"triad\" on a vector with\n"
        "                         length proportional to big-o.\n"
        "\n"
        "                 dgemm: Dense matrix-matrix multiply with floating\n"
        "                        point operations proportional to big-o.\n"
        "\n"
        "                 all2all: All processes send buffers to all other\n"
        "                          processes.  The time of this operation is\n"
        "                          proportional to big-o.\n"
        "\n"
        "                 Example configuration json string with imbalance and\n"
        "                 progress:\n"
        "\n"
        "                 {\"loop-count\": 10,\n"
        "                  \"region\": [\"sleep\", \"stream-progress\", \"dgemm-imbalance\", \"stream\", \"all2all\"],\n"
        "                  \"big-o\": [1.0, 1.0, 1.0, 1.0, 1.0],\n"
        "                  \"hostname\": [\"compute-node-3\", \"compute-node-15\"],\n"
        "                  \"imbalance\": [0.05, 0.15]}\n"
        "\n"
        "                 If \"-imbalance\" is appended to any region name in\n"
        "                 the configuration file and the \"hostname\" and\n"
        "                 \"imbalance\" fields are provided then those\n"
        "                 regions will have an injected delay on the hosts\n"
        "                 listed.  In the above example a 5%% delay on\n"
        "                 \"compute-node-3\" and a 15%% delay on\n"
        "                 \"compute-node-15\" are injected when executing\n"
        "                 the dgemm region.\n"
        "\n"
        "                 If \"-progress\" is appended to any region name in the\n"
        "                 configuration, then progress for the region will be\n"
        "                 reported through the geopm_prof_progress API.\n"
        "\n"
        "\n";

    err = MPI_Init(&argc, &argv);
    if (!err) {
        err = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    }
    if (!err && argc > 1) {
        if (strncmp(argv[1], "--help", strlen("--help")) == 0 ||
            strncmp(argv[1], "-h", strlen("-h")) == 0) {
            if (!rank) {
                printf(usage, argv[0], argv[0]);
            }
            return 0;
        }
        int offset = 1;
        if (strncmp(argv[1], "--verbose", strlen("--verbose")) == 0) {
            if (!rank) {
                verbosity = 1;
            }
            ++offset;
        }
        if (argc > offset) {
            config_path = argv[offset];
        }
    }
    if (!err) {
        err = geopm_prof_region("model-init", GEOPM_REGION_HINT_UNKNOWN, &init_rid);
    }
    if (!err) {
        err = geopm_prof_enter(init_rid);
    }
    if (!err) {
        // Do application initialization
        uint64_t loop_count = 0;
        std::vector<std::string> region_sequence;
        std::vector<double> big_o_sequence;
        if (config_path) {
            geopm::model_parse_config(config_path, loop_count, region_sequence, big_o_sequence);
        }
        else {
            // Default values if no configuration is specified
            loop_count = 10;
            region_sequence = {"sleep", "stream", "dgemm", "stream", "all2all"};
            big_o_sequence = {1.0, 1.0, 1.0, 1.0, 1.0};
        }
        geopm::ModelApplication app(loop_count, region_sequence, big_o_sequence, verbosity, rank);
        err = geopm_prof_exit(init_rid);
        if (!err) {
            // Run application
            app.run();
        }
    }
    if (!err) {
        err = MPI_Finalize();
    }
    if (err) {
        char err_msg[NAME_MAX];
        geopm_error_message(err, err_msg, NAME_MAX);
        std::cerr << "ERROR: " << argv[0] << ": " << err_msg << std::endl;
    }
    return err;
}
int main(int argc, char **argv)
{
    int size = 0;
    int rank = 0;
    int err = MPI_Init(&argc, &argv);
    if (!err) {
        err = MPI_Comm_size(MPI_COMM_WORLD, &size);
    }
    if (!err) {
        err = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    }
    if (!err && !rank) {
        printf("MPI_COMM_WORLD size: %d\n", size);
    }

    // Register the regions that will be profiled in each iteration.
    uint64_t sleep_rid;
    uint64_t stream_rid;
    uint64_t dgemm_rid;
    uint64_t all2all_rid;
    if (!err) {
        err = geopm_prof_region("tutorial_sleep", GEOPM_REGION_HINT_UNKNOWN, &sleep_rid);
    }
    if (!err) {
        err = geopm_prof_region("tutorial_stream", GEOPM_REGION_HINT_MEMORY, &stream_rid);
    }
    if (!err) {
        err = geopm_prof_region("tutorial_dgemm", GEOPM_REGION_HINT_COMPUTE, &dgemm_rid);
    }
    if (!err) {
        err = geopm_prof_region("tutorial_all2all", GEOPM_REGION_HINT_NETWORK, &all2all_rid);
    }

    int num_iter = 10;
    double sleep_big_o = 1.0;
    double stream0_big_o = 1.0;
    double dgemm_big_o = 1.0;
    double all2all_big_o = 1.0;
    double stream1_big_o = 1.0;
    if (!rank) {
        printf("Beginning loop of %d iterations.\n", num_iter);
        fflush(stdout);
    }
    for (int i = 0; !err && i < num_iter; ++i) {
        // Mark the start of each pass through the outer loop.
        err = geopm_prof_epoch();
        if (!err) {
            err = geopm_prof_enter(sleep_rid);
        }
        if (!err) {
            err = tutorial_sleep(sleep_big_o, 0);
        }
        if (!err) {
            err = geopm_prof_exit(sleep_rid);
        }
        if (!err) {
            err = geopm_prof_enter(stream_rid);
        }
        if (!err) {
            err = tutorial_stream(stream0_big_o, 0);
        }
        if (!err) {
            err = geopm_prof_exit(stream_rid);
        }
        if (!err) {
            err = geopm_prof_enter(dgemm_rid);
        }
        if (!err) {
            err = tutorial_dgemm(dgemm_big_o, 0);
        }
        if (!err) {
            err = geopm_prof_exit(dgemm_rid);
        }
        if (!err) {
            err = geopm_prof_enter(stream_rid);
        }
        if (!err) {
            err = tutorial_stream(stream1_big_o, 0);
        }
        if (!err) {
            err = geopm_prof_exit(stream_rid);
        }
        if (!err) {
            err = geopm_prof_enter(all2all_rid);
        }
        if (!err) {
            err = tutorial_all2all(all2all_big_o, 0);
        }
        if (!err) {
            err = geopm_prof_exit(all2all_rid);
        }
        if (!err && !rank) {
            printf("Iteration=%.3d\r", i);
            fflush(stdout);
        }
    }
    if (!err && !rank) {
        printf("Completed loop.        \n");
        fflush(stdout);
    }

    int err_fin = MPI_Finalize();
    err = err ? err : err_fin;
    return err;
}
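/*
 * The tutorial_sleep(), tutorial_stream(), tutorial_dgemm() and
 * tutorial_all2all() model regions called above are defined elsewhere in the
 * tutorial sources.  As one example, a minimal sketch of the sleep region,
 * assuming a clock_nanosleep()-based implementation as described for the
 * "sleep" region in the benchmark usage text above, could look like:
 */
static int tutorial_sleep_sketch(double big_o, int do_report)
{
    int err = 0;
    if (big_o != 0.0) {
        if (do_report) {
            printf("Sleeping for %f seconds.\n", big_o);
            fflush(stdout);
        }
        /* Split big_o seconds into whole seconds and nanoseconds. */
        struct timespec interval = {
            .tv_sec = (time_t)big_o,
            .tv_nsec = (long)((big_o - (time_t)big_o) * 1e9),
        };
        err = clock_nanosleep(CLOCK_REALTIME, 0, &interval, NULL);
    }
    return err;
}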
static int run_something(void)
{
    int err = 0;
    struct geopm_ctl_c *ctl;
    struct geopm_policy_c *policy;
    struct geopm_prof_c *prof;
    struct geopm_tprof_c *tprof;
    double x = 0;
    int num_thread, thread_idx, i;
    int num_iter = 1000000;
    int iter_per_step = 100;
    int chunk_size = 128;
    int step_counter = 0;
    uint64_t region_id;

    // Query the number of OpenMP threads (every thread writes the same value).
#pragma omp parallel
    {
        num_thread = omp_get_num_threads();
    }

    // In this example we will create the policy, but in general it
    // should be created prior to application runtime.
    err = geopm_policy_create("", "profile_policy", &policy);
    if (!err) {
        err = geopm_policy_mode(policy, GEOPM_POLICY_MODE_PERF_BALANCE_DYNAMIC);
    }
    if (!err) {
        err = geopm_policy_power(policy, 2000);
    }
    if (!err) {
        err = geopm_policy_write(policy);
    }
    if (!err) {
        err = geopm_policy_destroy(policy);
    }

    // Now that we have a policy on disk, use it as a normal
    // application would.
    if (!err) {
        err = geopm_policy_create("profile_policy", "", &policy);
    }
    if (!err) {
        (void) shm_unlink("/geopm_threaded_step");
        err = geopm_ctl_create(policy, "/geopm_threaded_step", MPI_COMM_WORLD, &ctl);
    }
    if (!err) {
        err = geopm_prof_create("threaded_step", "/geopm_threaded_step", MPI_COMM_WORLD, &prof);
    }
    if (!err) {
        err = geopm_tprof_create(num_thread, num_iter, chunk_size, &tprof);
    }
    if (!err) {
        err = geopm_ctl_step(ctl);
    }
    if (!err) {
        err = geopm_prof_region(prof, "main-loop", GEOPM_POLICY_HINT_UNKNOWN, &region_id);
    }
    if (!err) {
#pragma omp parallel default(shared) private(i, thread_idx) reduction(+:x)
        {
            thread_idx = omp_get_thread_num();
#pragma omp for schedule(static, chunk_size)
            for (i = 0; i < num_iter; ++i) {
                x += do_something(i);
                // Report per-thread progress for the region.
                geopm_tprof_increment(tprof, prof, region_id, thread_idx);
                // Thread zero periodically steps the controller.
                if (!thread_idx) {
                    step_counter++;
                    if (step_counter == iter_per_step) {
                        geopm_ctl_step(ctl);
                        step_counter = 0;
                    }
                }
            }
        }
    }
    return err;
}
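/*
 * do_something() is the per-iteration work function referenced above and is
 * not shown in this section.  Any CPU-bound function of the iteration index
 * would serve; a purely illustrative stand-in (not the example's actual
 * implementation) could be:
 */
static double do_something(int input)
{
    double result = (double)input;
    /* Arbitrary floating point work to keep the thread busy. */
    for (int i = 0; i < 1000; ++i) {
        result += i * result * 1e-9;
    }
    return result;
}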