// Broadcast micro-benchmark.
//
// Performs outer_repeats_ measurement rounds. In each round a fresh timer
// measures inner_repeats_ consecutive ctx.net.Broadcast() calls on a size_t.
// The per-worker time is then combined with AllReduce using
// common::maximum<size_t>, and the worker with rank 0 prints one RESULT line.
void Test(api::Context& ctx) {
    for (size_t round = 0; round < outer_repeats_; ++round) {
        common::StatsTimerStopped timer;

        // arbitrary payload; each broadcast result is fed into the next call
        size_t dummy = 4915221495089;

        timer.Start();
        size_t iteration = 0;
        while (iteration < inner_repeats_) {
            dummy = ctx.net.Broadcast(dummy);
            ++iteration;
        }
        timer.Stop();

        size_t workers = ctx.num_workers();

        // reduce the per-worker times to their maximum
        size_t elapsed_us = timer.Microseconds();
        elapsed_us = ctx.net.AllReduce(elapsed_us, common::maximum<size_t>());

        // only rank 0 reports
        if (ctx.my_rank() != 0)
            continue;

        const double per_op_us =
            static_cast<double>(elapsed_us) / inner_repeats_;

        std::cout << "RESULT datatype=size_t operation=broadcast"
                  << " workers=" << workers
                  << " inner_repeats=" << inner_repeats_
                  << " time[us]=" << elapsed_us
                  << " time_per_op[us]=" << per_op_us
                  << std::endl;
    }
}
// AllReduce micro-benchmark with result verification.
//
// Performs outer_repeats_ measurement rounds. In each round a fresh timer
// measures inner_repeats_ ctx.net.AllReduce() calls; every call reduces the
// value (inner + my_rank) and the result is checked against the closed-form
// sum of the consecutive integers inner .. inner + n - 1 via die_unequal.
// The per-worker time is then combined with AllReduce using
// common::maximum<size_t>, and the worker with rank 0 prints one RESULT line.
void Test(api::Context& ctx) {
    for (size_t round = 0; round < outer_repeats_; ++round) {
        common::StatsTimerStopped timer;
        size_t n = ctx.num_workers();

        timer.Start();
        size_t inner = 0;
        while (inner < inner_repeats_) {
            // a different input in every iteration
            size_t value = inner + ctx.my_rank();
            value = ctx.net.AllReduce(value);

            // sum(inner .. inner+n-1) = T(inner+n) - T(inner) with
            // T(k) = k*(k-1)/2. For inner == 0 the (inner - 1) term wraps,
            // but it is multiplied by zero.
            size_t upper = n + inner;
            size_t expected =
                upper * (upper - 1) / 2 - inner * (inner - 1) / 2;

            die_unequal(value, expected);
            ++inner;
        }
        timer.Stop();

        // reduce the per-worker times to their maximum
        size_t elapsed_us = timer.Microseconds();
        elapsed_us = ctx.net.AllReduce(elapsed_us, common::maximum<size_t>());

        // only rank 0 reports
        if (ctx.my_rank() != 0)
            continue;

        const double per_op_us =
            static_cast<double>(elapsed_us) / inner_repeats_;

        std::cout << "RESULT datatype=size_t operation=allreduce"
                  << " workers=" << n
                  << " inner_repeats=" << inner_repeats_
                  << " time[us]=" << elapsed_us
                  << " time_per_op[us]=" << per_op_us
                  << std::endl;
    }
}
// Full all-to-all stream benchmark.
//
// Every worker generates g_bytes of Type items and writes each item to the
// writer of every worker of a new CatStream, then reads the stream until it
// is exhausted. Write and read phases are timed separately; the per-worker
// times are combined with AllReduce using common::maximum<size_t>, and the
// worker with rank 0 prints one RESULT line.
//
// ctx             context of this worker
// type_as_string  label printed in the "datatype=" field
//
// The *_speed_MiBs fields are computed in floating point. The previous
// integer expression (bytes / microsecs * 1000000 / 1024 / 1024) truncated
// in the first division, reporting 0 for anything slower than 1 byte/us,
// and divided by zero when a phase took less than one microsecond.
void ExperimentFull(
    api::Context& ctx, const std::string& type_as_string) {

    // Throughput in MiB/s; yields 0 when the elapsed time is not measurable.
    auto mib_per_sec = [](uint64_t bytes, uint64_t microsecs) -> double {
                           if (microsecs == 0)
                               return 0.0;
                           return static_cast<double>(bytes)
                                  / static_cast<double>(microsecs)
                                  * 1000000.0 / (1024.0 * 1024.0);
                       };

    // transmit data to all workers.
    auto stream = ctx.GetNewCatStream();

    // write phase
    StatsTimer<true> write_timer(true);
    {
        auto writers = stream->OpenWriters();
        auto data = Generator<Type>(g_bytes);

        while (data.HasNext()) {
            Type value = data.Next();
            // send the same item to every worker
            for (size_t tgt = 0; tgt < ctx.num_workers(); ++tgt) {
                writers[tgt](value);
            }
        }
        // writers go out of scope here, before the timer is stopped
    }
    write_timer.Stop();

    // read phase: drain the stream, discarding the items
    StatsTimer<true> read_timer(true);
    {
        auto reader = stream->OpenCatReader(true);

        while (reader.HasNext()) {
            reader.Next<Type>();
        }
    }
    read_timer.Stop();

    // reduce the per-worker times to their maximum
    size_t read_microsecs = read_timer.Microseconds();
    read_microsecs =
        ctx.AllReduce(read_microsecs, common::maximum<size_t>());

    size_t write_microsecs = write_timer.Microseconds();
    write_microsecs =
        ctx.AllReduce(write_microsecs, common::maximum<size_t>());

    // bytes written by one worker (g_bytes to each of num_workers targets)
    uint64_t host_volume = ctx.num_workers() * g_bytes;
    // bytes written by all workers together
    uint64_t total_volume =
        ctx.num_workers() * ctx.num_workers() * g_bytes;

    if (ctx.my_rank() == 0) {
        std::cout
            << "RESULT"
            << " datatype=" << type_as_string
            << " size=" << g_bytes
            << " write_time=" << write_microsecs
            << " read_time=" << read_microsecs
            << " write_speed_MiBs="
            << mib_per_sec(g_bytes, write_microsecs)
            << " read_speed_MiBs="
            << mib_per_sec(g_bytes, read_microsecs)
            << " host_write_speed_MiBs="
            << mib_per_sec(host_volume, write_microsecs)
            << " host_read_speed_MiBs="
            << mib_per_sec(host_volume, read_microsecs)
            << " total_write_speed_MiBs="
            << mib_per_sec(total_volume, write_microsecs)
            << " total_read_speed_MiBs="
            << mib_per_sec(total_volume, read_microsecs)
            << std::endl;
    }
}
// Pairwise stream benchmark.
//
// For every ordered pair (src, tgt) of workers a new CatStream is created.
// Only the worker whose rank equals src generates g_bytes of Type items and
// writes them to the writer for tgt; every worker opens the writers and
// afterwards reads the stream until it is exhausted. Write and read phases
// are timed separately; the per-worker times are combined with AllReduce
// using common::maximum<size_t>, and the worker with rank 0 prints one
// RESULT line per pair.
//
// ctx             context of this worker
// type_as_string  label printed in the "datatype=" field
//
// The *_speed_MiBs fields are computed in floating point. The previous
// integer expression (bytes / microsecs * 1000000 / 1024 / 1024) truncated
// in the first division, reporting 0 for anything slower than 1 byte/us,
// and divided by zero when a phase took less than one microsecond.
void ExperimentAllPairs(
    api::Context& ctx, const std::string& type_as_string) {

    // Throughput in MiB/s; yields 0 when the elapsed time is not measurable.
    auto mib_per_sec = [](uint64_t bytes, uint64_t microsecs) -> double {
                           if (microsecs == 0)
                               return 0.0;
                           return static_cast<double>(bytes)
                                  / static_cast<double>(microsecs)
                                  * 1000000.0 / (1024.0 * 1024.0);
                       };

    for (size_t src = 0; src < ctx.num_workers(); ++src) {
        for (size_t tgt = 0; tgt < ctx.num_workers(); ++tgt) {
            // transmit data from worker src -> tgt: only send data if we
            // are src, but every worker takes part in the stream and reads
            // whatever arrives for it.
            auto stream = ctx.GetNewCatStream();

            // write phase
            StatsTimer<true> write_timer(true);
            {
                auto writers = stream->OpenWriters();

                if (ctx.my_rank() == src) {
                    auto data = Generator<Type>(g_bytes);

                    auto& writer = writers[tgt];
                    while (data.HasNext()) {
                        writer(data.Next());
                    }
                }
                // writers go out of scope here, before the timer is stopped
            }
            write_timer.Stop();

            // read phase: drain the stream, discarding the items
            StatsTimer<true> read_timer(true);
            {
                auto reader = stream->OpenCatReader(true);

                while (reader.HasNext()) {
                    reader.Next<Type>();
                }
            }
            read_timer.Stop();

            // reduce the per-worker times to their maximum
            size_t read_microsecs = read_timer.Microseconds();
            read_microsecs =
                ctx.AllReduce(read_microsecs, common::maximum<size_t>());

            size_t write_microsecs = write_timer.Microseconds();
            write_microsecs =
                ctx.AllReduce(write_microsecs, common::maximum<size_t>());

            if (ctx.my_rank() == 0) {
                std::cout
                    << "RESULT"
                    << " datatype=" << type_as_string
                    << " size=" << g_bytes
                    << " src=" << src << " tgt=" << tgt
                    << " write_time=" << write_microsecs
                    << " read_time=" << read_microsecs
                    << " write_speed_MiBs="
                    << mib_per_sec(g_bytes, write_microsecs)
                    << " read_speed_MiBs="
                    << mib_per_sec(g_bytes, read_microsecs)
                    << std::endl;
            }
        }
    }
}