//! Stream block_count_ blocks of block_size_ bytes to host \p peer_id, wait
//! for the peer's acknowledgement, and record the achieved throughput in
//! bandwidth_ at (own host rank, peer_id).
//!
//! \param ctx          context providing the network group and own host rank
//! \param peer_id      host to send the blocks to
//! \param inner_repeat repetition index, only used in the log line
void Sender(api::Context& ctx, size_t peer_id, size_t inner_repeat) {
    net::Connection& peer = ctx.net.group().connection(peer_id);

    // the timer covers the whole transfer including the final pong
    common::StatsTimerStart inner_timer;

    // ship all blocks; each one carries the current counter value in its
    // first and last element, and the counter advances once per block
    for (size_t left = block_count_; left != 0; --left) {
        const auto stamp = counter_++;
        data_block_.front() = stamp;
        data_block_.back() = stamp;
        peer.SyncSend(data_block_.data(), block_size_);
    }

    // the peer answers with a single value that must match our counter
    size_t pong = 0;
    peer.Receive(&pong);
    die_unequal(pong, counter_);

    inner_timer.Stop();

    double bw = CalcMiBs(block_count_ * block_size_, inner_timer);

    sLOG0 << "bandwidth" << ctx.host_rank() << "->" << peer_id
          << "inner_repeat" << inner_repeat
          << bw << "MiB/s"
          << "time"
          << (static_cast<double>(inner_timer.Microseconds()) * 1e-6);

    bandwidth_(ctx.host_rank(), peer_id).add(bw);
}
//! Measure the round-trip time to host \p peer: one untimed ping/pong to
//! synchronize, then inner_repeats_ timed ping/pongs. The average time per
//! round trip in microseconds is added to latency_ at (own host rank, peer).
//!
//! \param ctx       context providing the network group and own host rank
//! \param peer      host to exchange pings with
//! \param iteration repetition index, only used in the log line
void Sender(api::Context& ctx, size_t peer, size_t iteration) {
    net::Group& group = ctx.net.group();

    // one round trip: send the current counter (advancing it), then expect
    // the reply to equal the advanced counter
    auto ping_pong =
        [&]() {
            size_t token = counter_++;
            group.SendTo(peer, token);
            group.ReceiveFrom(peer, &token);
            die_unequal(token, counter_);
        };

    // extra, untimed round to synchronize with the peer
    ping_pong();

    common::StatsTimerStart inner_timer;
    for (size_t rep = 0; rep < inner_repeats_; ++rep) {
        ping_pong();
    }
    inner_timer.Stop();

    const double total_us = static_cast<double>(inner_timer.Microseconds());
    const double rounds = static_cast<double>(inner_repeats_);
    double avg = total_us / rounds;

    // NOTE(review): the label says "bandwidth" although the value logged is
    // a latency; kept as-is.
    sLOG0 << "bandwidth" << ctx.host_rank() << "->" << peer
          << "iteration" << iteration
          << "latency" << avg;

    latency_(ctx.host_rank(), peer).add(avg);
}
//! Run the pairwise bandwidth benchmark on the first worker of every host.
//!
//! For each of outer_repeats_ outer repetitions, inner_repeats_ passes over
//! the group's 1-factor schedule are timed; in every round the host with the
//! lower rank sends first and then receives, the other host does the
//! opposite. The maximum time over all hosts is printed as a RESULT line by
//! rank 0, and finally the per-pair bandwidth matrix is reduced to the root
//! and printed there.
//!
//! The block size must be a non-zero multiple of sizeof(size_t); otherwise
//! an error is printed by rank 0 and the benchmark is skipped.
//!
//! \param ctx context providing the network group and rank information
void Bandwidth::Test(api::Context& ctx) {
    // only work with first thread on this host.
    if (ctx.local_worker_id() != 0) return;

    // data_block_ is sized as block_size_ / sizeof(size_t) elements, while
    // Sender() accesses front()/back() and hands block_size_ bytes of it to
    // the connection. A zero block size would divide by zero below, and a
    // size that is not a whole number of elements would make the buffer
    // shorter than the number of bytes transferred (or empty).
    if (block_size_ == 0 || block_size_ % sizeof(size_t) != 0) {
        if (ctx.my_rank() == 0) {
            std::cerr << "Bandwidth: block size " << block_size_
                      << " must be a non-zero multiple of "
                      << sizeof(size_t) << " bytes" << std::endl;
        }
        return;
    }

    net::Group& group = ctx.net.group();

    bandwidth_ = AggMatrix(group.num_hosts());

    // data block to send or receive
    block_count_ = data_size_ / block_size_;
    data_block_.resize(block_size_ / sizeof(size_t), 42u);

    for (size_t outer_repeat = 0;
         outer_repeat < outer_repeats_; ++outer_repeat) {

        common::StatsTimerStopped timer;

        timer.Start();
        for (size_t inner_repeat = 0;
             inner_repeat < inner_repeats_; inner_repeat++) {
            // perform 1-factor ping pongs (without barriers)
            for (size_t round = 0; round < group.OneFactorSize(); ++round) {

                size_t peer = group.OneFactorPeer(round);

                sLOG0 << "round" << round
                      << "me" << ctx.host_rank() << "peer_id" << peer;

                if (ctx.host_rank() < peer) {
                    // lower rank sends first, then receives
                    Sender(ctx, peer, inner_repeat);
                    Receiver(ctx, peer);
                }
                else if (ctx.host_rank() > peer) {
                    // higher rank mirrors the order of its partner
                    Receiver(ctx, peer);
                    Sender(ctx, peer, inner_repeat);
                }
                else {
                    // not participating in this round: advance the counter
                    // by what a Sender() call adds (block_count_), doubled
                    // -- presumably Receiver() adds the same amount, so all
                    // hosts stay in step; confirm against Receiver().
                    counter_ += 2 * block_count_;
                }
            }
        }
        timer.Stop();

        size_t time = timer.Microseconds();
        // calculate maximum time.
        group.AllReduce(time, common::maximum<size_t>());

        if (ctx.my_rank() == 0) {
            std::cout
                << "RESULT"
                << " benchmark=" << benchmark
                << " hosts=" << ctx.num_hosts()
                << " outer_repeat=" << outer_repeat
                << " inner_repeats=" << inner_repeats_
                << " time[us]=" << time
                << " time_per_ping_pong[us]="
                << static_cast<double>(time) / static_cast<double>(counter_)
                << std::endl;
        }
    }

    // reduce (add) matrix to root.
    group.Reduce(bandwidth_);

    // print matrix
    if (ctx.my_rank() == 0)
        PrintMatrix(bandwidth_);
}