Пример #1
0
    void Test(api::Context& ctx) {

        for (size_t outer = 0; outer < outer_repeats_; ++outer) {

            common::StatsTimerStopped t;

            size_t dummy = +4915221495089;

            t.Start();
            for (size_t inner = 0; inner < inner_repeats_; ++inner) {
                dummy = ctx.net.Broadcast(dummy);
            }
            t.Stop();

            size_t n = ctx.num_workers();
            size_t time = t.Microseconds();
            // calculate maximum time.
            time = ctx.net.AllReduce(time, common::maximum<size_t>());

            if (ctx.my_rank() == 0) {
                std::cout
                    << "RESULT"
                    << " datatype=" << "size_t"
                    << " operation=" << "broadcast"
                    << " workers=" << n
                    << " inner_repeats=" << inner_repeats_
                    << " time[us]=" << time
                    << " time_per_op[us]="
                    << static_cast<double>(time) / inner_repeats_
                    << std::endl;
            }
        }
    }
Пример #2
0
    void Test(api::Context& ctx) {

        common::StatsTimerStopped t;

        // only work with first thread on this host.
        if (ctx.local_worker_id() == 0)
        {
            mem::Manager mem_manager(nullptr, "Dispatcher");

            group_ = &ctx.net.group();
            std::unique_ptr<net::Dispatcher> dispatcher =
                group_->ConstructDispatcher();
            dispatcher_ = dispatcher.get();

            t.Start();

            for (size_t outer = 0; outer < outer_repeats_; ++outer)
            {
                rnd_ = std::default_random_engine(123456);

                active_ = 0;
                remaining_requests_ = num_requests_;

                while (active_ < limit_active_ && remaining_requests_ > 0)
                {
                    if (MaybeStartRequest()) {
                        ++active_;
                    }
                }

                dispatcher_->Loop();
            }

            t.Stop();

            // must clean up dispatcher prior to using group for other things.
        }

        size_t time = t.Microseconds();
        // calculate maximum time.
        time = ctx.net.AllReduce(time, common::maximum<size_t>());

        if (ctx.my_rank() == 0) {
            std::cout
                << "RESULT"
                << " operation=" << "rblocks"
                << " hosts=" << group_->num_hosts()
                << " requests=" << num_requests_
                << " block_size=" << block_size_
                << " limit_active=" << limit_active_
                << " time[us]=" << time
                << " time_per_op[us]="
                << static_cast<double>(time) / num_requests_
                << " total_bytes=" << block_size_ * num_requests_
                << " total_bandwidth[MiB/s]="
                << CalcMiBs(block_size_ * num_requests_, time)
                << std::endl;
        }
    }
Пример #3
0
    void Test(api::Context& ctx) {

        for (size_t outer = 0; outer < outer_repeats_; ++outer) {

            common::StatsTimerStopped t;

            size_t n = ctx.num_workers();

            t.Start();
            for (size_t inner = 0; inner < inner_repeats_; ++inner) {
                // allreduce a different value in each iteration
                size_t value = inner + ctx.my_rank();
                value = ctx.net.AllReduce(value);
                size_t expected = (n + inner) * ((n + inner) - 1) / 2 - inner * (inner - 1) / 2;
                die_unequal(value, expected);
            }
            t.Stop();

            size_t time = t.Microseconds();
            // calculate maximum time.
            time = ctx.net.AllReduce(time, common::maximum<size_t>());

            if (ctx.my_rank() == 0) {
                std::cout
                    << "RESULT"
                    << " datatype=" << "size_t"
                    << " operation=" << "allreduce"
                    << " workers=" << n
                    << " inner_repeats=" << inner_repeats_
                    << " time[us]=" << time
                    << " time_per_op[us]="
                    << static_cast<double>(time) / inner_repeats_
                    << std::endl;
            }
        }
    }
Пример #4
0
void ExperimentFull(
    api::Context& ctx, const std::string& type_as_string) {

    // transmit data to all workers.

    auto stream = ctx.GetNewCatStream();

    // write phase
    StatsTimer<true> write_timer(true);
    {
        auto writers = stream->OpenWriters();
        auto data = Generator<Type>(g_bytes);

        while (data.HasNext()) {
            Type value = data.Next();
            for (size_t tgt = 0; tgt < ctx.num_workers(); ++tgt) {
                writers[tgt](value);
            }
        }
    }
    write_timer.Stop();

    // read phase
    StatsTimer<true> read_timer(true);
    {
        auto reader = stream->OpenCatReader(true);

        while (reader.HasNext()) {
            reader.Next<Type>();
        }
    }
    read_timer.Stop();

    size_t read_microsecs = read_timer.Microseconds();
    read_microsecs =
        ctx.AllReduce(read_microsecs, common::maximum<size_t>());

    size_t write_microsecs = write_timer.Microseconds();
    write_microsecs =
        ctx.AllReduce(write_microsecs, common::maximum<size_t>());

    uint64_t host_volume = ctx.num_workers() * g_bytes;
    uint64_t total_volume = ctx.num_workers() * ctx.num_workers() * g_bytes;

    if (ctx.my_rank() == 0) {
        std::cout
            << "RESULT"
            << " datatype=" << type_as_string
            << " size=" << g_bytes
            << " write_time=" << write_microsecs
            << " read_time=" << read_microsecs
            << " write_speed_MiBs="
            << (g_bytes / write_microsecs * 1000000 / 1024 / 1024)
            << " read_speed_MiBs="
            << (g_bytes / read_microsecs * 1000000 / 1024 / 1024)
            << " host_write_speed_MiBs="
            << (host_volume / write_microsecs * 1000000 / 1024 / 1024)
            << " host_read_speed_MiBs="
            << (host_volume / read_microsecs * 1000000 / 1024 / 1024)
            << " total_write_speed_MiBs="
            << (total_volume / write_microsecs * 1000000 / 1024 / 1024)
            << " total_read_speed_MiBs="
            << (total_volume / read_microsecs * 1000000 / 1024 / 1024)
            << std::endl;
    }
}
Пример #5
0
void ExperimentAllPairs(
    api::Context& ctx, const std::string& type_as_string) {

    for (size_t src = 0; src < ctx.num_workers(); ++src) {
        for (size_t tgt = 0; tgt < ctx.num_workers(); ++tgt) {
            // transmit data from worker src -> tgt: only send data if we are
            // tgt, but as tgt receive from all.

            auto stream = ctx.GetNewCatStream();

            // write phase
            StatsTimer<true> write_timer(true);
            {
                auto writers = stream->OpenWriters();

                if (ctx.my_rank() == src) {
                    auto data = Generator<Type>(g_bytes);

                    auto& writer = writers[tgt];
                    while (data.HasNext()) {
                        writer(data.Next());
                    }
                }
            }
            write_timer.Stop();

            // read phase
            StatsTimer<true> read_timer(true);
            {
                auto reader = stream->OpenCatReader(true);

                while (reader.HasNext()) {
                    reader.Next<Type>();
                }
            }
            read_timer.Stop();

            size_t read_microsecs = read_timer.Microseconds();
            read_microsecs =
                ctx.AllReduce(read_microsecs, common::maximum<size_t>());

            size_t write_microsecs = write_timer.Microseconds();
            write_microsecs =
                ctx.AllReduce(write_microsecs, common::maximum<size_t>());

            if (ctx.my_rank() == 0) {
                std::cout
                    << "RESULT"
                    << " datatype=" << type_as_string
                    << " size=" << g_bytes
                    << " src=" << src << " tgt=" << tgt
                    << " write_time=" << write_microsecs
                    << " read_time=" << read_microsecs
                    << " write_speed_MiBs="
                    << (g_bytes / write_microsecs * 1000000 / 1024 / 1024)
                    << " read_speed_MiBs="
                    << (g_bytes / read_microsecs * 1000000 / 1024 / 1024)
                    << std::endl;
            }
        }
    }
}
Пример #6
0
void Bandwidth::Test(api::Context& ctx) {

    // only work with first thread on this host.
    if (ctx.local_worker_id() != 0) return;

    net::Group& group = ctx.net.group();

    bandwidth_ = AggMatrix(group.num_hosts());

    // data block to send or receive
    block_count_ = data_size_ / block_size_;
    data_block_.resize(block_size_ / sizeof(size_t), 42u);

    for (size_t outer_repeat = 0;
         outer_repeat < outer_repeats_; ++outer_repeat) {

        common::StatsTimerStopped timer;

        timer.Start();
        for (size_t inner_repeat = 0;
             inner_repeat < inner_repeats_; inner_repeat++) {
            // perform 1-factor ping pongs (without barriers)
            for (size_t round = 0; round < group.OneFactorSize(); ++round) {

                size_t peer = group.OneFactorPeer(round);

                sLOG0 << "round" << round
                      << "me" << ctx.host_rank() << "peer_id" << peer;

                if (ctx.host_rank() < peer) {
                    Sender(ctx, peer, inner_repeat);
                    Receiver(ctx, peer);
                }
                else if (ctx.host_rank() > peer) {
                    Receiver(ctx, peer);
                    Sender(ctx, peer, inner_repeat);
                }
                else {
                    // not participating in this round
                    counter_ += 2 * block_count_;
                }
            }
        }
        timer.Stop();

        size_t time = timer.Microseconds();
        // calculate maximum time.
        group.AllReduce(time, common::maximum<size_t>());

        if (ctx.my_rank() == 0) {
            std::cout
                << "RESULT"
                << " benchmark=" << benchmark
                << " hosts=" << ctx.num_hosts()
                << " outer_repeat=" << outer_repeat
                << " inner_repeats=" << inner_repeats_
                << " time[us]=" << time
                << " time_per_ping_pong[us]="
                << static_cast<double>(time) / static_cast<double>(counter_)
                << std::endl;
        }
    }

    // reduce (add) matrix to root.
    group.Reduce(bandwidth_);

    // print matrix
    if (ctx.my_rank() == 0)
        PrintMatrix(bandwidth_);
}