Example #1
0
// Test helper: records that it ran, checks that the latch has not been
// released yet, and then arrives at the latch.
//
// l  latch to arrive at; taken by reference, so the caller keeps ownership.
void test_count_down_and_wait(hpx::lcos::local::latch& l)
{
    // num_threads is declared outside this block; it is bumped once per call.
    ++num_threads;

    // The test expects the latch to still be unreleased before this caller
    // has counted down.
    HPX_TEST(!l.is_ready());
    // NOTE(review): presumably decrements the latch counter and blocks until
    // it reaches zero -- confirm against the HPX latch documentation.
    l.count_down_and_wait();
}
Example #2
0
// Thin wrapper that arrives at the given latch on behalf of the caller.
//
// l  latch to arrive at; taken by reference, so the caller keeps ownership.
void wait_for_latch(hpx::lcos::local::latch& l)
{
    // NOTE(review): presumably decrements the latch counter and blocks until
    // it reaches zero -- confirm against the HPX latch documentation.
    l.count_down_and_wait();
}
Example #3
0
std::vector<std::vector<double> >
numa_domain_worker(std::size_t domain,
    Policy policy,
    hpx::lcos::local::latch& l,
    std::size_t part_size, std::size_t offset, std::size_t iterations,
    Vector& a, Vector& b, Vector& c)
{
    typedef typename Vector::iterator iterator;
    iterator a_begin = a.begin() + offset;
    iterator b_begin = b.begin() + offset;
    iterator c_begin = c.begin() + offset;

    iterator a_end = a_begin + part_size;
    iterator b_end = b_begin + part_size;
    iterator c_end = c_begin + part_size;

    // Initialize arrays
    hpx::parallel::fill(policy, a_begin, a_end, 1.0);
    hpx::parallel::fill(policy, b_begin, b_end, 2.0);
    hpx::parallel::fill(policy, c_begin, c_end, 0.0);

    double t = mysecond();
    hpx::parallel::for_each(policy, a_begin, a_end,
        [&policy](STREAM_TYPE & v)
        {
            v = 2.0 * v;

#if defined(HPX_DEBUG)
            // make sure memory was placed appropriately
            hpx::threads::topology& topo = retrieve_topology();
            hpx::threads::mask_cref_type mem_mask =
                topo.get_thread_affinity_mask_from_lva(
                    reinterpret_cast<hpx::naming::address_type>(&v));

            typedef typename Policy::executor_type executor_type;
            typedef hpx::parallel::executor_information_traits<
                executor_type> traits;

            std::size_t thread_num = hpx::get_worker_thread_num();
            hpx::threads::mask_cref_type thread_mask =
                traits::get_pu_mask(policy.executor(), topo, thread_num);

            HPX_ASSERT(hpx::threads::mask_size(mem_mask) ==
                hpx::threads::mask_size(thread_mask));
            HPX_ASSERT(hpx::threads::bit_and(mem_mask, thread_mask,
                hpx::threads::mask_size(mem_mask)));
#endif
        });
    t = 1.0E6 * (mysecond() - t);

    if (domain == 0)
    {
        // Get initial value for system clock.
        int quantum = checktick();
        if(quantum >= 1)
        {
            std::cout
                << "Your clock granularity/precision appears to be " << quantum
                << " microseconds.\n"
                ;
        }
        else
        {
            std::cout
                << "Your clock granularity appears to be less than one microsecond.\n"
                ;
            quantum = 1;
        }

        std::cout
            << "Each test below will take on the order"
            << " of " << (int) t << " microseconds.\n"
            << "   (= " << (int) (t/quantum) << " clock ticks)\n"
            << "Increase the size of the arrays if this shows that\n"
            << "you are not getting at least 20 clock ticks per test.\n"
            << "-------------------------------------------------------------\n"
            ;

        std::cout
            << "WARNING -- The above is only a rough guideline.\n"
            << "For best results, please be sure you know the\n"
            << "precision of your system timer.\n"
            << "-------------------------------------------------------------\n"
            ;
    }

    // synchronize across NUMA domains
    l.count_down_and_wait();

    ///////////////////////////////////////////////////////////////////////////
    // Main Loop
    std::vector<std::vector<double> > timing(4, std::vector<double>(iterations));

    double scalar = 3.0;
    for(std::size_t iteration = 0; iteration != iterations; ++iteration)
    {
        // Copy
        timing[0][iteration] = mysecond();
        hpx::parallel::copy(policy, a_begin, a_end, c_begin);
        timing[0][iteration] = mysecond() - timing[0][iteration];

        // Scale
        timing[1][iteration] = mysecond();
        hpx::parallel::transform(policy,
            c_begin, c_end, b_begin,
            [scalar](STREAM_TYPE val)
            {
                return scalar * val;
            }
        );
        timing[1][iteration] = mysecond() - timing[1][iteration];

        // Add
        timing[2][iteration] = mysecond();
        hpx::parallel::transform(policy,
            a_begin, a_end, b_begin, b_end, c_begin,
            [](STREAM_TYPE val1, STREAM_TYPE val2)
            {
                return val1 + val2;
            }
        );
        timing[2][iteration] = mysecond() - timing[2][iteration];

        // Triad
        timing[3][iteration] = mysecond();
        hpx::parallel::transform(policy,
            b_begin, b_end, c_begin, c_end, a_begin,
            [scalar](STREAM_TYPE val1, STREAM_TYPE val2)
            {
                return val1 + scalar * val2;
            }
        );
        timing[3][iteration] = mysecond() - timing[3][iteration];
    }

    return timing;
}