Beispiel #1
0
/// Runs the exp/add/dbl/mul/log kernel chain once on a single device and
/// reports how long the individual phases took.
///
/// The three input vectors are written to hpxcl_single_buffer_a/_b/_c, the
/// kernels are enqueued with explicit event dependencies, and the result is
/// read back from hpxcl_single_buffer_z.
///
/// @param a          Host data written to hpxcl_single_buffer_a.
/// @param b          Host data written to hpxcl_single_buffer_b.
/// @param c          Host data written to hpxcl_single_buffer_c.
/// @param t_nonblock Receives timer_stop() taken right after all kernels
///                   have been enqueued.
/// @param t_sync     Receives timer_stop() taken once the future of the
///                   last kernel's event has been resolved.
/// @param t_finish   Receives timer_stop() taken once the last kernel's
///                   event has been awaited.
/// @return The bytes read from hpxcl_single_buffer_z, or an empty pointer
///         when the three inputs do not all have the same length.
///
/// NOTE(review): timer_stop() is called three times after one timer_start();
/// presumably it reports time elapsed since the start without resetting --
/// confirm against the timer implementation.
static boost::shared_ptr<std::vector<char>>
        hpxcl_single_calculate(std::vector<float> &a,
                               std::vector<float> &b,
                               std::vector<float> &c,
                               double* t_nonblock,
                               double* t_sync,
                               double* t_finish)
{
    // All three inputs must have the same number of elements; otherwise
    // bail out with an empty result.
    const size_t count = a.size();
    if(!(count == b.size() && b.size() == c.size()))
    {
        return boost::shared_ptr<std::vector<char>>();
    }

    // Number of bytes transferred per buffer.
    const size_t bytes = count*sizeof(float);

    // Upload the inputs to the device buffers ...
    shared_future<event> upload_a =
        hpxcl_single_buffer_a.enqueue_write(0, bytes, a.data());
    shared_future<event> upload_b =
        hpxcl_single_buffer_b.enqueue_write(0, bytes, b.data());
    shared_future<event> upload_c =
        hpxcl_single_buffer_c.enqueue_write(0, bytes, c.data());

    // ... and block until every upload is done, so that the transfers are
    // not part of the measured time.
    upload_a.get().await();
    upload_b.get().await();
    upload_c.get().await();

    // The measured section begins here.
    timer_start();

    // One-dimensional work range covering all elements.
    work_size<1> range;
    range[0].offset = 0;
    range[0].size = count;

    // exp kernel: depends on the upload of b
    shared_future<event> exp_done =
        hpxcl_single_exp_kernel.enqueue(range, upload_b);

    // add kernel: depends on the exp kernel and on the upload of a
    std::vector<shared_future<event>> add_inputs = { exp_done, upload_a };
    shared_future<event> add_done =
        hpxcl_single_add_kernel.enqueue(range, add_inputs);

    // dbl kernel: depends on the upload of c
    shared_future<event> dbl_done =
        hpxcl_single_dbl_kernel.enqueue(range, upload_c);

    // mul kernel: depends on the add and dbl kernels
    std::vector<shared_future<event>> mul_inputs = { add_done, dbl_done };
    shared_future<event> mul_done =
        hpxcl_single_mul_kernel.enqueue(range, mul_inputs);

    // log kernel: depends on the mul kernel
    shared_future<event> log_done_future =
        hpxcl_single_log_kernel.enqueue(range, mul_done);

    ////////// EVERYTHING ABOVE WAS ENQUEUED WITHOUT BLOCKING /////////////////

    // Checkpoint 1: time spent issuing the non-blocking calls.
    *t_nonblock = timer_stop();

    // Resolve the future of the last kernel's event.
    event log_done = log_done_future.get();

    // Checkpoint 2: time until the event object is available.
    *t_sync = timer_stop();

    // Block until the last kernel's event has completed.
    log_done.await();

    // Checkpoint 3: time until the whole kernel chain has finished.
    *t_finish = timer_stop();

    // Enqueue the read of the result buffer, ordered after the log kernel.
    shared_future<event> download_future =
        hpxcl_single_buffer_z.enqueue_read(0, bytes, log_done);

    // Obtain the read event, then wait for its data and hand it back.
    event download = download_future.get();
    return download.get_data().get();
}