Exemplo n.º 1
0
int main()
{
    try {
        cl::Context context;
        std::vector<cl::Device> devices;
        std::tie(context, devices) = init_open_cl();
        cl::CommandQueue queue(context, devices[0]);
        cl::Program program = load_program("program.cl", context, devices);
        cl_fn reduce_fn(program, "do_reduce");
        cl_fn sweep_fn(program, "do_sweep");

        std::ifstream in(INPUT_FILE);
        size_t n, npow2;
        in >> n;
        npow2 = pow(2.0, ceil(log2(n)));
        std::vector<float> in_array(npow2);
        for (size_t i = 0; i < n; ++i)
            in >> in_array[i];

        cl::Buffer out_buf(context, std::begin(in_array), std::end(in_array), false);
        std::vector<cl::Event> events;

        for (size_t offset = 1; npow2 / (offset * 2) >= WORKGROUP_SIZE; offset *= 2)
            exec_fn(reduce_fn, out_buf, npow2, offset, npow2 / offset, events, queue);

        if (npow2 < 512)
            exec_fn(reduce_fn, out_buf, npow2, 1, WORKGROUP_SIZE, events, queue);

        exec_fn(sweep_fn, out_buf, npow2, npow2 / 2, WORKGROUP_SIZE, events, queue);

        for (size_t offset = npow2 / 1024; offset > 0; offset /= 2)
            exec_fn(sweep_fn, out_buf, npow2, offset, npow2 / offset, events, queue);

        std::vector<float> out_array(n);
        queue.enqueueReadBuffer(out_buf, CL_TRUE, 0, sizeof(float) * n, &out_array[0]);

        std::ofstream out(OUTPUT_FILE);
        out << std::fixed << std::setprecision(3);
        for (size_t i = 0; i < n; i++)
            out << out_array[i] << " ";
        out << std::endl;
    }
    catch (cl::Error &e) {
        std::cerr << "ERROR: " << e.what() << " (" << e.err() << ")" << std::endl;
    }
    catch (std::runtime_error &e) {
        std::cerr << e.what() << std::endl;
    }

    return 0;
}
Exemplo n.º 2
0
/////////////////// move to lib later
/*
 * Server-side loop for asynchronous invocations in this component.
 *
 * The calling thread first blocks in sched_block(). Once it resumes, it looks
 * up this component's acap, client capability count and shared ring page,
 * builds a table mapping each capability index to its handler address, and
 * then repeatedly dequeues invocation requests from the ring and dispatches
 * them through exec_fn(). When the ring is empty it waits on the acap with
 * cos_areceive().
 *
 * Returns 0 when the loop ends (curr->stop becomes non-zero), or -1 if the
 * handler table cannot be allocated.
 */
int cos_ainv_handling(void) {
    struct __cos_ainv_srv_thd curr_data = { .stop = 0 };
    struct __cos_ainv_srv_thd *curr = &curr_data;
    int acap, i;
    int curr_thd_id = cos_get_thd_id();

    printc("upcall thread %d (core %ld) waiting in pong...\n", cos_get_thd_id(), cos_cpuid());
    sched_block(cos_spd_id(), 0);
    printc("upcall thread %d (core %ld) up!\n", cos_get_thd_id(), cos_cpuid());

    curr->acap = acap_srv_lookup(cos_spd_id());
    curr->cli_ncaps = acap_srv_ncaps(cos_spd_id());
    curr->shared_page = acap_srv_lookup_ring(cos_spd_id());
    assert(curr->acap && curr->cli_ncaps && curr->shared_page);

    init_shared_page(&curr->shared_struct, curr->shared_page);

    /* One handler address per client capability, indexed 0 .. cli_ncaps - 1. */
    curr->fn_mapping = malloc(sizeof(vaddr_t) * curr->cli_ncaps);
    if (unlikely(curr->fn_mapping == NULL)) goto err_nomem;
    for (i = 0; i < curr->cli_ncaps; i++) {
        curr->fn_mapping[i] = (vaddr_t)acap_srv_fn_mapping(cos_spd_id(), i);
    }

    acap = curr->acap;

    printc("server %ld, upcall thd %d has acap %d.\n",
           cos_spd_id(), curr_thd_id, acap);

    struct shared_struct *shared_struct = &curr->shared_struct;
    CK_RING_INSTANCE(inv_ring) *ring = shared_struct->ring;
    assert(ring);

    struct inv_data inv;
    while (curr->stop == 0) {
        CLEAR_SERVER_ACTIVE(shared_struct); // clear active early to avoid race (and atomic instruction)
        if (CK_RING_DEQUEUE_SPSC(inv_ring, ring, &inv) == false) {
            /* Nothing queued: wait on the acap until woken. */
            printc("thread %d waiting on acap %d\n", cos_get_thd_id(), acap);
            cos_areceive(acap);
            printc("thread %d up from areceive\n", cos_get_thd_id());
        } else {
            SET_SERVER_ACTIVE(shared_struct); /* setting us active */
            printc("core %ld: got inv for cap %d, param %d, %d, %d, %d\n",
                   cos_cpuid(), inv.cap, inv.params[0], inv.params[1], inv.params[2], inv.params[3]);
            /*
             * Valid indices are 0 .. cli_ncaps - 1. The previous `>` test let
             * inv.cap == cli_ncaps read one element past fn_mapping, and
             * negative values were not rejected at all.
             */
            if (unlikely(inv.cap < 0 || inv.cap >= curr->cli_ncaps || !curr->fn_mapping[inv.cap])) {
                printc("Server thread %d in comp %ld: receiving invalid cap %d\n",
                       cos_get_thd_id(), cos_spd_id(), inv.cap);
            } else {
                //execute!
                exec_fn((void *)curr->fn_mapping[inv.cap], 4, inv.params);
                // TODO: write the handler's result back as the return value.
            }
        }
    }

    /* Release the handler table before leaving; it was leaked previously. */
    free(curr->fn_mapping);
    return 0;
err_nomem:
    printc("couldn't allocate memory in spd %ld\n", cos_spd_id());
    return -1;
}

/*
 * Upcall entry point for this component.
 *
 * A COS_UPCALL_THD_CREATE upcall runs the asynchronous-invocation server loop
 * via cos_ainv_handling(). Every other upcall type is unexpected here and is
 * only logged; no fault is raised. arg1..arg3 are not used.
 */
void cos_upcall_fn(upcall_type_t t, void *arg1, void *arg2, void *arg3)
{
    if (t == COS_UPCALL_THD_CREATE) {
        cos_ainv_handling();
        return;
    }

    /* Unhandled upcall type: report it and return. */
    printc("\n upcall type t %d\n", t);
}