Example #1
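/**
 * \brief Send CAPS_PER_CORE capabilities, each to a randomly chosen remote
 *        core, and return the average send cost in cycles, skipping the
 *        first and last 20 iterations as warmup/cooldown.
 */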
static cycles_t send_caps(void)
{
    errval_t err;
    cycles_t time_taken = 0;

    srand(bench_tsc());  // random starting seed

    for (int i=0; i<CAPS_PER_CORE; i++) {
        coreid_t to_core;
        do {
            to_core = rand() % num_cores;
        } while(to_core == my_coreid);

        do {
            cycles_t start = bench_tsc();
            err = bindings[to_core]->tx_vtbl.send_cap(
                       bindings[to_core], NOP_CONT, my_caps[i]);
            if (i >= 20 && i < (CAPS_PER_CORE - 20)) { // avoid warmup / cooldown
                time_taken += (bench_tsc() - start);
            }
        } while(redo_message(err));

        if (err_is_fail(err)) {
            DEBUG_ERR(err, "xcorecap: cap send failed\n");   
            abort(); 
        }
    }

    return time_taken / (CAPS_PER_CORE - 40);
}
Example #2
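/**
 * \brief Measure message latency on the raw UMP channel to core \p idx:
 *        timestamp each send, busy-wait for the reply, and print the
 *        per-message cycle counts (minus timer overhead).
 */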
void experiment(coreid_t idx)
{
    timestamps = malloc(sizeof(struct timestamps) * MAX_COUNT);
    assert(timestamps != NULL);

    struct bench_ump_binding *bu = (struct bench_ump_binding*)array[idx];
    struct flounder_ump_state *fus = &bu->ump_state;
    struct ump_chan *chan = &fus->chan;

    struct ump_chan_state *send = &chan->send_chan;
    struct ump_chan_state *recv = &chan->endpoint.chan;

    printf("Running latency between core %"PRIuCOREID" and core %"PRIuCOREID"\n", my_core_id, idx);

    /* Run experiment */
    for (int i = 0; i < MAX_COUNT; i++) {
        volatile struct ump_message *msg;
        struct ump_control ctrl;
        timestamps[i].time0 = bench_tsc();
        msg = ump_impl_get_next(send, &ctrl);
        msg->header.control = ctrl;
        while (!ump_impl_recv(recv));
        timestamps[i].time1 = bench_tsc();
    }

    /* Print results */
    for (int i = MAX_COUNT / 10; i < MAX_COUNT; i++) { // skip first 10% as warmup
        if (timestamps[i].time1 > timestamps[i].time0) {
            printf("page %d took %"PRIuCYCLES"\n", i,
                   timestamps[i].time1 - bench_tscoverhead() -
                   timestamps[i].time0);
        }
    }
}
Example #3
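/**
 * \brief Time ITERATIONS calls to timing_sync_timer(), print the duration
 *        of each, and report how many cores have an intermon binding.
 */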
void timing_sync_bench(void)
{
    static cycles_t timestamp[ITERATIONS];

    for(int i = 0; i < ITERATIONS; i++) {
        cycles_t start = bench_tsc();
        errval_t err = timing_sync_timer();
        assert(err_is_ok(err));
        cycles_t end = bench_tsc();
        timestamp[i] = end - start;
    }

    for(int i = 0; i < ITERATIONS; i++) {
        printf("duration %d: %" PRIuCYCLES "\n", i, timestamp[i]);
    }

    int nthreads = 0;
    for(int i = 0; i <= MAX_COREID; i++) {
        struct intermon_binding *b = NULL;
        errval_t err = intermon_binding_get(i, &b);
        if(err_is_ok(err) && b != NULL) {
            nthreads++;
        }
    }

    printf("number of threads: %d\n", nthreads);
    printf("client done.\n");
}
Example #4
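/**
 * \brief Measure the cost of clock_get_timestamp() by bracketing each of
 *        MAX_COUNT calls with timestamps.
 */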
static void run_experiment(void)
{
    for (int i = 0; i < MAX_COUNT; i++) {
        timestamps[i].time0 = bench_tsc();
        clock_get_timestamp();
        timestamps[i].time1 = bench_tsc();
    }
}
Example #5
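/**
 * \brief Initialize the BOMP backend for nthreads threads and record the
 *        initialization time in timer_xompinit.
 */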
static void prepare_bomp(void)
{
    debug_printf("prepare_bomp\n");
    cycles_t tsc_start = bench_tsc();
    bomp_bomp_init(nthreads);
    cycles_t tsc_end = bench_tsc();
    timer_xompinit = bench_time_diff(tsc_start, tsc_end);
}
Example #6
/**
 * \brief Measure overhead of taking timestamp
 */
static void measure_tsc(void)
{
    uint64_t begin;
    uint64_t end;

    begin = bench_tsc();
    for(int i = 0; i < 1000; i++) {
        end = bench_tsc();
    }

    tsc_overhead = (end - begin) / 1000;
}
Example #7
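/**
 * \brief Time 300 runs of a fixed busy loop and print the cycle count of
 *        each run.
 */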
int main(int argc, char *argv[])
{
    bench_init();
    int k = 300;
    while(k--) {

        uint64_t start = bench_tsc();
        for (volatile int i = 0; i < ITERATIONS; i++);
        uint64_t end = bench_tsc();

        printf("%"PRIu64"\n", end - start);
    }

    return EXIT_SUCCESS;
}
Example #8
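/**
 * \brief Span the domain over the requested number of threads, busy-loop in
 *        an OpenMP parallel region for a fixed cycle budget, and report the
 *        elapsed time (optionally with a trace dump).
 */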
int main(int argc, char *argv[])
{
    volatile uint64_t workcnt = 0;
    int nthreads;

    debug_printf("bomptest started.\n");

    bench_init();

#if CONFIG_TRACE
    errval_t err = trace_control(TRACE_EVENT(TRACE_SUBSYS_ROUTE,
                                             TRACE_EVENT_ROUTE_BENCH_START, 0),
                                 TRACE_EVENT(TRACE_SUBSYS_ROUTE,
                                             TRACE_EVENT_ROUTE_BENCH_STOP, 0), 0);
    assert(err_is_ok(err));
#endif

    if(argc == 2) {
        nthreads = atoi(argv[1]);
        backend_span_domain(nthreads, STACK_SIZE);
        bomp_custom_init(NULL);
        omp_set_num_threads(nthreads);
    } else {
        assert(!"Specify number of threads");
    }

    trace_event(TRACE_SUBSYS_ROUTE, TRACE_EVENT_ROUTE_BENCH_START, 0);

    uint64_t start = bench_tsc();
#pragma omp parallel
    while(rdtsc() < start + 805000000ULL) { // spin for a fixed cycle budget
        workcnt++;
    }
    uint64_t end = bench_tsc();

    trace_event(TRACE_SUBSYS_ROUTE, TRACE_EVENT_ROUTE_BENCH_STOP, 0);

    printf("done. time taken: %" PRIu64 " cycles.\n", end - start);

#if CONFIG_TRACE
    char *buf = malloc(4096*4096);
    trace_dump(buf, 4096*4096, NULL);
    printf("%s\n", buf);
#endif

    for(;;); // spin forever instead of exiting
    return 0;
}
Example #9
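/**
 * \brief Write \p repetitions chunks of \p chunksize bytes to FILENAME,
 *        time the writes, and report the resulting throughput.
 */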
static void single_run(int32_t chunksize, int32_t repetitions)
{
    errval_t err;
    vfs_handle_t handle;

    // create file
    err = vfs_create(FILENAME, &handle);
    assert(err_is_ok(err));

    // create chunk containing arbitrary data
    uint8_t *chunk = malloc(chunksize);
    assert(chunk != NULL);

    // start time
    printf("Start run with chunksize: %" PRId32 ", repetitions: %" PRId32
           "\n", chunksize, repetitions);
    cycles_t start_cycles = bench_tsc();

    size_t written = 0;
    for (int32_t i = 0; i < repetitions; i++) {
        err = vfs_write(handle, chunk, chunksize, &written);
        assert(err_is_ok(err));
        assert(written == chunksize);
    }
    err = vfs_close(handle);
    assert(err_is_ok(err));

    // end time
    cycles_t end_cycles = bench_tsc();

    // evaluation
    cycles_t cycles = end_cycles - start_cycles;
    uint64_t ms = bench_tsc_to_ms(cycles);
    double sec = (double) ms / 1000.0;
    int64_t bytes_written = (int64_t) chunksize * repetitions; // widen before multiply to avoid 32-bit overflow
    double kibibytes_written = (double) bytes_written / 1024.0;
    double mebibytes_written = (double) bytes_written / (1024.0 * 1024.0);
    double kibps = kibibytes_written / sec;
    printf("%" PRId64 " bytes (%.1f KiB, %.1f MiB) written in %" PRIuCYCLES
           " cycles (%" PRIu64 " ms, %.1f s) -> %.1f KiB/s\n", bytes_written,
           kibibytes_written, mebibytes_written, cycles, ms, sec, kibps);

    // cleanup
    free(chunk);
    err = vfs_remove(FILENAME);
    assert(err_is_ok(err));
}
Example #10
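/**
 * \brief Barrier implemented as a reduction followed by a broadcast; in
 *        debug builds a timestamp can be recorded between the two phases.
 */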
void mp_barrier(cycles_t *measurement)
{
    coreid_t tid = get_core_id();

#ifdef QRM_DBG_ENABLED
    ++_num_barrier;
    uint32_t _num_barrier_recv = _num_barrier;
#endif

    debug_printfff(DBG__REDUCE, "barrier enter #%d\n", _num_barrier);

    // Reduction
    // --------------------------------------------------
#ifdef QRM_DBG_ENABLED
    uint32_t _tmp =
#endif
    mp_reduce(_num_barrier);

#ifdef QRM_DBG_ENABLED
    // Sanity check
    if (tid==get_sequentializer()) {
        assert (_tmp == get_num_threads()*_num_barrier);
    }
    if (measurement)
        *measurement = bench_tsc();

#endif

    // Broadcast
    // --------------------------------------------------
    if (tid == get_sequentializer()) {
        mp_send_ab(_num_barrier);

    } else {
#ifdef QRM_DBG_ENABLED
        _num_barrier_recv =
#endif
            mp_receive_forward(0);
    }

#ifdef QRM_DBG_ENABLED
    if (_num_barrier_recv != _num_barrier) {
        debug_printf("ASSERTION fail %d != %d\n", _num_barrier_recv, _num_barrier);
    }
    assert (_num_barrier_recv == _num_barrier);

    // Add a shared memory barrier to absolutely make sure that
    // everybody finished the barrier before leaving - this simplifies
    // debugging, as the program will get stuck if barriers are
    // broken, rather than some threads (wrongly) continuing and
    // causing problems somewhere else
#if 0 // Enable separately
    debug_printfff(DBG__REDUCE, "finished barrier .. waiting for others\n");
    shl_barrier_shm(get_num_threads());
#endif
#endif

    debug_printfff(DBG__REDUCE, "barrier complete #%d\n", _num_barrier);
}
Example #11
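/**
 * \brief Retype CAPS_PER_CORE capabilities into frames and return the
 *        average retype cost in cycles, skipping warmup/cooldown iterations.
 */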
static cycles_t retype_caps(void)
{
    errval_t err;
    cycles_t time_taken = 0;
    for (int i=0; i<CAPS_PER_CORE; i++) {
        cycles_t start = bench_tsc();
        err = cap_retype(retyped_caps[i], my_caps[i], ObjType_Frame, CHILD_BITS);
        if (i >= 20 && i < (CAPS_PER_CORE - 20)) { // avoid warmup / cooldown
            time_taken += (bench_tsc() - start);
        }
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "xcorecap: Retype to frame failed\n");    
        }
    }

    return time_taken / (CAPS_PER_CORE - 40);
}
Example #12
void record_packet_transmit_to_bf(void)
{
    the_stats.last_packet_transmit_to_bf_ts = bench_tsc();
    if (the_stats.last_packet_transmit_to_bf_ts > the_stats.last_packet_receive_net_ts) {
        push_net_to_bf_diff(the_stats.last_packet_transmit_to_bf_ts
                            - the_stats.last_packet_receive_net_ts);
    } else {
        // skip packet: timestamps are out of order
    }
}
Example #13
// called when a message is received
static inline void message_received(void)
{
    errval_t err;

    // save timestamp
    timestamps[i].time1 = bench_tsc();

    // trace receive event
    err = trace_event(TRACE_SUBSYS_MULTIHOP,
            TRACE_EVENT_MULTIHOP_MESSAGE_RECEIVE, 0);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "trace_event failed");
    }

    reply_received = true;
}
Example #14
void record_packet_receive_from_net(void)
{
    the_stats.last_packet_receive_net_ts = bench_tsc();
}
Example #15
// continue experiment: timestamp and send the next message, wait for the
// reply, and advance to the next message type after MAX_COUNT messages
static void experiment_cont(void* arg)
{

    errval_t err;
    static bool flag = false;
    static int message_type = 0;

    // Experiment finished (with this message type)
    if (i == MAX_COUNT - 1) {

#if !CONFIG_TRACE
        // print measured times
        for (int j = MAX_COUNT / 10; j < MAX_COUNT; j++) { // skip first 10% as warmup

            printf(
                    "page %d took %"PRIuCYCLES"\n",
                    j,
                    timestamps[j].time1 - bench_tscoverhead()
                            - timestamps[j].time0);
        }
#endif
        // go to next message type
        message_type++;
        flag = false;
        i = 0;
        if (message_type > 13) {

            // stop tracing
            err = trace_event(TRACE_SUBSYS_MULTIHOP,
                    TRACE_EVENT_MULTIHOP_BENCH_STOP, 0);
            if (err_is_fail(err)) {
                USER_PANIC_ERR(err, "trace_event failed");
            }

#if CONFIG_TRACE
            // dump trace
            char *buf = malloc(50*4096*4096);
            size_t length = trace_dump(buf, 20*4096*4096, NULL);
            printf("%s\n", buf);
            printf("length of buffer %lu\n", length);
#endif
            printf("client done!\n");
            return;
        }
    }

    if (!flag) { // Start experiment

#if !CONFIG_TRACE
        printf("Running latency test for message %s...\n",
                get_message_name(message_type));
#endif
        flag = true;
        timestamps[i].time0 = bench_tsc();
    } else { // Continue experiment
        i++;
        timestamps[i].time0 = bench_tsc();
    }

    // trace send event
    err = trace_event(TRACE_SUBSYS_MULTIHOP, TRACE_EVENT_MULTIHOP_MESSAGE_SEND,
            message_type);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "trace_event failed");
    }

    // send next message
    switch (message_type) {
    case 0:
        err = binding->tx_vtbl.fsb_empty_request(binding, NOP_CONT);
        break;
    case 1:
        err = binding->tx_vtbl.fsb_payload32_1_request(binding, NOP_CONT, 1);
        break;

    case 2:
        err = binding->tx_vtbl.fsb_payload32_2_request(binding, NOP_CONT, 1, 2);
        break;

    case 3:
        err = binding->tx_vtbl.fsb_payload32_4_request(binding, NOP_CONT, 1, 2,
                3, 4);
        break;

    case 4:
        err = binding->tx_vtbl.fsb_payload32_8_request(binding, NOP_CONT, 1, 2,
                3, 4, 5, 6, 7, 8);
        break;

    case 5:
        err = binding->tx_vtbl.fsb_payload32_16_request(binding, NOP_CONT, 1, 2,
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        break;

    case 6:
        err = binding->tx_vtbl.fsb_payload64_1_request(binding, NOP_CONT, 1);
        break;

    case 7:
        err = binding->tx_vtbl.fsb_payload64_2_request(binding, NOP_CONT, 1, 2);
        break;

    case 8:
        err = binding->tx_vtbl.fsb_payload64_4_request(binding, NOP_CONT, 1, 2,
                3, 4);
        break;

    case 9:
        err = binding->tx_vtbl.fsb_payload64_8_request(binding, NOP_CONT, 1, 2,
                3, 4, 5, 6, 7, 8);
        break;

    case 10:
        err = binding->tx_vtbl.fsb_payload64_16_request(binding, NOP_CONT, 1, 2,
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        break;

    case 11:
        err = binding->tx_vtbl.fsb_buffer_request(binding, NOP_CONT, &buffer,
                1);
        break;

    case 12:
        err = binding->tx_vtbl.fsb_buffer_request(binding, NOP_CONT, buffer2,
                100);
        break;

    case 13:
        err = binding->tx_vtbl.fsb_buffer_request(binding, NOP_CONT, buffer3,
                1000);
        break;

    default:
        printf("unknown message type\n");
        abort();
        break;
    }

    // make sure send was successful
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "while running experiment\n");
    }

    // receive reply (by dispatching events from the
    // waitset we use for the benchmark)
    while (reply_received == false) {
        event_dispatch(&signal_waitset);
    }

    experiment();
}
Example #16
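/**
 * \brief Handle a gateway memory request: look up the frame for \p addr,
 *        map it with flags derived from \p type, and send the response.
 */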
static void gw_req_memory_call_rx(struct xomp_binding *b,
                                  uint64_t addr,
                                  uint8_t type)
{
    XWI_DEBUG("gw_req_memory_call_rx: addr:%lx, tyep: %u\n", addr, type);

#if XOMP_BENCH_WORKER_EN
    cycles_t mem_timer = bench_tsc();
#endif

    struct txq_msg_st *msg_st = txq_msg_st_alloc(&txq);
    assert(msg_st != NULL);

    struct capref frame;
    if (type == XOMP_FRAME_TYPE_REPL_RW) {
        type = XOMP_FRAME_TYPE_SHARED_RW;
    }
    assert(!(worker_id & XOMP_WID_GATEWAY_FLAG));

    msg_st->send = gw_req_memory_response_tx;
    msg_st->cleanup = NULL;

    XWR_DEBUG("Requesting frame from gateway: [%016lx]\n", usrdata);

    msg_st->err = xomp_gateway_get_memory(addr, &frame);
    if (err_is_fail(msg_st->err)) {
        txq_send(msg_st);
        return;
    }

    vregion_flags_t map_flags;

    switch ((xomp_frame_type_t) type) {
        case XOMP_FRAME_TYPE_MSG:
            map_flags = VREGION_FLAGS_READ_WRITE;
            break;
        case XOMP_FRAME_TYPE_SHARED_RW:
        case XOMP_FRAME_TYPE_REPL_RW:
            map_flags = VREGION_FLAGS_READ_WRITE;
            break;
        case XOMP_FRAME_TYPE_SHARED_RO:
            map_flags = VREGION_FLAGS_READ;
            break;
        default:
            USER_PANIC("unknown type: %u", type)
            break;
    }

    struct frame_identity id;
    msg_st->err = invoke_frame_identify(frame, &id);
    if (err_is_fail(msg_st->err)) {
        txq_send(msg_st);
        return;
    }

    if (addr) {
        msg_st->err = vspace_map_one_frame_fixed_attr(addr, (1UL << id.bits),
                                                      frame, map_flags, NULL, NULL);
    } else {
        void *map_addr;
        msg_st->err = vspace_map_one_frame_attr(&map_addr, (1UL << id.bits),
                                                frame, map_flags, NULL, NULL);
    }

#if XOMP_BENCH_WORKER_EN
    mem_timer = bench_tsc() - mem_timer;
    debug_printf("%lx mem request %016lx took  %lu cycles, %lu ms\n", worker_id,
                 addr, mem_timer, bench_tsc_to_ms(mem_timer));
#endif

    txq_send(msg_st);
}
Example #17
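/**
 * \brief Boot a new core: load and relocate the CPU driver, allocate memory
 *        for the monitor, fill in the core_data structure, and start the
 *        core via the architecture-specific startup path.
 */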
errval_t spawn_xcore_monitor(coreid_t coreid, int hwid,
                             enum cpu_type cpu_type,
                             const char *cmdline,
                             struct frame_identity urpc_frame_id,
                             struct capref kcb)
{
    uint64_t start = 0;
    const char *monitorname = NULL, *cpuname = NULL;
    genpaddr_t arch_page_size;
    errval_t err;

    err = get_architecture_config(cpu_type, &arch_page_size,
                                  &monitorname, &cpuname);
    assert(err_is_ok(err));

    DEBUG("loading kernel: %s\n", cpuname);
    DEBUG("loading 1st app: %s\n", monitorname);

    // compute size of frame needed and allocate it
    DEBUG("%s:%s:%d: urpc_frame_id.base=%"PRIxGENPADDR"\n",
           __FILE__, __FUNCTION__, __LINE__, urpc_frame_id.base);
    DEBUG("%s:%s:%d: urpc_frame_id.size=%d\n",
           __FILE__, __FUNCTION__, __LINE__, urpc_frame_id.bits);

    if (benchmark_flag) {
        start = bench_tsc();
    }
    static size_t cpu_binary_size;
    static lvaddr_t cpu_binary = 0;
    static genpaddr_t cpu_binary_phys;
    static const char* cached_cpuname = NULL;
    if (cpu_binary == 0) {
        cached_cpuname = cpuname;
        // XXX: Caching these for now, until we have unmap
        err = lookup_module(cpuname, &cpu_binary, &cpu_binary_phys,
                            &cpu_binary_size);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Can not lookup module");
            return err;
        }
    }
    // Ensure caching actually works and we're
    // always loading same binary. If this starts to fail, get rid of caching.
    assert (strcmp(cached_cpuname, cpuname) == 0);

    static size_t monitor_binary_size;
    static lvaddr_t monitor_binary = 0;
    static genpaddr_t monitor_binary_phys;
    static const char* cached_monitorname = NULL;
    if (monitor_binary == 0) {
        cached_monitorname = monitorname;
        // XXX: Caching these for now, until we have unmap
        err = lookup_module(monitorname, &monitor_binary,
                            &monitor_binary_phys, &monitor_binary_size);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Can not lookup module");
            return err;
        }
    }
    // Again, ensure caching actually worked (see above)
    assert (strcmp(cached_monitorname, monitorname) == 0);

    if (benchmark_flag) {
        bench_data->load = bench_tsc() - start;
        start = bench_tsc();
    }

    struct capref cpu_memory_cap;
    struct frame_identity frameid;
    size_t cpu_memory;
    err = allocate_kernel_memory(cpu_binary, arch_page_size,
                                 &cpu_memory_cap, &cpu_memory, &frameid);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not allocate space for new app kernel.");
        return err;
    }

    err = cap_mark_remote(cpu_memory_cap);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not mark cap remote.");
        return err;
    }

    void *cpu_buf_memory;
    err = vspace_map_one_frame(&cpu_buf_memory, cpu_memory, cpu_memory_cap,
                               NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_MAP);
    }
    if (benchmark_flag) {
        bench_data->alloc_cpu = bench_tsc() - start;
        start = bench_tsc();
    }

    /* Chunk of memory to load monitor on the app core */
    struct capref spawn_memory_cap;
    struct frame_identity spawn_memory_identity;

    err = frame_alloc_identify(&spawn_memory_cap,
                               X86_CORE_DATA_PAGES * arch_page_size,
                               NULL, &spawn_memory_identity);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_FRAME_ALLOC);
    }

    err = cap_mark_remote(spawn_memory_cap);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not mark cap remote.");
        return err;
    }
    if (benchmark_flag) {
        bench_data->alloc_mon = bench_tsc() - start;
        start = bench_tsc();
    }

    /* Load cpu */
    struct elf_allocate_state state;
    state.vbase = (char *)cpu_buf_memory + arch_page_size;
    assert(sizeof(struct x86_core_data) <= arch_page_size);
    state.elfbase = elf_virtual_base(cpu_binary);

    struct Elf64_Ehdr *cpu_head = (struct Elf64_Ehdr *)cpu_binary;
    genvaddr_t cpu_entry;

    err = elf_load(cpu_head->e_machine, elfload_allocate, &state,
                   cpu_binary, cpu_binary_size, &cpu_entry);
    if (err_is_fail(err)) {
        return err;
    }
    if (benchmark_flag) {
        bench_data->elf_load = bench_tsc() - start;
        start = bench_tsc();
    }

    err = relocate_cpu_binary(cpu_binary, cpu_head, state, frameid, arch_page_size);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not relocate new kernel.");
        return err;
    }
    if (benchmark_flag) {
        bench_data->elf_reloc = bench_tsc() - start;
    }

    genvaddr_t cpu_reloc_entry = cpu_entry - state.elfbase
                                 + frameid.base + arch_page_size;
    /* Compute entry point in the foreign address space */
    forvaddr_t foreign_cpu_reloc_entry = (forvaddr_t)cpu_reloc_entry;

    /* Setup the core_data struct in the new kernel */
    struct x86_core_data *core_data = (struct x86_core_data *)cpu_buf_memory;
    switch (cpu_head->e_machine) {
    case EM_X86_64:
    case EM_K1OM:
        core_data->elf.size = sizeof(struct Elf64_Shdr);
        core_data->elf.addr = cpu_binary_phys + (uintptr_t)cpu_head->e_shoff;
        core_data->elf.num  = cpu_head->e_shnum;
        break;
    case EM_386:
        core_data->elf.size = sizeof(struct Elf32_Shdr);
        struct Elf32_Ehdr *head32 = (struct Elf32_Ehdr *)cpu_binary;
        core_data->elf.addr = cpu_binary_phys + (uintptr_t)head32->e_shoff;
        core_data->elf.num  = head32->e_shnum;
        break;
    default:
        return SPAWN_ERR_UNKNOWN_TARGET_ARCH;
    }
    core_data->module_start = cpu_binary_phys;
    core_data->module_end   = cpu_binary_phys + cpu_binary_size;
    core_data->urpc_frame_base = urpc_frame_id.base;
    core_data->urpc_frame_bits = urpc_frame_id.bits;
    core_data->monitor_binary   = monitor_binary_phys;
    core_data->monitor_binary_size = monitor_binary_size;
    core_data->memory_base_start = spawn_memory_identity.base;
    core_data->memory_bits       = spawn_memory_identity.bits;
    core_data->src_core_id       = disp_get_core_id();
    core_data->src_arch_id       = my_arch_id;
    core_data->dst_core_id       = coreid;


    struct frame_identity fid;
    err = invoke_frame_identify(kcb, &fid);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "Invoke frame identity for KCB failed. "
                            "Did you add the syscall handler for that architecture?");
    }
    DEBUG("%s:%s:%d: fid.base is 0x%"PRIxGENPADDR"\n",
           __FILE__, __FUNCTION__, __LINE__, fid.base);
    core_data->kcb = (genpaddr_t) fid.base;
#ifdef CONFIG_FLOUNDER_BACKEND_UMP_IPI
    core_data->chan_id           = chanid;
#endif

    if (cmdline != NULL) {
        // copy as much of command line as will fit
        snprintf(core_data->kernel_cmdline, sizeof(core_data->kernel_cmdline),
                "%s %s", cpuname, cmdline);
        // ensure termination
        core_data->kernel_cmdline[sizeof(core_data->kernel_cmdline) - 1] = '\0';

        DEBUG("%s:%s:%d: %s\n", __FILE__, __FUNCTION__, __LINE__, core_data->kernel_cmdline);
    }

    /* Invoke kernel capability to boot new core */
    if (cpu_type == CPU_X86_64 || cpu_type == CPU_K1OM) {
        start_aps_x86_64_start(hwid, foreign_cpu_reloc_entry);
    }

#ifndef __k1om__
    else if (cpu_type == CPU_X86_32) {
        start_aps_x86_32_start(hwid, foreign_cpu_reloc_entry);
    }
#endif

    /* Clean up */
    // XXX: Should not delete the remote caps?
    err = cap_destroy(spawn_memory_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap_destroy failed");
    }
    err = vspace_unmap(cpu_buf_memory);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace unmap CPU driver memory failed");
    }
    err = cap_destroy(cpu_memory_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap_destroy failed");
    }

    return SYS_ERR_OK;
}
Example #18
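/**
 * \brief Replicate a frame into freshly allocated memory using the DMA
 *        engine, returning the replica in \p frame.
 */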
static errval_t replicate_frame(lvaddr_t addr, struct capref *frame)
{
    errval_t err;

#if XOMP_BENCH_WORKER_EN
    cycles_t repl_timer = bench_tsc();
#endif

    struct frame_identity id;
    err = invoke_frame_identify(*frame, &id);
    if (err_is_fail(err)) {
        return err;
    }

    XWR_DEBUG("Replicating frame: [%016lx]\n", id.base);

    struct capref replicate;
    err = frame_alloc(&replicate, (1UL << id.bits), NULL);
    if (err_is_fail(err)) {
        return err;
    }

    XWR_DEBUG("registering memory with DMA service\n");

#if XOMP_BENCH_WORKER_EN
    cycles_t register_timer = bench_tsc();
#endif
    err = dma_register_memory((struct dma_device *) dma_dev, *frame);
    if (err_is_fail(err)) {
        return err;
    }

    err = dma_register_memory((struct dma_device *) dma_dev, replicate);
    if (err_is_fail(err)) {
        return err;
    }

#if XOMP_BENCH_WORKER_EN
    cycles_t register_timer_end = bench_tsc();
#endif

    struct dma_req_setup setup = {
        .done_cb = dma_replication_cb,
        .cb_arg = NULL,
        .args = {
            .memcpy = {
                .src = id.base,
                .bytes = (1UL << id.bits)
            }
        }
    };

    err = invoke_frame_identify(replicate, &id);
    if (err_is_fail(err)) {
        return err;
    }
    setup.args.memcpy.dst = id.base;

    dma_replication_done = 0x0;

    XWR_DEBUG("DMA request for replication\n");

    err = dma_request_memcpy((struct dma_device *)dma_dev, &setup, NULL);
    if (err_is_fail(err)) {
        return err;
    }

    while (!dma_replication_done) {
        messages_wait_and_handle_next();
    }

    XWR_DEBUG("Replication done.\n");

    *frame = replicate;

#if XOMP_BENCH_WORKER_EN
    cycles_t timer_end = bench_tsc();
    debug_printf("%lx replication took %lu cycles, %lu ms\n", worker_id,
                 timer_end - repl_timer, bench_tsc_to_ms(timer_end - repl_timer));
    debug_printf("%lx register mem took %lu cycles, %lu ms\n", worker_id,
                 register_timer_end - register_timer, bench_tsc_to_ms(register_timer_end - register_timer));
#endif

    return SYS_ERR_OK;
}
Example #19
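/**
 * \brief Map a frame supplied by the master at \p addr (or at any free
 *        address if \p addr is 0) with flags derived from \p type, then
 *        send the response.
 */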
static void add_memory_call_rx(struct xomp_binding *b,
                               struct capref frame,
                               uint64_t addr,
                               uint8_t type)
{
    XWI_DEBUG("add_memory_call_rx: addr:%lx, tyep: %u\n", addr, type);

    struct txq_msg_st *msg_st = txq_msg_st_alloc(&txq);
    assert(msg_st != NULL);

    msg_st->send = add_memory_response_tx;
    msg_st->cleanup = NULL;

    uint32_t map_flags = 0x0;

    switch ((xomp_frame_type_t) type) {
        case XOMP_FRAME_TYPE_MSG:
            map_flags = VREGION_FLAGS_READ_WRITE;
            break;
        case XOMP_FRAME_TYPE_SHARED_RW:
            map_flags = VREGION_FLAGS_READ_WRITE;
            break;
        case XOMP_FRAME_TYPE_SHARED_RO:
            map_flags = VREGION_FLAGS_READ;
            break;
        default:
            USER_PANIC("unknown type: %u", type)
            break;
    }
    struct frame_identity id;
    msg_st->err = invoke_frame_identify(frame, &id);
    if(err_is_fail(msg_st->err)) {
        txq_send(msg_st);
        return;
    }

#if XOMP_WORKER_ENABLE_DMA
    if (0) {
        // todo: replicate frame on the same node if needed..
        replicate_frame(addr, &frame);
    }
#endif

#if XOMP_BENCH_WORKER_EN
    cycles_t map_start = bench_tsc();
#endif
    if (addr) {
        msg_st->err = vspace_map_one_frame_fixed_attr(addr, (1UL << id.bits),
                                                      frame, map_flags, NULL, NULL);
    } else {
        void *map_addr;
        msg_st->err = vspace_map_one_frame_attr(&map_addr, (1UL << id.bits),
                                                frame, map_flags, NULL, NULL);
    }
#if XOMP_BENCH_WORKER_EN
    cycles_t timer_end = bench_tsc();
    debug_printf("%lx mem map %016lx took  %lu cycles, %lu ms\n", worker_id, addr,
                     timer_end - map_start, bench_tsc_to_ms(timer_end - map_start));
#endif

    txq_send(msg_st);
}
Example #20
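/**
 * \brief Execute the work function identified by \p fn (resolving symbol
 *        indices if needed) once per assigned thread ID, then send a
 *        completion notification.
 */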
static void do_work_rx(struct xomp_binding *b,
                       uint64_t fn,
                       uint64_t arg,
                       uint64_t id,
                       uint64_t flags)
{
    errval_t err;

    XWP_DEBUG("do_work_rx: fn:%lx, id:%lx\n", fn, id);

#if XOMP_BENCH_WORKER_EN
    cycles_t work_timer = bench_tsc();
#endif

    struct txq_msg_st *msg_st = txq_msg_st_alloc(&txq);
    assert(msg_st != NULL);

    msg_st->err = SYS_ERR_OK;

    struct bomp_work *work = tls;

    XWP_DEBUG("do_work_rx: threadid = %u, nthreads = %u\n", work->thread_id,
              work->num_threads);

    g_bomp_state->num_threads = work->num_threads;

    struct xomp_msg_st *st = (struct xomp_msg_st *) msg_st;
    st->args.done_notify.id = id;

    if (arg) {
        msg_st->send = done_with_arg_tx;
        st->args.done_notify.arg = arg;
    } else {
        msg_st->send = done_notify_tx;
    }

    if (fn & XOMP_FN_INDEX_FLAG) {
        uint32_t idx = fn & ~XOMP_FN_INDEX_FLAG;
        char *fn_name;
        err = spawn_symval_lookup_idx(idx, &fn_name, &fn);
        if (err_is_fail(err)) {
            msg_st->err = err;
            txq_send(msg_st);
            return;
        }
        XWP_DEBUG("do_work_rx: function index %u -> %s\n", idx, fn_name);
    }

    xomp_worker_fn_t fnct = (xomp_worker_fn_t) fn;
    XWP_DEBUG("do_work_rx: calling fnct %p with argument %p\n", fnct, work->data);

    for (uint32_t i = 0; i < work->num_vtreads; ++i) {
        fnct(work->data);
        work->thread_id++;
    }

#if XOMP_BENCH_WORKER_EN
    work_timer = bench_tsc() - work_timer;
    debug_printf("%lx work took %lu cycles, %lu ms\n", worker_id, work_timer,
                 bench_tsc_to_ms(work_timer));
#endif

    txq_send(msg_st);
}
Example #21
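/**
 * \brief Parse the worker-location argument, wait for the Xeon Phi cards if
 *        remote workers are used, and time bomp_xomp_init() for the given
 *        spawn configuration.
 */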
static int prepare_xomp(int argc,
                        char *argv[])
{
    errval_t err;

    xomp_wloc_t location = XOMP_WORKER_LOC_MIXED;
    for (int i = 3; i < argc; ++i) {
        if (!strncmp(argv[i], "--location=", 11)) {
            char *p = strchr(argv[i], '=');
            p++;
            if (!strcmp(p, "local")) {
                location = XOMP_WORKER_LOC_LOCAL;
            }
        }
    }

    if (location == XOMP_WORKER_LOC_MIXED) {
        debug_printf("waiting for xeon phi to be ready\n");
        err = xeon_phi_domain_blocking_lookup("xeon_phi.0.ready", NULL);
        EXPECT_SUCCESS(err, "nameservice_blocking_lookup");
        err = xeon_phi_domain_blocking_lookup("xeon_phi.1.ready", NULL);
        EXPECT_SUCCESS(err, "nameservice_blocking_lookup");

#if XOMP_BENCH_ENABLED
        xomp_master_bench_enable(BENCH_RUN_COUNT, nthreads,
                        XOMP_MASTER_BENCH_MEM_ADD);
#endif
    }

    struct xomp_spawn local_info = {
        .argc = argc,
        .argv = argv,
#ifdef __k1om__
        .path = "/k1om/sbin/benchmarks/bomp_mm",
#else
        .path = "/x86_64/sbin/benchmarks/bomp_mm",
#endif
    };

    struct xomp_spawn remote_info = {
        .argc = argc,
        .argv = argv,
        .path = "/k1om/sbin/benchmarks/bomp_mm",
    };

    struct xomp_args xomp_arg = {
        .type = XOMP_ARG_TYPE_DISTINCT,
        .core_stride = 0,  // use default
        .args = {
            .distinct = {
                .nthreads = nthreads,
                .worker_loc = location,
                .nphi = 2,
                .local = local_info,
                .remote = remote_info
            }
        }
    };

    cycles_t tsc_start = bench_tsc();
    if (bomp_xomp_init(&xomp_arg)) {
        debug_printf("bomp init failed!\n");
        exit(1);
    }
    cycles_t tsc_end = bench_tsc();
    timer_xompinit = bench_time_diff(tsc_start, tsc_end);

    return (location == XOMP_WORKER_LOC_LOCAL);
}

int main(int argc,
         char *argv[])
{
    errval_t err;
    xomp_wid_t wid;

    bench_init();

    err = xomp_worker_parse_cmdline(argc, argv, &wid);
    if (err_is_ok(err)) {
        struct xomp_args xw_arg = {
            .type = XOMP_ARG_TYPE_WORKER,
            .args = {
                .worker = {
                    .id = wid
                }
            }
        };
        bomp_xomp_init(&xw_arg);
    }

    if (argc < 4) {
        debug_printf("Usage: %s <size> <numthreats>\n", argv[0]);
        exit(1);
    }

    nthreads = strtoul(argv[1], NULL, 10);
    if (nthreads == 0) {
        debug_printf("num threads must be >0\n");
        exit(1);
    }

    DEBUG("\n");
    DEBUG("======================================================\n");
    debug_printf("Num Threads: %u\n", nthreads);

    uint8_t is_shared = 0;
    for (int i = 2; i < argc; ++i) {
        if (!strcmp(argv[i], "bomp")) {
            prepare_bomp();
            is_shared = 1;
        } else if (!strcmp(argv[i], "xomp")) {
            is_shared = prepare_xomp(argc, argv);
        } else {
            debug_printf("ignoring argument {%s}\n", argv[i]);
        }
    }

    debug_printf("-------------------------------------\n");
    debug_printf("init time: %lu\n", timer_xompinit);
    debug_printf("-------------------------------------\n");
#if XOMP_BENCH_ENABLED
    xomp_master_bench_print_results();
#endif
    while (1)
        ;

}
Example #22
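/**
 * \brief Start an x86-32 application core: copy the real-mode bootstrap
 *        code, patch in the entry point and global pointer, send the
 *        INIT/START IPIs, and wait for the core to signal startup.
 */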
int start_aps_x86_32_start(uint8_t core_id, genvaddr_t entry)
{
    DEBUG("%s:%d: start_aps_x86_32_start\n", __FILE__, __LINE__);

    // Copy the startup code to the real-mode address
    uint8_t *real_src = (uint8_t *) &x86_32_start_ap;
    uint8_t *real_end = (uint8_t *) &x86_32_start_ap_end;

    struct capref bootcap;
    struct acpi_rpc_client* acl = get_acpi_rpc_client();
    errval_t error_code;
    errval_t err = acl->vtbl.mm_realloc_range_proxy(acl, 16, 0x0,
                                                    &bootcap, &error_code);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "mm_alloc_range_proxy failed.");
    }
    if (err_is_fail(error_code)) {
        USER_PANIC_ERR(error_code, "mm_alloc_range_proxy return failed.");
    }

    void* real_base;
    err = vspace_map_one_frame(&real_base, 1<<16, bootcap, NULL, NULL);
    uint8_t* real_dest = (uint8_t*)real_base + X86_32_REAL_MODE_LINEAR_OFFSET;

    memcpy(real_dest, real_src, real_end - real_src);

    /* Pointer to the entry point called from init_ap.S */
    volatile uint64_t *absolute_entry_ptr = (volatile uint64_t *)
                                            ((
                                             (lpaddr_t) &x86_32_init_ap_absolute_entry -
                                             (lpaddr_t) &x86_32_start_ap
                                            )
                                            +
                                            real_dest);
    //copy the address of the function start (in boot.S) to the long-mode
    //assembler code to be able to perform an absolute jump
    *absolute_entry_ptr = entry;

    // pointer to the shared global variable amongst all kernels
    volatile uint64_t *ap_global = (volatile uint64_t *)
                                   ((
                                    (lpaddr_t) &x86_32_init_ap_global -
                                    (lpaddr_t) &x86_32_start_ap
                                   )
                                   + real_dest);


    genpaddr_t global;
    struct monitor_blocking_rpc_client *mc =
        get_monitor_blocking_rpc_client();
    err = mc->vtbl.get_global_paddr(mc, &global);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke spawn core");
        return err_push(err, MON_ERR_SPAWN_CORE);
    }
    *ap_global = (uint64_t)(genpaddr_t)global;

    // pointer to the pseudo-lock used to detect boot up of new core
    volatile uint32_t *ap_wait = (volatile uint32_t *)
                                         ((lpaddr_t) &x86_32_init_ap_wait -
                                         ((lpaddr_t) &x86_32_start_ap) +
                                         real_dest);

    // Pointer to the lock variable in the realmode code
    volatile uint8_t *ap_lock = (volatile uint8_t *)
                                        ((lpaddr_t) &x86_32_init_ap_lock -
                                        ((lpaddr_t) &x86_32_start_ap) +
                                        real_dest);

    *ap_wait = AP_STARTING_UP;

    end = bench_tsc();
    err = invoke_send_init_ipi(ipi_cap, core_id);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke send init ipi");
        return err;
    }

    err = invoke_send_start_ipi(ipi_cap, core_id, entry);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke sipi");
        return err;
    }

    // give the new core a bit of time to start up and set the lock
    for (uint64_t i = 0; i < STARTUP_TIMEOUT; i++) {
        if (*ap_lock != 0) {
            break;
        }
    }

    // If the lock is set, the core has been started; otherwise assume that
    // a core with this APIC ID doesn't exist.
    if (*ap_lock != 0) {
        while (*ap_wait != AP_STARTED);
        trace_event(TRACE_SUBSYS_KERNEL, TRACE_EVENT_KERNEL_CORE_START_REQUEST_ACK, core_id);
        *ap_lock = 0;
        return 0;
    }

    assert(!"badness");
    return -1;
}