// Preserve a copy of the VBE BIOS memory region before ACPI initialisation
// can modify it: allocate the physical BIOS range, map it read, allocate a
// fresh frame (biosmem, file-scope), map that too, and copy the contents
// across. Both temporary mappings are destroyed again before returning.
// NOTE(review): all failures are handled with assert; the bioscap range is
// never returned to the allocator (see TODO below).
static errval_t copy_bios_mem(void) {
    errval_t err = SYS_ERR_OK;

    // Get a copy of the VBE BIOS before ACPI touches it
    struct capref bioscap;

    err = mm_alloc_range(&pci_mm_physaddr, BIOS_BITS, 0, 1UL << BIOS_BITS,
                         &bioscap, NULL);
    assert(err_is_ok(err));

    // map the original BIOS region into our vspace
    void *origbios;
    struct vregion *origbios_vregion;
    err = vspace_map_one_frame(&origbios, 1 << BIOS_BITS, bioscap,
                               NULL, &origbios_vregion);
    assert(err_is_ok(err));

    // allocate and map the destination frame (file-scope biosmem)
    err = frame_alloc(&biosmem, 1 << BIOS_BITS, NULL);
    assert(err_is_ok(err));

    void *newbios;
    struct vregion *newbios_vregion;
    err = vspace_map_one_frame(&newbios, 1 << BIOS_BITS, biosmem,
                               NULL, &newbios_vregion);
    assert(err_is_ok(err));

    memcpy(newbios, origbios, 1 << BIOS_BITS);

    // Unmap both vspace regions again
    vregion_destroy(origbios_vregion);
    vregion_destroy(newbios_vregion);

    // TODO: Implement mm_free()

    return err;
}
// Allocate and map the local benchmark frame.
// The frame size is chosen by role: Xeon Phi id 0 is the host side, any
// other id is the card side; a configured size of 0 falls back to one 4K
// page. On success the file-scope globals local_frame, local_base,
// local_frame_sz and local_buf describe the mapped region.
static errval_t alloc_local(void) {
    errval_t err;

    size_t frame_size = 0;
    if (disp_xeon_phi_id() == 0) {
        frame_size = XPHI_BENCH_FRAME_SIZE_HOST;
    } else {
        frame_size = XPHI_BENCH_FRAME_SIZE_CARD;
    }

    // a configured size of zero means "use a single base page"
    if (!frame_size) {
        frame_size = 4096;
    }

    debug_printf("Allocating a frame of size: %lx\n", frame_size);

    size_t alloced_size = 0;
    err = frame_alloc(&local_frame, frame_size, &alloced_size);
    assert(err_is_ok(err));
    assert(alloced_size >= frame_size);

    // learn the physical base address of the frame
    struct frame_identity id;
    err = invoke_frame_identify(local_frame, &id);
    assert(err_is_ok(err));

    local_base = id.base;
    local_frame_sz = alloced_size;

    err = vspace_map_one_frame(&local_buf, alloced_size, local_frame,
                               NULL, NULL);

    return err;
}
// Get the bootinfo and map it in. static errval_t map_bootinfo(struct bootinfo **bootinfo) { errval_t err, msgerr; struct monitor_blocking_rpc_client *cl = get_monitor_blocking_rpc_client(); assert(cl != NULL); struct capref bootinfo_frame; size_t bootinfo_size; msgerr = cl->vtbl.get_bootinfo(cl, &err, &bootinfo_frame, &bootinfo_size); if (err_is_fail(msgerr)) { err = msgerr; } if (err_is_fail(err)) { USER_PANIC_ERR(err, "failed in get_bootinfo"); return err; } err = vspace_map_one_frame((void**)bootinfo, bootinfo_size, bootinfo_frame, NULL, NULL); assert(err_is_ok(err)); return err; }
// Map the Xeon Phi MMIO register space into our vspace and record the
// mapping (physical/virtual base, size, cap) in phi->mmio.
// Relies on the file-scope mmio_cap having been obtained beforehand; the
// frame identity supplies the physical base and the size as log2 bits.
static errval_t map_mmio_space(struct xeon_phi *phi) {
    errval_t err;
    void *mmio;

    struct frame_identity id;
    err = invoke_frame_identify(mmio_cap, &id);
    if (err_is_fail(err)) {
        return err;
    }

    err = vspace_map_one_frame(&mmio, (1UL << id.bits), mmio_cap, NULL, NULL);
    if (err_is_fail(err)) {
        return err;
    }

    XDEBUG("mapped mmio register space @ [%p]\n", mmio);

    // publish the mapping details for later register access
    phi->mmio.bits = id.bits;
    phi->mmio.vbase = (lvaddr_t) mmio;
    phi->mmio.cap = mmio_cap;
    phi->mmio.pbase = id.base;
    phi->mmio.length = (1UL << id.bits);

    return SYS_ERR_OK;
}
// Allocate and map the local UMP message frame for the benchmark.
// On host builds (not __k1om__) the allocation is temporarily constrained
// to the benchmark RAM window via ram_set_affinity; the previous affinity
// is restored right after the allocation. Results are published in the
// globals local_frame, local_base, local_frame_sz and local_buf.
void alloc_local(void) {
    errval_t err;

#ifndef __k1om__
    // constrain RAM allocation to the benchmark window, saving the old one
    uint64_t minbase, maxlimit;
    ram_get_affinity(&minbase, &maxlimit);
    ram_set_affinity(XPHI_BENCH_RAM_MINBASE, XPHI_BENCH_RAM_MAXLIMIT);
#endif
    size_t alloced_size = 0;
    err = frame_alloc(&local_frame, XPHI_BENCH_MSG_FRAME_SIZE, &alloced_size);
    EXPECT_SUCCESS(err, "frame_alloc");
#ifndef __k1om__
    // restore the previous allocation affinity
    ram_set_affinity(minbase, maxlimit);
#endif

    struct frame_identity id;
    err = invoke_frame_identify(local_frame, &id);
    EXPECT_SUCCESS(err, "invoke_frame_identify");

    local_base = id.base;
    local_frame_sz = alloced_size;

    debug_printf("alloc_local | Frame base: %016lx, size=%lx\n", id.base,
                 1UL << id.bits);

    err = vspace_map_one_frame(&local_buf, alloced_size, local_frame,
                               NULL, NULL);
    EXPECT_SUCCESS(err, "vspace_map_one_frame");
}
// FIXME: error handling (not asserts) needed in this function static void mem_allocate_handler(struct mem_binding *b, uint8_t bits, genpaddr_t minbase, genpaddr_t maxlimit) { struct capref *cap = malloc(sizeof(struct capref)); errval_t err, ret; trace_event(TRACE_SUBSYS_MEMSERV, TRACE_EVENT_MEMSERV_ALLOC, bits); /* refill slot allocator if needed */ err = slot_prealloc_refill(mm_ram.slot_alloc_inst); assert(err_is_ok(err)); /* refill slab allocator if needed */ while (slab_freecount(&mm_ram.slabs) <= MINSPARENODES) { struct capref frame; err = msa.a.alloc(&msa.a, &frame); assert(err_is_ok(err)); err = frame_create(frame, BASE_PAGE_SIZE * 8, NULL); assert(err_is_ok(err)); void *buf; err = vspace_map_one_frame(&buf, BASE_PAGE_SIZE * 8, frame, NULL, NULL); if (err_is_fail(err)) { DEBUG_ERR(err, "vspace_map_one_frame failed"); assert(buf); } slab_grow(&mm_ram.slabs, buf, BASE_PAGE_SIZE * 8); } ret = mymm_alloc(cap, bits, minbase, maxlimit); if (err_is_ok(ret)) { mem_avail -= 1UL << bits; } else { // DEBUG_ERR(ret, "allocation of %d bits in % " PRIxGENPADDR "-%" PRIxGENPADDR " failed", // bits, minbase, maxlimit); *cap = NULL_CAP; } /* Reply */ err = b->tx_vtbl.allocate_response(b, MKCONT(allocate_response_done, cap), ret, *cap); if (err_is_fail(err)) { if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { struct pending_reply *r = malloc(sizeof(struct pending_reply)); assert(r != NULL); r->b = b; r->err = ret; r->cap = cap; err = b->register_send(b, get_default_waitset(), MKCONT(retry_reply,r)); assert(err_is_ok(err)); } else { DEBUG_ERR(err, "failed to reply to memory request"); allocate_response_done(cap); } } }
// Callback invoked when the remote Xeon Phi domain opens a message channel.
// Identifies and maps the offered frame, records its identity in the
// remote_* globals, then initializes the forward and reverse UMP channels
// over the shared buffers set up by init_buffer().
// NOTE(review): usrdata and type are unused by this callback.
static errval_t msg_open_cb(xphi_dom_id_t domain, uint64_t usrdata,
                            struct capref msgframe, uint8_t type)
{
    errval_t err;

    domainid = domain;

    struct frame_identity id;
    err = invoke_frame_identify(msgframe, &id);
    EXPECT_SUCCESS(err, "frame identify");

    debug_printf("msg_open_cb | Frame base: %016lx, size=%lx\n", id.base,
                 1UL << id.bits);

    // the offered frame must be large enough for our message buffers
    assert((1UL << id.bits) >= XPHI_BENCH_MSG_FRAME_SIZE);

    err = vspace_map_one_frame(&remote_buf, XPHI_BENCH_MSG_FRAME_SIZE, msgframe,
                               NULL, NULL);
    EXPECT_SUCCESS(err, "vspace map frame");

    remote_frame = msgframe;
    remote_base = id.base;
    remote_frame_sz = (1UL << id.bits);

    init_buffer();

    connected = 0x1;

    debug_printf("Initializing UMP channel...\n");

    // forward channel
    err = ump_chan_init(&xphi_uc, inbuf, XPHI_BENCH_MSG_CHAN_SIZE, outbuf,
                        XPHI_BENCH_MSG_CHAN_SIZE);
    EXPECT_SUCCESS(err, "initialize ump channel");

    // reverse channel
    err = ump_chan_init(&xphi_uc_rev, inbuf_rev, XPHI_BENCH_MSG_CHAN_SIZE,
                        outbuf_rev, XPHI_BENCH_MSG_CHAN_SIZE);
    EXPECT_SUCCESS(err, "initialize ump channel");

    return SYS_ERR_OK;
}
// Exception handler implementing copy-on-write for the cow_vbuf region.
// Only write pagefaults inside [cow_vbuf, cow_vbuf+BUFSIZE) are expected;
// anything else aborts the test. For a faulting page, a private frame is
// taken from the cow_frames cnode (one slot per page offset), the current
// page contents are copied into it, and the memobj is repopulated with the
// private frame mapped read-write at the faulting offset.
static void handler(enum exception_type type, int subtype, void *vaddr,
                    arch_registers_state_t *regs,
                    arch_registers_fpu_state_t *fpuregs)
{
    debug_printf("got exception %d(%d) on %p\n", type, subtype, vaddr);
    assert(type == EXCEPT_PAGEFAULT);
    assert(subtype == PAGEFLT_WRITE);
    uintptr_t addr = (uintptr_t) vaddr;
    uintptr_t faddr = addr & ~BASE_PAGE_MASK; // page-aligned fault address
    uintptr_t base = (uintptr_t) cow_vbuf;
    if (addr < base || addr >= base + BUFSIZE) {
        debug_printf("unexpected write pagefault on %p\n", vaddr);
        exit(1);
    }
    assert(cow_frame_count);
    debug_printf("got expected write pagefault on %p, creating copy of page\n",
                 vaddr);
    // get and map copy of page
    size_t frame_id = (addr - base) / BASE_PAGE_SIZE;
    debug_printf("remapping frame %zu\n", frame_id);
    struct memobj *m = vregion_get_memobj(cow_vregion);
    assert(m);
    errval_t err;
    struct capref retframe;
    // private frame for this page: slot index == page index in the buffer
    struct capref f = (struct capref) { .cnode = cow_frames, .slot = frame_id };
    size_t retoff;
    struct vregion *vr;
    void *buf;
    // copy data from faulting page to new page
    err = vspace_map_one_frame(&buf, BASE_PAGE_SIZE, f, NULL, &vr);
    assert(err_is_ok(err));
    memcpy(buf, (void *)faddr, BASE_PAGE_SIZE);
    vregion_destroy(vr);
    // swap the shared frame out of the memobj and install the private copy
    err = m->f.unfill(m, frame_id * BASE_PAGE_SIZE, &retframe, &retoff);
    assert(err_is_ok(err));
    err = m->f.fill(m, frame_id * BASE_PAGE_SIZE, f, BASE_PAGE_SIZE);
    assert(err_is_ok(err));
    // fault the new mapping in, then make it writable
    err = m->f.pagefault(m, cow_vregion, frame_id * BASE_PAGE_SIZE, 0);
    assert(err_is_ok(err));
    err = m->f.protect(m, cow_vregion, frame_id * BASE_PAGE_SIZE,
                       BASE_PAGE_SIZE, VREGION_FLAGS_READ_WRITE);
    assert(err_is_ok(err));
}
// populates the given buffer with given capref static errval_t populate_buffer(struct buffer_descriptor *buffer, struct capref cap) { buffer->cap = cap; struct frame_identity pa; errval_t err = invoke_frame_identify(cap, &pa); if (!err_is_ok(err)) { printf("invoke_frame_identify failed\n"); abort(); } buffer->pa = pa.base; buffer->bytes = pa.bytes; err = vspace_map_one_frame(&buffer->va, buffer->bytes, cap, NULL, NULL); /* err = vspace_map_one_frame_attr(&buffer->va, (1L << buffer->bits), cap, VREGION_FLAGS_READ_WRITE_NOCACHE, NULL, NULL); */ if (err_is_fail(err)) { DEBUG_ERR(err, "vspace_map_one_frame failed"); // FIXME: report more sensible error return(ETHERSRV_ERR_TOO_MANY_BUFFERS); } netd_buffer_count++; buffer_id_counter++; buffer->buffer_id = buffer_id_counter; // printf("### buffer gets id %"PRIu64"\n", buffer->buffer_id); if (buffer->buffer_id == 3) { first_app_b = buffer; } buffer->next = buffers_list; // Adding the buffer on the top of buffer list. // buffers_list = buffer; return SYS_ERR_OK; } // end function: populate_buffer
// Callback invoked when the remote Xeon Phi domain opens a message channel.
// Identifies the offered frame, records its identity in the remote_*
// globals, maps it, and initializes the shared buffer via init_buffer_c0().
// Panics on any failure. NOTE(review): the type parameter is unused.
static errval_t msg_open_cb(xphi_dom_id_t domain, uint64_t usrdata,
                            struct capref msgframe, uint8_t type)
{
    errval_t err;

    domid = domain;

    struct frame_identity id;
    err = invoke_frame_identify(msgframe, &id);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "could not identify the frame");
    }

    debug_printf("msg_open_cb | Frame base: %016lx, size=%lx, ud:%lx\n",
                 id.base, 1UL << id.bits, usrdata);

    remote_frame = msgframe;
    remote_base = id.base;
    remote_frame_sz = (1UL << id.bits);

    err = vspace_map_one_frame(&remote_buf, remote_frame_sz, msgframe,
                               NULL, NULL);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "Could not map the frame");
    }

    init_buffer_c0();

    connected = 0x1; // signal the main loop that the channel is up

    return SYS_ERR_OK;
}
int map_unmap(void) { errval_t err; struct capref mem; DEBUG_MAP_UNMAP("ram_alloc\n"); err = ram_alloc(&mem, BASE_PAGE_BITS); if (err_is_fail(err)) { printf("ram_alloc: %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } struct capref frame; DEBUG_MAP_UNMAP("retype\n"); err = slot_alloc(&frame); if (err_is_fail(err)) { printf("slot_alloc: %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } err = cap_retype(frame, mem, ObjType_Frame, BASE_PAGE_BITS); if (err_is_fail(err)) { printf("cap_retype: %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } DEBUG_MAP_UNMAP("delete ram cap\n"); err = cap_destroy(mem); if (err_is_fail(err)) { printf("cap_delete(mem): %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } struct frame_identity fi; err = invoke_frame_identify(frame, &fi); if (err_is_fail(err)) { printf("frame_identify: %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } DEBUG_MAP_UNMAP("frame: base = 0x%"PRIxGENPADDR", bits = %d\n", fi.base, fi.bits); #ifdef NKMTEST_DEBUG_MAP_UNMAP dump_pmap(get_current_pmap()); #endif struct vregion *vr; struct memobj *memobj; void *vaddr; DEBUG_MAP_UNMAP("map\n"); err = vspace_map_one_frame(&vaddr, BASE_PAGE_SIZE, frame, &memobj, &vr); if (err_is_fail(err)) { printf("vspace_map_one_frame: %s (%"PRIuERRV")\n", err_getstring(err), err); } char *memory = vaddr; DEBUG_MAP_UNMAP("vaddr = %p\n", vaddr); #ifdef NKMTEST_DEBUG_MAP_UNMAP dump_pmap(get_current_pmap()); #endif DEBUG_MAP_UNMAP("write 1\n"); int i; for (i = 0; i < BASE_PAGE_SIZE; i++) { memory[i] = i % INT8_MAX; } DEBUG_MAP_UNMAP("verify 1\n"); for (i = 0; i < BASE_PAGE_SIZE; i++) { assert(memory[i] == i % INT8_MAX); } DEBUG_MAP_UNMAP("delete frame cap\n"); err = cap_destroy(frame); if (err_is_fail(err)) { printf("cap_delete(frame): %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } #ifdef NKMTEST_DEBUG_MAP_UNMAP // no mapping should remain here dump_pmap(get_current_pmap()); err = debug_dump_hw_ptables(); if (err_is_fail(err)) { 
printf("kernel dump ptables: %s (%"PRIuERRV")\n", err_getstring(err), err); return 1; } #endif printf("%s: done\n", __FUNCTION__); return 0; }
// Boot a new CPU driver plus monitor on another core.
// Looks up (and caches, until unmap exists) the kernel and monitor binaries,
// allocates and maps memory for the new kernel, ELF-loads and relocates it,
// fills in the x86_core_data bootstrap structure (binaries, URPC frame,
// monitor memory, KCB, command line), then fires the architecture-specific
// start-AP sequence and cleans up the local mappings/caps.
//
// \param cmdline may be NULL; otherwise it is appended to the kernel name.
// NOTE(review): benchmark timing fields are only written when benchmark_flag
// is set; the static caches make this function non-reentrant.
errval_t spawn_xcore_monitor(coreid_t coreid, int hwid,
                             enum cpu_type cpu_type,
                             const char *cmdline,
                             struct frame_identity urpc_frame_id,
                             struct capref kcb)
{
    uint64_t start = 0;
    const char *monitorname = NULL, *cpuname = NULL;
    genpaddr_t arch_page_size;
    errval_t err;

    err = get_architecture_config(cpu_type, &arch_page_size,
                                  &monitorname, &cpuname);
    assert(err_is_ok(err));

    DEBUG("loading kernel: %s\n", cpuname);
    DEBUG("loading 1st app: %s\n", monitorname);

    // compute size of frame needed and allocate it
    DEBUG("%s:%s:%d: urpc_frame_id.base=%"PRIxGENPADDR"\n",
          __FILE__, __FUNCTION__, __LINE__, urpc_frame_id.base);
    DEBUG("%s:%s:%d: urpc_frame_id.size=%d\n",
          __FILE__, __FUNCTION__, __LINE__, urpc_frame_id.bits);

    if (benchmark_flag) {
        start = bench_tsc();
    }
    // the looked-up kernel binary is cached across calls in these statics
    static size_t cpu_binary_size;
    static lvaddr_t cpu_binary = 0;
    static genpaddr_t cpu_binary_phys;
    static const char* cached_cpuname = NULL;
    if (cpu_binary == 0) {
        cached_cpuname = cpuname;
        // XXX: Caching these for now, until we have unmap
        err = lookup_module(cpuname, &cpu_binary,
                            &cpu_binary_phys, &cpu_binary_size);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Can not lookup module");
            return err;
        }
    }
    // Ensure caching actually works and we're
    // always loading same binary. If this starts to fail, get rid of caching.
    assert (strcmp(cached_cpuname, cpuname) == 0);

    // same caching scheme for the monitor binary
    static size_t monitor_binary_size;
    static lvaddr_t monitor_binary = 0;
    static genpaddr_t monitor_binary_phys;
    static const char* cached_monitorname = NULL;
    if (monitor_binary == 0) {
        cached_monitorname = monitorname;
        // XXX: Caching these for now, until we have unmap
        err = lookup_module(monitorname, &monitor_binary,
                            &monitor_binary_phys, &monitor_binary_size);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Can not lookup module");
            return err;
        }
    }
    // Again, ensure caching actually worked (see above)
    assert (strcmp(cached_monitorname, monitorname) == 0);

    if (benchmark_flag) {
        bench_data->load = bench_tsc() - start;
        start = bench_tsc();
    }

    // memory for the new kernel image + core_data page
    struct capref cpu_memory_cap;
    struct frame_identity frameid;
    size_t cpu_memory;
    err = allocate_kernel_memory(cpu_binary, arch_page_size,
                                 &cpu_memory_cap, &cpu_memory, &frameid);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not allocate space for new app kernel.");
        return err;
    }

    err = cap_mark_remote(cpu_memory_cap);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not mark cap remote.");
        return err;
    }

    void *cpu_buf_memory;
    err = vspace_map_one_frame(&cpu_buf_memory, cpu_memory, cpu_memory_cap,
                               NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_MAP);
    }
    if (benchmark_flag) {
        bench_data->alloc_cpu = bench_tsc() - start;
        start = bench_tsc();
    }

    /* Chunk of memory to load monitor on the app core */
    struct capref spawn_memory_cap;
    struct frame_identity spawn_memory_identity;

    err = frame_alloc_identify(&spawn_memory_cap,
                               X86_CORE_DATA_PAGES * arch_page_size,
                               NULL, &spawn_memory_identity);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_FRAME_ALLOC);
    }
    err = cap_mark_remote(spawn_memory_cap);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not mark cap remote.");
        return err;
    }
    if (benchmark_flag) {
        bench_data->alloc_mon = bench_tsc() - start;
        start = bench_tsc();
    }

    /* Load cpu */
    struct elf_allocate_state state;
    // first page of the kernel frame is reserved for struct x86_core_data
    state.vbase = (char *)cpu_buf_memory + arch_page_size;
    assert(sizeof(struct x86_core_data) <= arch_page_size);
    state.elfbase = elf_virtual_base(cpu_binary);

    struct Elf64_Ehdr *cpu_head = (struct Elf64_Ehdr *)cpu_binary;
    genvaddr_t cpu_entry;

    err = elf_load(cpu_head->e_machine, elfload_allocate, &state,
                   cpu_binary, cpu_binary_size, &cpu_entry);
    if (err_is_fail(err)) {
        return err;
    }
    if (benchmark_flag) {
        bench_data->elf_load = bench_tsc() - start;
        start = bench_tsc();
    }

    err = relocate_cpu_binary(cpu_binary, cpu_head, state, frameid,
                              arch_page_size);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not relocate new kernel.");
        return err;
    }
    if (benchmark_flag) {
        bench_data->elf_reloc = bench_tsc() - start;
    }

    // entry point rebased to the physical location the kernel will run at
    genvaddr_t cpu_reloc_entry = cpu_entry - state.elfbase
                                 + frameid.base + arch_page_size;
    /* Compute entry point in the foreign address space */
    forvaddr_t foreign_cpu_reloc_entry = (forvaddr_t)cpu_reloc_entry;

    /* Setup the core_data struct in the new kernel */
    struct x86_core_data *core_data = (struct x86_core_data *)cpu_buf_memory;
    switch (cpu_head->e_machine) {
    case EM_X86_64:
    case EM_K1OM:
        core_data->elf.size = sizeof(struct Elf64_Shdr);
        core_data->elf.addr = cpu_binary_phys + (uintptr_t)cpu_head->e_shoff;
        core_data->elf.num = cpu_head->e_shnum;
        break;
    case EM_386:
        core_data->elf.size = sizeof(struct Elf32_Shdr);
        struct Elf32_Ehdr *head32 = (struct Elf32_Ehdr *)cpu_binary;
        core_data->elf.addr = cpu_binary_phys + (uintptr_t)head32->e_shoff;
        core_data->elf.num = head32->e_shnum;
        break;
    default:
        return SPAWN_ERR_UNKNOWN_TARGET_ARCH;
    }
    core_data->module_start = cpu_binary_phys;
    core_data->module_end = cpu_binary_phys + cpu_binary_size;
    core_data->urpc_frame_base = urpc_frame_id.base;
    core_data->urpc_frame_bits = urpc_frame_id.bits;
    core_data->monitor_binary = monitor_binary_phys;
    core_data->monitor_binary_size = monitor_binary_size;
    core_data->memory_base_start = spawn_memory_identity.base;
    core_data->memory_bits = spawn_memory_identity.bits;
    core_data->src_core_id = disp_get_core_id();
    core_data->src_arch_id = my_arch_id;
    core_data->dst_core_id = coreid;

    struct frame_identity fid;
    err = invoke_frame_identify(kcb, &fid);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "Invoke frame identity for KCB failed. "
                            "Did you add the syscall handler for that architecture?");
    }
    DEBUG("%s:%s:%d: fid.base is 0x%"PRIxGENPADDR"\n",
          __FILE__, __FUNCTION__, __LINE__, fid.base);
    core_data->kcb = (genpaddr_t) fid.base;
#ifdef CONFIG_FLOUNDER_BACKEND_UMP_IPI
    core_data->chan_id = chanid;
#endif

    if (cmdline != NULL) {
        // copy as much of command line as will fit
        snprintf(core_data->kernel_cmdline, sizeof(core_data->kernel_cmdline),
                 "%s %s", cpuname, cmdline);
        // ensure termination
        core_data->kernel_cmdline[sizeof(core_data->kernel_cmdline) - 1] = '\0';

        DEBUG("%s:%s:%d: %s\n", __FILE__, __FUNCTION__, __LINE__,
              core_data->kernel_cmdline);
    }

    /* Invoke kernel capability to boot new core */
    if (cpu_type == CPU_X86_64 || cpu_type == CPU_K1OM) {
        start_aps_x86_64_start(hwid, foreign_cpu_reloc_entry);
    }
#ifndef __k1om__
    else if (cpu_type == CPU_X86_32) {
        start_aps_x86_32_start(hwid, foreign_cpu_reloc_entry);
    }
#endif

    /* Clean up */
    // XXX: Should not delete the remote caps?
    err = cap_destroy(spawn_memory_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap_destroy failed");
    }
    err = vspace_unmap(cpu_buf_memory);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace unmap CPU driver memory failed");
    }
    err = cap_destroy(cpu_memory_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap_destroy failed");
    }

    return SYS_ERR_OK;
}
// Boot an x86-32 application processor: copy the real-mode trampoline to
// low memory, patch in the absolute entry point, the shared-global pointer
// and the startup pseudo-lock, then send INIT/SIPI IPIs and spin until the
// new core takes the lock.
// Returns 0 on success; -1 (after an assert in debug builds) if the core
// never comes up.
int start_aps_x86_32_start(uint8_t core_id, genvaddr_t entry)
{
    DEBUG("%s:%d: start_aps_x86_32_start\n", __FILE__, __LINE__);

    // Copy the startup code to the real-mode address
    uint8_t *real_src = (uint8_t *) &x86_32_start_ap;
    uint8_t *real_end = (uint8_t *) &x86_32_start_ap_end;

    struct capref bootcap;
    struct acpi_rpc_client* acl = get_acpi_rpc_client();
    errval_t error_code;
    errval_t err = acl->vtbl.mm_realloc_range_proxy(acl, 16, 0x0,
                                                    &bootcap, &error_code);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "mm_alloc_range_proxy failed.");
    }
    if (err_is_fail(error_code)) {
        USER_PANIC_ERR(error_code, "mm_alloc_range_proxy return failed.");
    }

    void* real_base;
    err = vspace_map_one_frame(&real_base, 1<<16, bootcap, NULL, NULL);
    // BUGFIX: the map result was previously ignored; a failure would have
    // led to writing through the uninitialized real_base below
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace_map_one_frame failed.");
    }
    uint8_t* real_dest = (uint8_t*)real_base + X86_32_REAL_MODE_LINEAR_OFFSET;
    memcpy(real_dest, real_src, real_end - real_src);

    /* Pointer to the entry point called from init_ap.S */
    volatile uint64_t *absolute_entry_ptr = (volatile uint64_t *)
        (( (lpaddr_t) &x86_32_init_ap_absolute_entry
           - (lpaddr_t) &x86_32_start_ap ) + real_dest);
    //copy the address of the function start (in boot.S) to the long-mode
    //assembler code to be able to perform an absolute jump
    *absolute_entry_ptr = entry;

    // pointer to the shared global variable amongst all kernels
    volatile uint64_t *ap_global = (volatile uint64_t *)
        (( (lpaddr_t) &x86_32_init_ap_global
           - (lpaddr_t) &x86_32_start_ap ) + real_dest);

    genpaddr_t global;
    struct monitor_blocking_rpc_client *mc = get_monitor_blocking_rpc_client();
    err = mc->vtbl.get_global_paddr(mc, &global);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke spawn core");
        return err_push(err, MON_ERR_SPAWN_CORE);
    }
    *ap_global = (uint64_t)(genpaddr_t)global;

    // pointer to the pseudo-lock used to detect boot up of new core
    volatile uint32_t *ap_wait = (volatile uint32_t *)
        ((lpaddr_t) &x86_32_init_ap_wait
         - ((lpaddr_t) &x86_32_start_ap) + real_dest);

    // Pointer to the lock variable in the realmode code
    volatile uint8_t *ap_lock = (volatile uint8_t *)
        ((lpaddr_t) &x86_32_init_ap_lock
         - ((lpaddr_t) &x86_32_start_ap) + real_dest);

    *ap_wait = AP_STARTING_UP;

    end = bench_tsc();

    // INIT then STARTUP IPI sequence
    err = invoke_send_init_ipi(ipi_cap, core_id);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke send init ipi");
        return err;
    }

    err = invoke_send_start_ipi(ipi_cap, core_id, entry);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke sipi");
        return err;
    }

    //give the new core a bit time to start-up and set the lock
    for (uint64_t i = 0; i < STARTUP_TIMEOUT; i++) {
        if (*ap_lock != 0) {
            break;
        }
    }

    // If the lock is set, the core has been started, otherwise assume, that
    // a core with this APIC ID doesn't exist.
    if (*ap_lock != 0) {
        while (*ap_wait != AP_STARTED);
        trace_event(TRACE_SUBSYS_KERNEL,
                    TRACE_EVENT_KERNEL_CORE_START_REQUEST_ACK, core_id);
        *ap_lock = 0;
        return 0;
    }

    assert(!"badness");
    return -1;
}
/**
 * \brief Setup arguments and environment
 *
 * Creates the args frame (mapped twice: into the child's vspace and into
 * ours so it can be filled in), copies argv and envp strings plus the
 * serialised vspace/pmap data into it, and stores the child-vspace pointer
 * in the child's enabled register save area.
 *
 * \param argv  Command-line arguments, NULL-terminated
 * \param envp  Environment, NULL-terminated
 */
static errval_t spawn_setup_env(struct spawninfo *si,
                                char *const argv[], char *const envp[])
{
    errval_t err;

    // Create frame (actually multiple pages) for arguments
    si->argspg.cnode = si->taskcn;
    si->argspg.slot = TASKCN_SLOT_ARGSPAGE;
    struct capref spawn_argspg = {
        .cnode = si->taskcn,
        .slot = TASKCN_SLOT_ARGSPAGE2,
    };
    err = frame_create(si->argspg, ARGS_SIZE, NULL);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_CREATE_ARGSPG);
    }
    err = cap_copy(spawn_argspg, si->argspg);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_CREATE_ARGSPG);
    }

    /* Map in args frame */
    genvaddr_t spawn_args_base;
    err = spawn_vspace_map_one_frame(si, &spawn_args_base, spawn_argspg,
                                     ARGS_SIZE);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_MAP_ARGSPG_TO_NEW);
    }

    void *argspg;
    err = vspace_map_one_frame(&argspg, ARGS_SIZE, si->argspg, NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_MAP_ARGSPG_TO_SELF);
    }

    /* Layout of arguments page:
     *   struct spawn_domain_params; // contains pointers to other fields
     *   char buf[]; // NUL-terminated strings for arguments and environment
     *   vspace layout data follows the string data
     */
    struct spawn_domain_params *params = argspg;
    char *buf = (char *)(params + 1);
    size_t buflen = ARGS_SIZE - (buf - (char *)argspg);

    /* Copy command-line arguments */
    // pointers stored in params are child-vspace addresses, produced by
    // rebasing our mapping (argspg) onto spawn_args_base
    int i;
    size_t len;
    for (i = 0; argv[i] != NULL; i++) {
        len = strlen(argv[i]) + 1;
        if (len > buflen) {
            return SPAWN_ERR_ARGSPG_OVERFLOW;
        }
        strcpy(buf, argv[i]);
        params->argv[i] = buf - (char *)argspg + (char *)(lvaddr_t)spawn_args_base;
        buf += len;
        buflen -= len;
    }
    // NOTE(review): params->argv is written before this bound is checked —
    // an argv longer than MAX_CMDLINE_ARGS would overrun params->argv before
    // the assert fires; confirm callers guarantee the bound
    assert(i <= MAX_CMDLINE_ARGS);
    int argc = i;
    params->argv[i] = NULL;

    /* Copy environment strings */
    for (i = 0; envp[i] != NULL; i++) {
        len = strlen(envp[i]) + 1;
        if (len > buflen) {
            return SPAWN_ERR_ARGSPG_OVERFLOW;
        }
        strcpy(buf, envp[i]);
        params->envp[i] = buf - (char *)argspg + (char *)(lvaddr_t)spawn_args_base;
        buf += len;
        buflen -= len;
    }
    assert(i <= MAX_ENVIRON_VARS);
    params->envp[i] = NULL;

    /* Serialise vspace data */
    // XXX: align buf to next word
    char *vspace_buf = (char *)ROUND_UP((lvaddr_t)buf, sizeof(uintptr_t));
    buflen -= vspace_buf - buf;

    // FIXME: currently just the pmap is serialised
    err = si->vspace->pmap->f.serialise(si->vspace->pmap, vspace_buf, buflen);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_SERIALISE_VSPACE);
    }

    /* Setup environment pointer and vspace pointer */
    params->argc = argc;
    params->vspace_buf = (char *)vspace_buf - (char *)argspg
                         + (char *)(lvaddr_t)spawn_args_base;
    params->vspace_buf_len = buflen;

    // Setup TLS data
    params->tls_init_base = (void *)vspace_genvaddr_to_lvaddr(si->tls_init_base);
    params->tls_init_len = si->tls_init_len;
    params->tls_total_len = si->tls_total_len;

    // hand the child the address of its args page in its first parameter
    // register
    arch_registers_state_t *enabled_area =
        dispatcher_get_enabled_save_area(si->handle);
    registers_set_param(enabled_area, (uintptr_t)spawn_args_base);

    return SYS_ERR_OK;
}

/**
 * Copies caps from inheritcnode into destination cnode,
 * ignores caps that do not exist.
 *
 * \param inheritcn    Source cnode
 * \param inherit_slot Source cnode slot
 * \param destcn       Target cnode
 * \param destcn_slot  Target cnode slot
 *
 * \retval SYS_ERR_OK Copy to target was successful or source cap
 *         did not exist.
 * \retval SPAWN_ERR_COPY_INHERITCN_CAP Error in cap_copy
 */
static errval_t spawn_setup_inherited_cap(struct cnoderef inheritcn,
                                          capaddr_t inherit_slot,
                                          struct cnoderef destcn,
                                          capaddr_t destcn_slot)
{
    errval_t err;

    struct capref src;
    src.cnode = inheritcn;
    src.slot = inherit_slot;;

    // Create frame (actually multiple pages) for fds
    struct capref dest;
    dest.cnode = destcn;
    dest.slot = destcn_slot;

    err = cap_copy(dest, src);
    if (err_no(err) == SYS_ERR_SOURCE_CAP_LOOKUP) {
        // there was no fdcap to inherit, continue
        return SYS_ERR_OK;
    } else if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_COPY_INHERITCN_CAP);
    }

    return SYS_ERR_OK;
}
/** * \brief Setup the dispatcher frame */ static errval_t spawn_setup_dispatcher(struct spawninfo *si, coreid_t core_id, const char *name, genvaddr_t entry, void* arch_info) { errval_t err; /* Create dispatcher frame (in taskcn) */ si->dispframe.cnode = si->taskcn; si->dispframe.slot = TASKCN_SLOT_DISPFRAME; struct capref spawn_dispframe = { .cnode = si->taskcn, .slot = TASKCN_SLOT_DISPFRAME2, }; err = frame_create(si->dispframe, (1 << DISPATCHER_FRAME_BITS), NULL); if (err_is_fail(err)) { return err_push(err, SPAWN_ERR_CREATE_DISPATCHER_FRAME); } err = cap_copy(spawn_dispframe, si->dispframe); if (err_is_fail(err)) { return err_push(err, SPAWN_ERR_CREATE_DISPATCHER_FRAME); } /* Map in dispatcher frame */ dispatcher_handle_t handle; err = vspace_map_one_frame((void**)&handle, 1ul << DISPATCHER_FRAME_BITS, si->dispframe, NULL, NULL); if (err_is_fail(err)) { return err_push(err, SPAWN_ERR_MAP_DISPATCHER_TO_SELF); } genvaddr_t spawn_dispatcher_base; err = spawn_vspace_map_one_frame(si, &spawn_dispatcher_base, spawn_dispframe, 1UL << DISPATCHER_FRAME_BITS); if (err_is_fail(err)) { return err_push(err, SPAWN_ERR_MAP_DISPATCHER_TO_NEW); } /* Set initial state */ // XXX: Confusion address translation about l/gen/addr in entry struct dispatcher_shared_generic *disp = get_dispatcher_shared_generic(handle); struct dispatcher_generic *disp_gen = get_dispatcher_generic(handle); arch_registers_state_t *enabled_area = dispatcher_get_enabled_save_area(handle); arch_registers_state_t *disabled_area = dispatcher_get_disabled_save_area(handle); /* Place core_id */ disp_gen->core_id = core_id; /* place eh information */ disp_gen->eh_frame = si->eh_frame; disp_gen->eh_frame_size = si->eh_frame_size; disp_gen->eh_frame_hdr = si->eh_frame_hdr; disp_gen->eh_frame_hdr_size = si->eh_frame_hdr_size; /* Setup dispatcher and make it runnable */ disp->udisp = spawn_dispatcher_base; disp->disabled = 1; disp->fpu_trap = 1; #ifdef __k1om__ disp->xeon_phi_id = disp_xeon_phi_id(); #endif // Copy 
the name for debugging const char *copy_name = strrchr(name, '/'); if (copy_name == NULL) { copy_name = name; } else { copy_name++; } strncpy(disp->name, copy_name, DISP_NAME_LEN); spawn_arch_set_registers(arch_info, handle, enabled_area, disabled_area); registers_set_entry(disabled_area, entry); si->handle = handle; return SYS_ERR_OK; } errval_t spawn_map_bootinfo(struct spawninfo *si, genvaddr_t *retvaddr) { errval_t err; struct capref src = { .cnode = cnode_task, .slot = TASKCN_SLOT_BOOTINFO }; struct capref dest = { .cnode = si->taskcn, .slot = TASKCN_SLOT_BOOTINFO }; err = cap_copy(dest, src); if (err_is_fail(err)) { return err_push(err, LIB_ERR_CAP_COPY); } err = spawn_vspace_map_one_frame(si, retvaddr, dest, BOOTINFO_SIZE); if (err_is_fail(err)) { return err_push(err, SPAWN_ERR_MAP_BOOTINFO); } return SYS_ERR_OK; } /** * \brief Retrive the commandline args of #name * * The arguments are malloced into a new space so need to be freed after use */ errval_t spawn_get_cmdline_args(struct mem_region *module, char **retargs) { assert(module != NULL && retargs != NULL); /* Get the cmdline args */ const char *args = getopt_module(module); /* Allocate space */ *retargs = malloc(sizeof(char) * strlen(args)); if (!retargs) { return LIB_ERR_MALLOC_FAIL; } /* Copy args */ strcpy(*retargs, args); return SYS_ERR_OK; }
/**
 * \brief initializes a thread on the given core
 *
 * \param core        ID of the core on which to create the thread on
 * \param stack_size  size of the stack of the thread to be created
 * \param thread      pointer to the thread struct to create
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 *
 * Spawns a new dispatcher on the target core, allocates a shared message
 * frame, exports a BOMP channel over it, starts the worker thread on the
 * remote dispatcher, and blocks dispatching events until the worker has
 * connected back.
 */
errval_t bomp_thread_init(coreid_t core, size_t stack_size,
                          struct bomp_thread *thread)
{
    errval_t err;

    BOMP_DEBUG_THREAD("Creating thread on core %"PRIuCOREID " \n", core);

    // spin up a dispatcher on the target core; done is set by the callback
    uint32_t done;
    err = domain_new_dispatcher(core, bomp_thread_init_done, &done);
    if (err_is_fail(err)) {
        BOMP_ERROR("creating new dispatcher on core %" PRIuCOREID "failed\n",
                   core);
        return err;
    }

    while(!done) {
        thread_yield();
    }

    BOMP_DEBUG_THREAD("dispatcher ready. allocating memory for msg channel\n");

    // one frame holds both channel directions, BOMP_CHANNEL_SIZE each
    size_t msg_frame_size;
    err = frame_alloc(&thread->msgframe, 2 * BOMP_CHANNEL_SIZE,
                      &msg_frame_size);
    if (err_is_fail(err)) {
        return err;
    }

    err = vspace_map_one_frame(&thread->msgbuf, msg_frame_size,
                               thread->msgframe, NULL, NULL);
    if (err_is_fail(err)) {
        return err;
    }

    struct bomp_frameinfo fi = {
        .sendbase = (lpaddr_t)thread->msgbuf + BOMP_CHANNEL_SIZE,
        .inbuf = thread->msgbuf,
        .inbufsize = BOMP_CHANNEL_SIZE,
        .outbuf = ((uint8_t *) thread->msgbuf) + BOMP_CHANNEL_SIZE,
        .outbufsize = BOMP_CHANNEL_SIZE
    };

    BOMP_DEBUG_THREAD("creating channel on %p\n", thread->msgbuf);

    err = bomp_accept(&fi, thread, bomp_thread_accept_cb,
                      get_default_waitset(), IDC_EXPORT_FLAGS_DEFAULT);
    if (err_is_fail(err)) {
        // XXX> error handling
        return err;
    }

    BOMP_DEBUG_THREAD("creating thread on core %" PRIuCOREID "\n", core);
    err = domain_thread_create_on(core, bomp_thread_msg_handler,
                                  thread->msgbuf);
    if (err_is_fail(err)) {
        // XXX> error handling
        return err;
    }

    // wait for the worker to connect (sets thread->ctrl)
    while (thread->ctrl == NULL) {
        err = event_dispatch(get_default_waitset());
        if (err_is_fail(err)) {
            USER_PANIC_ERR(err, "event dispatch\n");
        }
    }

    BOMP_DEBUG_THREAD("thread on core %" PRIuCOREID " connected \n", core);

    return thread->thread_err;
}

// Send an "execute" message to the worker thread, asking it to run fn(arg)
// with the given OpenMP thread id and the current ICV table; blocks
// dispatching events until the message has actually been transmitted.
errval_t bomp_thread_exec(struct bomp_thread *thread,
                          bomp_thread_fn_t fn, void *arg, uint32_t tid)
{
    debug_printf("bomp_thread_exec(%p, %p, %p, %u) %p\n",
                 thread, fn, arg, tid, thread->icvt);
    struct txq_msg_st *msg_st = txq_msg_st_alloc(&thread->txq);
    if (msg_st == NULL) {
        return LIB_ERR_MALLOC_FAIL;
    }

    // msg_sent is flipped by the send-done callback
    uint32_t msg_sent = 0;

    msg_st->send = execute__tx;
    msg_st->cleanup = (txq_cleanup_fn_t)txq_msg_sent_cb;

    struct bomp_msg_st *bomp_msg_st = (struct bomp_msg_st *)msg_st;

    bomp_msg_st->args.exec.arg = (uint64_t)arg;
    bomp_msg_st->args.exec.fn = (uint64_t)fn;
    bomp_msg_st->args.exec.tid = tid;
    bomp_msg_st->args.exec.icv = (uint64_t)thread->icvt;
    bomp_msg_st->message_sent = &msg_sent;

    txq_send(msg_st);

    while(msg_sent == 0) {
        event_dispatch(get_default_waitset());
    }

    //return event_dispatch_non_block(get_default_waitset());
    return SYS_ERR_OK;
}
/**
 * \brief sets up the PCI/ACPI memory allocators from bootinfo
 *
 * Maps the bootinfo frame, initializes the DevFrame memory manager
 * (pci_mm_physaddr), fetches the I/O and physical-address capabilities from
 * the monitor, and feeds all PhyAddr/PlatformData regions (retyped to
 * DevFrame) into the allocator, while exporting every region as an SKB fact.
 *
 * \returns SYS_ERR_OK on success, pushed errval on allocator setup failure
 *          (panics on monitor RPC failures)
 */
static errval_t init_allocators(void)
{
    errval_t err, msgerr;

    struct monitor_blocking_rpc_client *cl = get_monitor_blocking_rpc_client();
    assert(cl != NULL);

    // Get the bootinfo and map it in.
    struct capref bootinfo_frame;
    size_t bootinfo_size;
    struct bootinfo *bootinfo;
    msgerr = cl->vtbl.get_bootinfo(cl, &err, &bootinfo_frame, &bootinfo_size);
    if (err_is_fail(msgerr) || err_is_fail(err)) {
        USER_PANIC_ERR(err_is_fail(msgerr) ? msgerr : err, "failed in get_bootinfo");
    }

    err = vspace_map_one_frame((void**)&bootinfo, bootinfo_size, bootinfo_frame,
                               NULL, NULL);
    assert(err_is_ok(err));

    /* Initialize the memory allocator to handle PhysAddr caps */
    // static: the allocator must outlive this function; mm_init keeps a pointer
    static struct range_slot_allocator devframes_allocator;
    err = range_slot_alloc_init(&devframes_allocator, PCI_CNODE_SLOTS, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC_INIT);
    }

    // manage the 0 .. 2^48 physical address range as DevFrame caps
    err = mm_init(&pci_mm_physaddr, ObjType_DevFrame, 0, 48,
                  /* This next parameter is important. It specifies the maximum
                   * amount that a cap may be "chunked" (i.e. broken up) at each
                   * level in the allocator. Setting it higher than 1 reduces the
                   * memory overhead of keeping all the intermediate caps around,
                   * but leads to problems if you chunk up a cap too small to be
                   * able to allocate a large subregion. This caused problems
                   * for me with a large framebuffer... -AB 20110810 */
                  1, /*was DEFAULT_CNODE_BITS,*/
                  slab_default_refill, slot_alloc_dynamic,
                  &devframes_allocator, false);
    if (err_is_fail(err)) {
        return err_push(err, MM_ERR_MM_INIT);
    }

    // Request I/O Cap
    struct capref requested_caps;
    errval_t error_code;
    err = cl->vtbl.get_io_cap(cl, &requested_caps, &error_code);
    assert(err_is_ok(err) && err_is_ok(error_code));
    // Copy into correct slot
    struct capref caps_io = {
        .cnode = cnode_task,
        .slot = TASKCN_SLOT_IO
    };
    // NOTE(review): the result of cap_copy is not checked here; the next
    // assignment to err below overwrites it silently — confirm intentional.
    err = cap_copy(caps_io, requested_caps);

    // XXX: The code below is confused about gen/l/paddrs.
    // Caps should be managed in genpaddr, while the bus mgmt must be in lpaddr.
    err = cl->vtbl.get_phyaddr_cap(cl, &requested_caps, &error_code);
    assert(err_is_ok(err) && err_is_ok(error_code));
    physical_caps = requested_caps;

    // Build the capref for the first physical address capability
    struct capref phys_cap;
    phys_cap.cnode = build_cnoderef(requested_caps, PHYSADDRCN_BITS);
    phys_cap.slot = 0;

    // CNode to hold the retyped DevFrame caps, one slot per region
    struct cnoderef devcnode;
    err = slot_alloc(&my_devframes_cnode);
    assert(err_is_ok(err));
    cslot_t slots;
    err = cnode_create(&my_devframes_cnode, &devcnode, 255, &slots);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cnode create");
    }

    struct capref devframe;
    devframe.cnode = devcnode;
    devframe.slot = 0;
    for (int i = 0; i < bootinfo->regions_length; i++) {
        struct mem_region *mrp = &bootinfo->regions[i];
        // export every region as a fact into the SKB (Prolog knowledge base);
        // modules report their byte size, other regions report 2^mr_bits
        if (mrp->mr_type == RegionType_Module) {
            skb_add_fact("memory_region(16'%" PRIxGENPADDR ",%u,%zu,%u,%tu).",
                         mrp->mr_base, 0, mrp->mrmod_size, mrp->mr_type,
                         mrp->mrmod_data);
        } else {
            skb_add_fact("memory_region(16'%" PRIxGENPADDR ",%u,%zu,%u,%tu).",
                         mrp->mr_base, mrp->mr_bits,
                         ((size_t)1) << mrp->mr_bits, mrp->mr_type,
                         mrp->mrmod_data);
        }
        if (mrp->mr_type == RegionType_PhyAddr ||
            mrp->mr_type == RegionType_PlatformData) {
            ACPI_DEBUG("Region %d: %"PRIxGENPADDR" - %"PRIxGENPADDR" %s\n",
                       i, mrp->mr_base,
                       mrp->mr_base + (((size_t)1)<<mrp->mr_bits),
                       mrp->mr_type == RegionType_PhyAddr ?
                       "physical address" : "platform data");
            // retype PhysAddr -> DevFrame so the region can be handed out
            err = cap_retype(devframe, phys_cap, ObjType_DevFrame, mrp->mr_bits);
            if (err_no(err) == SYS_ERR_REVOKE_FIRST) {
                // someone else holds descendants of this cap; skip the region
                printf("cannot retype region %d: need to revoke first; ignoring it\n", i);
            } else {
                assert(err_is_ok(err));
                err = mm_add(&pci_mm_physaddr, devframe, mrp->mr_bits,
                             mrp->mr_base);
                if (err_is_fail(err)) {
                    USER_PANIC_ERR(err, "adding region %d FAILED\n", i);
                }
            }
            // slots advance in lock-step: phys_cap.slot i corresponds to the
            // i-th PhyAddr/PlatformData region in bootinfo
            phys_cap.slot++;
            devframe.slot++;
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Since we cannot dynamically grow our stack yet, we need a
 * version that will create threads on remote core with variable stack size
 *
 * Spans the current domain onto core_id: allocates and initializes a new
 * dispatcher frame, creates the initial thread for it, and kicks off the
 * monitor-driven spanning state machine, blocking until it completes.
 *
 * \param core_id      target core (must differ from the calling core)
 * \param callback     invoked once the new dispatcher is up
 * \param callback_arg opaque argument for callback
 * \param stack_size   stack size for the dispatcher's init thread
 *
 * \bug this is a hack
 */
static errval_t domain_new_dispatcher_varstack(coreid_t core_id,
                                               domain_spanned_callback_t callback,
                                               void *callback_arg, size_t stack_size)
{
    assert(core_id != disp_get_core_id());

    errval_t err;
    struct domain_state *domain_state = get_domain_state();
    struct monitor_binding *mb = get_monitor_binding();
    assert(domain_state != NULL);

    /* Set reply handler */
    mb->rx_vtbl.span_domain_reply = span_domain_reply;

    while(domain_state->iref == 0) { /* If not initialized, wait */
        messages_wait_and_handle_next();
    }

    /* Create the remote_core_state passed to the new dispatcher */
    // calloc: the new dispatcher reads this, so all fields must start zeroed
    struct remote_core_state *remote_core_state =
        calloc(1, sizeof(struct remote_core_state));
    if (!remote_core_state) {
        return LIB_ERR_MALLOC_FAIL;
    }
    remote_core_state->core_id = disp_get_core_id();
    remote_core_state->iref = domain_state->iref;

    /* get the alignment of the morecore state */
    struct morecore_state *state = get_morecore_state();
    remote_core_state->pagesize = state->mmu_state.alignment;

    /* Create the thread for the new dispatcher to init on */
    // unrunnable: it must not start until the remote dispatcher schedules it
    struct thread *newthread =
        thread_create_unrunnable(remote_core_init_enabled,
                                 (void*)remote_core_state, stack_size);
    if (newthread == NULL) {
        // NOTE(review): remote_core_state is leaked on this path
        return LIB_ERR_THREAD_CREATE;
    }

    /* Save the state for later steps of the spanning state machine */
    struct span_domain_state *span_domain_state =
        malloc(sizeof(struct span_domain_state));
    if (!span_domain_state) {
        return LIB_ERR_MALLOC_FAIL;
    }
    span_domain_state->thread = newthread;
    span_domain_state->core_id = core_id;
    span_domain_state->callback = callback;
    span_domain_state->callback_arg = callback_arg;

    /* Give remote_core_state pointer to span_domain_state */
    remote_core_state->span_domain_state = span_domain_state;

    /* Start spanning domain state machine by sending vroot to the monitor */
    // slot 0 of the page cnode is the root of our page table
    struct capref vroot = {
        .cnode = cnode_page,
        .slot = 0
    };

    /* Create new dispatcher frame */
    struct capref frame;
    size_t dispsize = ((size_t)1) << DISPATCHER_FRAME_BITS;
    err = frame_alloc(&frame, dispsize, &dispsize);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_FRAME_ALLOC);
    }
    lvaddr_t dispaddr;

    err = vspace_map_one_frame((void **)&dispaddr, dispsize, frame, NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_MAP);
    }

    dispatcher_handle_t handle = dispaddr;
    struct dispatcher_shared_generic *disp =
        get_dispatcher_shared_generic(handle);
    struct dispatcher_generic *disp_gen = get_dispatcher_generic(handle);
    arch_registers_state_t *disabled_area =
        dispatcher_get_disabled_save_area(handle);

    /* Set dispatcher on the newthread */
    span_domain_state->thread->disp = handle;
    span_domain_state->frame = frame;
    span_domain_state->vroot = vroot;

    /* Setup dispatcher */
    disp->udisp = (lvaddr_t)handle;
    disp->disabled = true; // starts in disabled mode until fully initialized
    disp->fpu_trap = 1;
    disp_gen->core_id = span_domain_state->core_id;

    // Setup the dispatcher to run remote_core_init_disabled
    // and pass the created thread as an argument
    registers_set_initial(disabled_area, span_domain_state->thread,
                          (lvaddr_t)remote_core_init_disabled,
                          (lvaddr_t)&disp_gen->stack[DISPATCHER_STACK_WORDS],
                          (uintptr_t)span_domain_state->thread, 0, 0, 0);

    // Give dispatcher a unique name for debugging
    snprintf(disp->name, DISP_NAME_LEN, "%s%d", disp_name(),
             span_domain_state->core_id);

#ifdef __x86_64__
    // XXX: share LDT state between all dispatchers
    // this needs to happen before the remote core starts, otherwise the segment
    // selectors in the new thread state are invalid
    struct dispatcher_shared_x86_64 *disp_x64 =
        get_dispatcher_shared_x86_64(handle);
    struct dispatcher_shared_x86_64 *mydisp_x64 =
        get_dispatcher_shared_x86_64(curdispatcher());

    disp_x64->ldt_base = mydisp_x64->ldt_base;
    disp_x64->ldt_npages = mydisp_x64->ldt_npages;
#endif

    threads_prepare_to_span(handle);

    // Setup new local thread for inter-dispatcher messages, if not already done
    // static: create exactly one interdisp handler thread per dispatcher
    static struct thread *interdisp_thread = NULL;
    if(interdisp_thread == NULL) {
        interdisp_thread = thread_create(interdisp_msg_handler,
                                         &domain_state->interdisp_ws);
        err = thread_detach(interdisp_thread);
        assert(err_is_ok(err));
    }

#if 0
    // XXX: Tell currently active interdisp-threads to handle default waitset
    for(int i = 0; i < MAX_CPUS; i++) {
        struct interdisp_binding *b = domain_state->b[i];

        if(disp_get_core_id() != i && b != NULL) {
            err = b->tx_vtbl.span_slave(b, NOP_CONT);
            assert(err_is_ok(err));
        }
    }
#endif

#if 0
    /* XXX: create a thread that will handle the default waitset */
    if (domain_state->default_waitset_handler == NULL) {
        domain_state->default_waitset_handler = thread_create(span_slave_thread,
                                                              NULL);
        assert(domain_state->default_waitset_handler != NULL);
    }
#endif

    /* Wait to use the monitor binding */
    // serialize spanning requests over the shared monitor binding
    struct monitor_binding *mcb = get_monitor_binding();
    event_mutex_enqueue_lock(&mcb->mutex, &span_domain_state->event_qnode,
                             (struct event_closure) {
                                 .handler = span_domain_request_sender_wrapper,
                                 .arg = span_domain_state });

#if 1
    // block until the spanning state machine signals completion
    while(!span_domain_state->initialized) {
        event_dispatch(get_default_waitset());
    }

    /* Free state */
    free(span_domain_state);
#endif

    return SYS_ERR_OK;
}
/**
 * \brief initializes the XOMP worker library
 *
 * Identifies the message frame passed in the ArgCN, maps it, sets up the
 * worker's TLS/work area, and binds (or connects) back to the XOMP master,
 * blocking until the binding is established.
 *
 * \param wid   Xomp worker id
 *
 * \returns SYS_ERR_OK on success
 *          errval on failure
 */
errval_t xomp_worker_init(xomp_wid_t wid)
{
    errval_t err;

    worker_id = wid;

    XWI_DEBUG("initializing worker {%016lx} iref:%u\n", worker_id, svc_iref);

#if XOMP_BENCH_WORKER_EN
    bench_init();
#endif

    /* the spawner placed our message frame in the well-known ArgCN slot */
    struct capref frame = {
        .cnode = cnode_root,
        .slot = ROOTCN_SLOT_ARGCN
    };

    struct frame_identity id;
    err = invoke_frame_identify(frame, &id);
    if (err_is_fail(err)) {
        return err_push(err, XOMP_ERR_INVALID_MSG_FRAME);
    }

    size_t frame_size = 0;

    if (svc_iref) {
        frame_size = XOMP_TLS_SIZE;
    } else {
        frame_size = XOMP_FRAME_SIZE;
        err = spawn_symval_cache_init(0);
        if (err_is_fail(err)) {
            return err;
        }
    }

    if ((1UL << id.bits) < XOMP_TLS_SIZE) {
        return XOMP_ERR_INVALID_MSG_FRAME;
    }

    msgframe = frame;

    err = vspace_map_one_frame(&msgbuf, frame_size, frame, NULL, NULL);
    if (err_is_fail(err)) {
        /* BUGFIX: the pushed error was previously computed and discarded
         * without returning, so a failed mapping was silently ignored and
         * the worker continued with msgbuf unmapped. */
        return err_push(err, XOMP_ERR_WORKER_INIT_FAILED);
    }

    /* the TLS area follows the message channel unless we got a TLS-only frame */
    if (svc_iref) {
        tls = msgbuf;
    } else {
        tls = ((uint8_t *) msgbuf) + XOMP_MSG_FRAME_SIZE;
    }

    XWI_DEBUG("messaging frame mapped: [%016lx] @ [%016lx]\n", id.base,
              (lvaddr_t )msgbuf);

    struct bomp_thread_local_data *tlsinfo = malloc(sizeof(*tlsinfo));
    if (tlsinfo == NULL) {
        // added: previously an OOM here dereferenced a NULL pointer below
        return LIB_ERR_MALLOC_FAIL;
    }
    tlsinfo->thr = thread_self();
    tlsinfo->work = (struct bomp_work *) tls;
    tlsinfo->work->data = tlsinfo->work + 1;
    g_bomp_state->backend.set_tls(tlsinfo);

#ifdef __k1om__
    if (worker_id & XOMP_WID_GATEWAY_FLAG) {
        err = xomp_gateway_init();
    } else {
        if (!svc_iref) {
            err = xomp_gateway_bind_svc();
        } else {
            err = SYS_ERR_OK;
        }
    }
    if (err_is_fail(err)) {
        return err;
    }
#endif

#ifdef __k1om__
    if (!svc_iref) {
        err = xeon_phi_client_init(disp_xeon_phi_id());
        if (err_is_fail(err)) {
            /* BUGFIX: as above, err_push result was discarded and the
             * failure fell through to xeon_phi_client_set_callbacks. */
            return err_push(err, XOMP_ERR_WORKER_INIT_FAILED);
        }
        xeon_phi_client_set_callbacks(&callbacks);
    }
#endif

    struct waitset *ws = get_default_waitset();

// XXX: disabling DMA on the host as there is no replication used at this moment
#if XOMP_WORKER_ENABLE_DMA && defined(__k1om__)
    /* XXX: use lib numa */

#ifndef __k1om__
    uint8_t numanode = 0;
    if (disp_get_core_id() > 20) {
        numanode = 1;
    }

    err = dma_manager_wait_for_driver(dma_device_type, numanode);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "could not wait for the DMA driver");
    }
#endif
    char svc_name[30];
#ifdef __k1om__
    snprintf(svc_name, 30, "%s", XEON_PHI_DMA_SERVICE_NAME);
#else
    snprintf(svc_name, 30, "%s.%u", IOAT_DMA_SERVICE_NAME, numanode);
#endif

    struct dma_client_info dma_info = {
        .type = DMA_CLIENT_INFO_TYPE_NAME,
        .device_type = dma_device_type,
        .args.name = svc_name
    };
    err = dma_client_device_init(&dma_info, &dma_dev);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "DMA device initialization");
    }
#endif

    if (svc_iref) {
        err = xomp_bind(svc_iref, master_bind_cb, NULL, ws,
                        IDC_EXPORT_FLAGS_DEFAULT);
    } else {
        struct xomp_frameinfo fi = {
            .sendbase = id.base,
            .inbuf = ((uint8_t *) msgbuf) + XOMP_MSG_CHAN_SIZE,
            .inbufsize = XOMP_MSG_CHAN_SIZE,
            .outbuf = ((uint8_t *) msgbuf),
            .outbufsize = XOMP_MSG_CHAN_SIZE
        };
        err = xomp_connect(&fi, master_bind_cb, NULL, ws,
                           IDC_EXPORT_FLAGS_DEFAULT);
    }

    if (err_is_fail(err)) {
        /* TODO: Clean up */
        return err_push(err, XOMP_ERR_WORKER_INIT_FAILED);
    }

    XWI_DEBUG("Waiting until bound to master...\n");

    /* block until master_bind_cb fires */
    while (!is_bound) {
        messages_wait_and_handle_next();
    }

    if (xbinding == NULL) {
        return XOMP_ERR_WORKER_INIT_FAILED;
    }

    return SYS_ERR_OK;
}
static void impl_test(void) { errval_t err; debug_printf("Doing an implementation test\n"); struct capref frame; err = frame_alloc(&frame, 2 * BUFFER_SIZE, NULL); assert(err_is_ok(err)); struct frame_identity id; err = invoke_frame_identify(frame, &id); assert(err_is_ok(err)); void *buf; err = vspace_map_one_frame(&buf, 1UL << id.bits, frame, NULL, NULL); assert(err_is_ok(err)); memset(buf, 0, 1UL << id.bits); memset(buf, 0xA5, BUFFER_SIZE); struct ioat_dma_req_setup setup = { .type = IOAT_DMA_REQ_TYPE_MEMCPY, .src = id.base, .dst = id.base + BUFFER_SIZE, .bytes = BUFFER_SIZE, .done_cb = impl_test_cb, .arg = buf }; int reps = 10; do { debug_printf("!!!!!! NEW ROUND\n"); err = ioat_dma_request_memcpy(dma_ctrl.devices, &setup); assert(err_is_ok(err)); uint32_t i = 10; while(i--) { ioat_dma_device_poll_channels(dma_ctrl.devices); } }while(reps--); } #endif int main(int argc, char *argv[]) { errval_t err; debug_printf("I/O AT DMA driver started\n"); /* * Parsing of cmdline arguments. * * When started by Kaluga, the last element of the cmdline will contain * the basic PCI information of the device. * VENDORID:DEVICEID:BUS:DEV:FUN */ uint32_t vendor_id, device_id; struct pci_addr addr = { .bus = PCI_ADDR_DONT_CARE, .device = PCI_ADDR_DONT_CARE, .device = PCI_ADDR_DONT_CARE }; enum device_type devtype = IOAT_DEVICE_INVAL; if (argc > 1) { uint32_t parsed = sscanf(argv[argc - 1], "%x:%x:%x:%x:%x", &vendor_id, &device_id, &addr.bus, &addr.device, &addr.function); if (parsed != 5) { DEBUGPRINT("WARNING: cmdline parsing failed. 
Using PCI Address [0,0,0]"); } else { if (vendor_id != 0x8086) { USER_PANIC("unexpected vendor [%x]", vendor_id); } switch ((device_id & 0xFFF0)) { case PCI_DEVICE_IOAT_IVB0: devtype = IOAT_DEVICE_IVB; break; case PCI_DEVICE_IOAT_HSW0: devtype = IOAT_DEVICE_HSW; break; default: USER_PANIC("unexpected device [%x]", device_id) ; break; } DEBUGPRINT("Initializing I/O AT DMA device with PCI address [%u,%u,%u]\n", addr.bus, addr.device, addr.function); } } else { DEBUGPRINT("WARNING: Initializing I/O AT DMA device with unknown PCI address " "[0,0,0]\n"); } err = ioat_device_discovery(addr, devtype, IOAT_DMA_OPERATION); if (err_is_fail(err)) { USER_PANIC_ERR(err, "DMA Device discovery failed"); } #if DMA_BENCH_RUN_BENCHMARK struct ioat_dma_device *dev = ioat_device_get_next(); dma_bench_run_default(dev); #endif #if IOAT_DMA_OPERATION == IOAT_DMA_OPERATION_SERVICE iref_t svc_iref; char svc_name[30]; uint8_t numa_node = (disp_get_core_id() >= 20); snprintf(svc_name, 30, "%s.%u", IOAT_DMA_SERVICE_NAME, numa_node); err = dma_service_init_with_name(svc_name, &dma_svc_cb, NULL, &svc_iref); if (err_is_fail(err)) { USER_PANIC_ERR(err, "Failed to start the DMA service"); } err = dma_manager_register_driver(0, 1ULL << 40, DMA_DEV_TYPE_IOAT, svc_iref); if (err_is_fail(err)) { USER_PANIC_ERR(err, "Failed to register with the DMA manager\n"); } DEBUGPRINT("Driver registered with DMA manager. 
Serving requests now.\n"); #endif #if IOAT_DMA_OPERATION == IOAT_DMA_OPERATION_LIBRARY #endif uint8_t idle = 0x1; uint32_t idle_counter = 0xFF; while (1) { err = ioat_device_poll(); switch (err_no(err)) { case DMA_ERR_DEVICE_IDLE: idle = idle && 0x1; break; case SYS_ERR_OK: idle = 0; break; default: debug_printf("I/O AT DMA driver terminated: in poll, %s\n", err_getstring(err)); return err; } err = event_dispatch_non_block(get_default_waitset()); switch (err_no(err)) { case SYS_ERR_OK: idle = 0; break; case LIB_ERR_NO_EVENT: idle &= 1; break; default: debug_printf("I/O AT DMA driver terminated in dispatch, %s\n", err_getstring(err)); return err; } if (idle) { idle_counter--; } if (idle_counter == 0) { idle_counter = 0xFF; thread_yield(); } } return 0; }
/**
 * \brief Page fault handler
 *
 * \param memobj  The memory object
 * \param region  The associated vregion
 * \param offset  Offset into memory object of the page fault
 * \param type    The fault type
 *
 * Locates the anon frame covering the faulting offset, fills it with the
 * corresponding file contents (zero beyond EOF) via a temporary writable
 * mapping, then maps the frame at the target address.
 */
static errval_t pagefault(struct memobj *memobj, struct vregion *vregion,
                          genvaddr_t offset, vm_fault_type_t type)
{
    errval_t err;

    assert(memobj->type == MEMOBJ_VFS);
    struct memobj_vfs *mv = (struct memobj_vfs *)memobj;
    struct memobj_anon *anon = &mv->anon;
    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t vregion_base = vregion_get_base_addr(vregion);
    genvaddr_t vregion_off = vregion_get_offset(vregion);

    assert(vregion_off == 0); // not sure if we handle this correctly

    // Walk the ordered list to find the matching frame, but don't map it yet
    struct memobj_frame_list *walk = anon->frame_list;
    while (walk) {
        if (offset >= walk->offset && offset < walk->offset + walk->size) {
            break;
        }
        walk = walk->next;
    }
    if (walk == NULL) {
        return LIB_ERR_MEMOBJ_WRONG_OFFSET;
    }

    genvaddr_t map_offset = vregion_off + walk->offset;
    size_t nbytes = walk->size;

    // how much do we need to read from the file?
    if (map_offset >= mv->filesize) {
        // nothing
        goto do_map;
    } else if (map_offset + nbytes > mv->filesize) {
        // limit size of read to maximum mapping (rest is zero-filled)
        nbytes = mv->filesize - map_offset;
    }

#if 0
    debug_printf("fault at offset %lx, mapping at %lx-%lx from file data %lx-%lx\n",
                 offset, vregion_base + map_offset,
                 vregion_base + map_offset + walk->size,
                 map_offset + mv->offset,
                 map_offset + mv->offset + nbytes);
#endif

    // map frame writable at temporary location so that we can safely fill it
    void *buf;
    struct memobj *tmp_memobj = NULL;
    struct vregion *tmp_vregion = NULL;
    err = vspace_map_one_frame(&buf, walk->size, walk->frame,
                               &tmp_memobj, &tmp_vregion);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "error setting up temp mapping in mmap pagefault handler\n");
        return err; // XXX
    }

    // seek file handle
    err = vfs_seek(mv->vh, VFS_SEEK_SET, map_offset + mv->offset);
    if (err_is_fail(err)) {
        /* BUGFIX: tear down the temporary mapping before bailing out;
         * previously this path leaked tmp_vregion and tmp_memobj. */
        vregion_destroy(tmp_vregion);
        memobj_destroy_one_frame(tmp_memobj);
        return err;
    }

    // read contents into frame
    // NOTE(review): a read error only breaks the loop; the (partially filled)
    // frame is still mapped below and the error is dropped — confirm intended.
    size_t rsize, pos = 0;
    do {
        err = vfs_read(mv->vh, (char *)buf + pos, nbytes - pos, &rsize);
        if (err_is_fail(err)) {
            break;
        }
        pos += rsize;
    } while(rsize > 0 && pos < nbytes);

    // destroy temp mappings
    // FIXME: the API for tearing down mappings is really unclear! is this sufficient?
    err = vregion_destroy(tmp_vregion);
    assert(err_is_ok(err));
    err = memobj_destroy_one_frame(tmp_memobj);
    assert(err_is_ok(err));
    //free(tmp_vregion);
    //free(tmp_memobj);

do_map:
    // map at target address with appropriate flags
    err = pmap->f.map(pmap, vregion_base + map_offset, walk->frame, 0,
                      walk->size, vregion_get_flags(vregion), NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    return SYS_ERR_OK;
}
int main(int argc, char* argv[]) { size_t size_wanted = 1<<20; size_t runs = 100; struct reset_opt *reset = NULL; struct measure_opt *measure = NULL; bool dump = false; assert(argc>0); if (argc == 1) { usage(argv[0]); return 0; } bool args_ok = true; for (int arg = 1; arg < argc; arg++) { if (strcmp(argv[arg], "help") == 0 || strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) { usage(argv[0]); return 0; } if (strncmp(argv[arg], "size=", 5) == 0) { size_wanted = atol(argv[arg]+5); } if (strncmp(argv[arg], "logsize=", 8) == 0) { size_t logsize = atol(argv[arg]+8); if (logsize > 31) { printf("ERROR: logsize too big\n"); args_ok = false; } else { size_wanted = 1 << logsize; } } else if (strncmp(argv[arg], "count=", 6) == 0) { size_wanted = atol(argv[arg]+6)*sizeof(struct cte); } else if (strncmp(argv[arg], "logcount=", 9) == 0) { size_t logcount = atol(argv[arg]+9); if (logcount > (31-OBJBITS_CTE)) { printf("ERROR: logcount too big\n"); args_ok = false; } else { size_wanted = (1 << logcount)*sizeof(struct cte); } } else if (strncmp(argv[arg], "runs=", 5) == 0) { runs = atol(argv[arg]+5); } else if (strncmp(argv[arg], "reset=", 6) == 0) { char *name = argv[arg]+6; int i; for (i = 0; reset_opts[i].name; i++) { if (strcmp(reset_opts[i].name, name) == 0) { reset = &reset_opts[i]; break; } } if (!reset_opts[i].name) { args_ok = false; printf("ERROR: unkown reset \"%s\"\n", name); } } else if (strncmp(argv[arg], "measure=", 8) == 0) { char *name = argv[arg]+8; if (strcmp(name, "dump") == 0) { measure = NULL; dump = true; } else { int i; for (i = 0; measure_opts[i].name; i++) { if (strcmp(measure_opts[i].name, name) == 0) { measure = &measure_opts[i]; break; } } if (measure_opts[i].name) { dump = false; } else { args_ok = false; printf("ERROR: unkown measure \"%s\"\n", name); } } } else { args_ok = false; printf("ERROR: unkown argument %s\n", argv[arg]); } } if (!args_ok) { usage(argv[0]); return 1; } assert(size_wanted > 0); assert(runs > 0); assert(reset); 
assert(measure || dump); errval_t err; struct capref frame; size_t size; err = frame_alloc(&frame, size_wanted, &size); assert_err(err, "alloc"); assert(size >= size_wanted); printf("got %lu bytes\n", size); struct memobj *m; struct vregion *v; void *addr; err = vspace_map_one_frame(&addr, size, frame, &m, &v); assert_err(err, "map"); if (dump) { reset_and_dump(addr, size_wanted, runs, reset->fn, reset->name); } else { bench_init(); char *bench_name = malloc(strlen(reset->name)+strlen(measure->name)+2); strcpy(bench_name, reset->name); strcat(bench_name, ":"); strcat(bench_name, measure->name); test(addr, size_wanted, runs, reset->fn, measure->fn, bench_name); free(bench_name); } printf("client done\n"); vregion_destroy(v); cap_destroy(frame); return 0; }
/**
 * \brief imports file descriptors inherited from the spawning domain
 *
 * Maps the well-known FDS frame (TASKCN_SLOT_FDSPAGE), decodes the table of
 * struct fd_store entries from it, and installs each entry in this domain's
 * FD table, printing diagnostic information about every descriptor.
 *
 * \returns SYS_ERR_OK on success
 *          SPAWN_ERR_MAP_FDSPG_TO_SELF if the frame cannot be mapped
 */
static errval_t get_inherited_fds(void)
{
    errval_t err;

    /* Map the FD buffer into our address space.
     * It stays there since the FD data structures will remain in there and be
     * referenced from the FD table.
     */
    struct capref frame = {
        .cnode = cnode_task,
        .slot  = TASKCN_SLOT_FDSPAGE,
    };
    void *fdspg;
    err = vspace_map_one_frame(&fdspg, FDS_SIZE, frame, NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_MAP_FDSPG_TO_SELF);
    }

    /* Set up to read the table */
    // Buffer layout: [int num_fds][fd_store * num_fds][per-fd handle data].
    // After the loop below, p points at the start of the handle data area.
    char *p = fdspg;
    printf("fds at: %p\n", p);
    int num_fds = *((int*)p);
    printf("num fds: %d\n", num_fds);
    struct fd_store *fd;
    p += sizeof(int);
    fd = (struct fd_store*)p;
    p += (sizeof(struct fd_store)*num_fds);

    /* Process all the FDs passed in the buffer */
    int i;
    for (i = 0; i < num_fds; i++, fd++) {

        /* add each to our fd table - replacing any fds already there */
        struct fdtab_entry fde;
        fde.type = fd->type;
        fde.handle = fd->handle;
        if (fdtab_get(fd->num)->type != FDTAB_TYPE_AVAILABLE) {
            fdtab_free(fd->num);
        }
        fdtab_alloc_from(&fde, fd->num);

        /* print out some info about the FD */
        char *s = "";
        switch (fd->type) {
        case FDTAB_TYPE_AVAILABLE:
            s = "available";
            break;
        case FDTAB_TYPE_FILE:
            s = "file";
            break;
        case FDTAB_TYPE_UNIX_SOCKET:
            s = "unix socket";
            break;
        case FDTAB_TYPE_STDIN:
            s = "stdin";
            break;
        case FDTAB_TYPE_STDOUT:
            s = "stdout";
            break;
        case FDTAB_TYPE_STDERR:
            s = "stderr";
            break;
        case FDTAB_TYPE_LWIP_SOCKET:
            s = "lwip socket";
            break;
        case FDTAB_TYPE_EPOLL_INSTANCE:
            s = "epoll instance";
            break;
        case FDTAB_TYPE_PTM:
            s = "pseudo-terminal master";
            break;
        case FDTAB_TYPE_PTS:
            s = "pseudo-terminal slave";
            break;
        }
        printf("fd_store %d: num: %d, type: %d:%s handle: %p\n",
               i, fd->num, fd->type, s, fd->handle);

        // fd->handle is stored as an offset into the handle data area;
        // rebase it against p before interpreting it
        switch (fd->type) {
        case FDTAB_TYPE_FILE:
            print_file_fd((void*)(p + (genpaddr_t)fd->handle));
            break;
        case FDTAB_TYPE_UNIX_SOCKET:
            print_unixsock_fd((void*)(p + (genpaddr_t)fd->handle));
            break;
        default:
            printf("[no handle data]\n");
            break;
        }
    }

    return SYS_ERR_OK;
}