/**
 * \brief Determine a suitable address for a given memory object
 *
 * \param pmap      The pmap object
 * \param memobj    The memory object to determine the address for
 * \param alignment Minimum alignment
 * \param vaddr     Pointer to return the determined address
 *
 * Relies on vspace.c code maintaining an ordered list of vregions
 */
static errval_t determine_addr(struct pmap *pmap, struct memobj *memobj,
                               size_t alignment, genvaddr_t *vaddr)
{
    assert(pmap->vspace->head);

    assert(alignment <= BASE_PAGE_SIZE); // NYI

    struct vregion *walk = pmap->vspace->head;
    while (walk->next) { // Try to insert between existing mappings
        genvaddr_t walk_base = vregion_get_base_addr(walk);
        genvaddr_t walk_size = vregion_get_size(walk);
        genvaddr_t next_base = vregion_get_base_addr(walk->next);

        if (next_base > walk_base + walk_size + memobj->size &&
            walk_base + walk_size > VSPACE_BEGIN) { // Ensure mappings are larger than VSPACE_BEGIN
            *vaddr = walk_base + walk_size;
            return SYS_ERR_OK;
        }
        walk = walk->next;
    }

    *vaddr = vregion_get_base_addr(walk) + vregion_get_size(walk);
    return SYS_ERR_OK;
}
errval_t vspace_mmu_aware_unmap(struct vspace_mmu_aware *state,
                                lvaddr_t base, size_t bytes)
{
    errval_t err;
    struct capref frame;
    genvaddr_t gvaddr = vregion_get_base_addr(&state->vregion) + state->offset;
    lvaddr_t eaddr = vspace_genvaddr_to_lvaddr(gvaddr);
    genvaddr_t offset;
    genvaddr_t gen_base = vspace_lvaddr_to_genvaddr(base)
        - vregion_get_base_addr(&state->vregion);
    genvaddr_t min_offset = 0;
    bool success = false;

    assert(vspace_lvaddr_to_genvaddr(base) >= vregion_get_base_addr(&state->vregion));
    assert(base + bytes == (lvaddr_t)eaddr);

    assert(bytes <= state->consumed);
    assert(bytes <= state->offset);

    // Reduce offset
    state->offset -= bytes;
    state->consumed -= bytes;

    // Free only in bigger blocks
    if (state->mapoffset - state->offset > MIN_MEM_FOR_FREE) {
        do {
            // Unmap and return (via unfill) frames from base
            err = state->memobj.m.f.unfill(&state->memobj.m, gen_base,
                                           &frame, &offset);
            if (err_is_fail(err) &&
                err_no(err) != LIB_ERR_MEMOBJ_UNFILL_TOO_HIGH_OFFSET) {
                return err_push(err, LIB_ERR_MEMOBJ_UNMAP_REGION);
            }

            // Delete frame cap
            if (err_is_ok(err)) {
                success = true;
                if (min_offset == 0 || min_offset > offset) {
                    min_offset = offset;
                }
                err = cap_destroy(frame);
                if (err_is_fail(err)) {
                    return err;
                }
            }
        } while (err_no(err) != LIB_ERR_MEMOBJ_UNFILL_TOO_HIGH_OFFSET);

        // state->consumed -= bytes;
        if (success) {
            state->mapoffset = min_offset;
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Page fault handler
 *
 * \param memobj  The memory object
 * \param vregion The associated vregion
 * \param offset  Offset into memory object of the page fault
 * \param type    The fault type
 */
static errval_t pagefault(struct memobj *memobj, struct vregion *vregion,
                          genvaddr_t offset, vm_fault_type_t type)
{
    errval_t err;
    struct memobj_one_frame_lazy *lazy = (struct memobj_one_frame_lazy*)memobj;
    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t vregion_base = vregion_get_base_addr(vregion);
    genvaddr_t vregion_off = vregion_get_offset(vregion);
    vregion_flags_t flags = vregion_get_flags(vregion);

    // XXX: ugly --> need to revoke lazy->frame in order to clean up
    // all the copies that are created here
    struct capref frame_copy;
    err = slot_alloc(&frame_copy);
    if (err_is_fail(err)) {
        return err;
    }
    err = cap_copy(frame_copy, lazy->frame);
    if (err_is_fail(err)) {
        return err;
    }

    err = pmap->f.map(pmap, vregion_base + vregion_off + offset, frame_copy,
                      offset, lazy->chunk_size, flags, NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    return SYS_ERR_OK;
}
/**
 * \brief Page fault handler
 *
 * \param memobj  The memory object
 * \param vregion The associated vregion
 * \param offset  Offset into memory object of the page fault
 * \param type    The fault type
 */
static errval_t pagefault(struct memobj *memobj, struct vregion *vregion,
                          genvaddr_t offset, vm_fault_type_t type)
{
    errval_t err;
    struct memobj_one_frame_one_map *state =
        (struct memobj_one_frame_one_map*)memobj;

    if (offset < state->offset || offset > state->offset + memobj->size) {
        return LIB_ERR_MEMOBJ_WRONG_OFFSET;
    }

    // Map the single frame
    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t vregion_base = vregion_get_base_addr(vregion);
    genvaddr_t vregion_off = vregion_get_offset(vregion);
    vregion_flags_t flags = vregion_get_flags(vregion);

    err = pmap->f.map(pmap, vregion_base + vregion_off, state->frame,
                      state->offset, memobj->size, flags, NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    return SYS_ERR_OK;
}
// Kludge to push changes in VFS memobj back out to disk
errval_t memobj_flush_vfs(struct memobj *memobj, struct vregion *vregion)
{
    errval_t err;

    assert(memobj->type == MEMOBJ_VFS);
    struct memobj_vfs *mv = (struct memobj_vfs *)memobj;

    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t vregion_base = vregion_get_base_addr(vregion);
    lvaddr_t vregion_lbase = vspace_genvaddr_to_lvaddr(vregion_base);
    genvaddr_t vregion_off = vregion_get_offset(vregion);

    assert(vregion_off == 0); // not sure if we handle this correctly

    /* TODO: mv->size instead of BASE_PAGE_SIZE? */
    for (genvaddr_t off = 0; off < mv->filesize; off += BASE_PAGE_SIZE) {
        genvaddr_t retvaddr;
        size_t retsize;
        vregion_flags_t retflags;

        // For each page check if it's in memory
        err = pmap->f.lookup(pmap, vregion_base + off, &retvaddr, &retsize,
                             NULL, NULL, &retflags);
        if (err_is_fail(err)) {
            continue; // Page not in memory
#if 0 /* this optimisation may not be correct if flags were changed -AB */
        } else if ((retflags & VREGION_FLAGS_WRITE) == 0) {
            continue; // Not writable
#endif
        }

        //TRACE("Flushing page at address: %lx\n", vregion_base + off);

        // seek file handle
        err = vfs_seek(mv->vh, VFS_SEEK_SET, off + mv->offset);
        if (err_is_fail(err)) {
            return err;
        }

        // write contents to file
        size_t rsize, pos = 0;
        size_t nbytes = mv->filesize - off;
        if (nbytes > BASE_PAGE_SIZE) {
            nbytes = BASE_PAGE_SIZE;
        }

        do {
            err = vfs_write(mv->vh, (char *)vregion_lbase + off + pos,
                            nbytes - pos, &rsize);
            if (err_is_fail(err)) {
                return err;
            }
            pos += rsize;
        } while (rsize > 0 && pos < nbytes);
        assert(pos == nbytes);
    }

    return SYS_ERR_OK;
}
/**
 * \brief Allocate some slabs
 *
 * \param retbuf    Pointer to return the allocated memory
 * \param slab_type Type of slab the memory is allocated for
 *
 * Since this region is used for backing specific slabs,
 * only those types of slabs can be allocated.
 */
errval_t vspace_pinned_alloc(void **retbuf, enum slab_type slab_type)
{
    errval_t err;
    struct pinned_state *state = get_current_pinned_state();

    // Select slab type
    struct slab_allocator *slab;
    switch (slab_type) {
    case VREGION_LIST:
        slab = &state->vregion_list_slab;
        break;
    case FRAME_LIST:
        slab = &state->frame_list_slab;
        break;
    default:
        return LIB_ERR_VSPACE_PINNED_INVALID_TYPE;
    }

    thread_mutex_lock(&state->mutex);

    // Try allocating
    void *buf = slab_alloc(slab);
    if (buf == NULL) {
        // Out of memory, grow
        struct capref frame;
        err = frame_alloc(&frame, BASE_PAGE_SIZE, NULL);
        if (err_is_fail(err)) {
            thread_mutex_unlock(&state->mutex);
            DEBUG_ERR(err, "frame_alloc in vspace_pinned_alloc");
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }
        err = state->memobj.m.f.fill((struct memobj*)&state->memobj,
                                     state->offset, frame, BASE_PAGE_SIZE);
        if (err_is_fail(err)) {
            thread_mutex_unlock(&state->mutex);
            DEBUG_ERR(err, "memobj_fill in vspace_pinned_alloc");
            return err_push(err, LIB_ERR_MEMOBJ_FILL);
        }

        genvaddr_t gvaddr = vregion_get_base_addr(&state->vregion)
            + state->offset;
        void *slab_buf = (void*)vspace_genvaddr_to_lvaddr(gvaddr);
        slab_grow(slab, slab_buf, BASE_PAGE_SIZE);
        state->offset += BASE_PAGE_SIZE;

        // Try again
        buf = slab_alloc(slab);
    }

    thread_mutex_unlock(&state->mutex);

    if (buf == NULL) {
        return LIB_ERR_SLAB_ALLOC_FAIL;
    } else {
        *retbuf = buf;
        return SYS_ERR_OK;
    }
}
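/*
 * Usage sketch (added for illustration, not part of the original source):
 * obtaining pinned storage for a struct vregion before threading it into a
 * vspace. The helper name and the cast are mine; only vspace_pinned_alloc()
 * and the VREGION_LIST slab type above are taken from the code.
 */
static errval_t example_alloc_pinned_vregion(struct vregion **ret)
{
    void *buf = NULL;
    errval_t err = vspace_pinned_alloc(&buf, VREGION_LIST);
    if (err_is_fail(err)) {
        return err;
    }
    *ret = (struct vregion *)buf;
    return SYS_ERR_OK;
}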
static errval_t refill_slabs(struct pmap_arm *pmap, size_t request)
{
    errval_t err;

    /* Keep looping till we have #request slabs */
    while (slab_freecount(&pmap->slab) < request) {
        // Amount of bytes required for #request
        size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
                                        sizeof(struct vnode));

        /* Get a frame of that size */
        struct capref cap;
        err = frame_alloc(&cap, bytes, &bytes);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }

        /* If we do not have enough slabs to map the frame in, recurse */
        size_t required_slabs_for_frame = max_slabs_required(bytes);
        if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
            // If we recurse, we require more slabs than to map a single page
            assert(required_slabs_for_frame > 4);

            err = refill_slabs(pmap, required_slabs_for_frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }

        /* Perform mapping */
        genvaddr_t genvaddr = pmap->vregion_offset;
        pmap->vregion_offset += (genvaddr_t)bytes;

        // if this assert fires, increase META_DATA_RESERVED_SPACE
        assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
                                       vregion_get_size(&pmap->vregion)));

        err = do_map(pmap, genvaddr, cap, 0, bytes,
                     VREGION_FLAGS_READ_WRITE, NULL, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        /* Grow the slab */
        lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
        slab_grow(&pmap->slab, (void*)buf, bytes);
    }

    return SYS_ERR_OK;
}
static errval_t protect(struct memobj *memobj, struct vregion *vregion,
                        genvaddr_t offset, size_t range,
                        vs_prot_flags_t flags)
{
    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t base = vregion_get_base_addr(vregion);
    genvaddr_t vregion_offset = vregion_get_offset(vregion);
    errval_t err;
    size_t ret_size;

    err = pmap->f.modify_flags(pmap, base + offset + vregion_offset, range,
                               flags, &ret_size);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
    }

    return SYS_ERR_OK;
}
/// Map with an alignment constraint
errval_t vspace_map_anon_nomalloc(void **retaddr, struct memobj_anon *memobj,
                                  struct vregion *vregion, size_t size,
                                  size_t *retsize, vregion_flags_t flags,
                                  size_t alignment)
{
    errval_t err1, err2;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    if (retsize) {
        *retsize = size;
    }

    // Create a memobj and vregion
    err1 = memobj_create_anon(memobj, size, 0);
    if (err_is_fail(err1)) {
        err1 = err_push(err1, LIB_ERR_MEMOBJ_CREATE_ANON);
        goto error;
    }
    err1 = vregion_map_aligned(vregion, get_current_vspace(),
                               (struct memobj *)memobj, 0, size,
                               flags, alignment);
    if (err_is_fail(err1)) {
        err1 = err_push(err1, LIB_ERR_VREGION_MAP);
        goto error;
    }

    *retaddr = (void*)vspace_genvaddr_to_lvaddr(vregion_get_base_addr(vregion));

    return SYS_ERR_OK;

error:
    if (err_no(err1) != LIB_ERR_MEMOBJ_CREATE_ANON) {
        // memobj was created successfully; destroy it before returning
        err2 = memobj_destroy_anon((struct memobj *)memobj);
        if (err_is_fail(err2)) {
            DEBUG_ERR(err2, "memobj_destroy_anon failed");
        }
    }
    return err1;
}
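/*
 * Usage sketch (illustrative, not from the original source): the point of the
 * "nomalloc" variant is that the caller supplies the memobj/vregion storage
 * itself, e.g. statically or from pinned memory. The static variables and
 * example_map_anon() below are hypothetical; the call and its parameters
 * follow the signature above.
 */
static struct memobj_anon example_memobj;
static struct vregion     example_vregion;

static errval_t example_map_anon(void **retaddr, size_t *retsize)
{
    return vspace_map_anon_nomalloc(retaddr, &example_memobj, &example_vregion,
                                    2 * BASE_PAGE_SIZE, retsize,
                                    VREGION_FLAGS_READ_WRITE, BASE_PAGE_SIZE);
}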
/**
 * \brief Unmap the memory object from a region
 *
 * \param memobj  The memory object
 * \param vregion The region to remove
 */
static errval_t unmap_region(struct memobj *memobj, struct vregion *vregion)
{
    errval_t err;
    struct memobj_one_frame_one_map *one_frame =
        (struct memobj_one_frame_one_map*)memobj;

    if (one_frame->vregion != vregion) {
        return LIB_ERR_VREGION_NOT_FOUND;
    }

    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t vregion_base = vregion_get_base_addr(vregion);
    genvaddr_t vregion_off = vregion_get_offset(vregion);

    err = pmap->f.unmap(pmap, vregion_base + vregion_off, memobj->size, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_UNMAP);
    }

    one_frame->vregion = NULL;

    return SYS_ERR_OK;
}
static errval_t elf_allocate(void *state, genvaddr_t base, size_t size,
                             uint32_t flags, void **retbase)
{
    errval_t err;
    struct spawninfo *si = state;

    // Increase size by space wasted on first page due to page-alignment
    size_t base_offset = BASE_PAGE_OFFSET(base);
    size += base_offset;
    base -= base_offset;
    // Page-align
    size = ROUND_UP(size, BASE_PAGE_SIZE);

    cslot_t vspace_slot = si->elfload_slot;

    // Allocate the frames
    size_t sz = 0;
    for (lpaddr_t offset = 0; offset < size; offset += sz) {
        sz = 1UL << log2floor(size - offset);
        struct capref frame = {
            .cnode = si->segcn,
            .slot = si->elfload_slot++,
        };
        err = frame_create(frame, sz, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_CREATE);
        }
    }

    cslot_t spawn_vspace_slot = si->elfload_slot;
    cslot_t new_slot_count = si->elfload_slot - vspace_slot;

    // create copies of the frame capabilities for spawn vspace
    for (int copy_idx = 0; copy_idx < new_slot_count; copy_idx++) {
        struct capref frame = {
            .cnode = si->segcn,
            .slot = vspace_slot + copy_idx,
        };
        struct capref spawn_frame = {
            .cnode = si->segcn,
            .slot = si->elfload_slot++,
        };
        err = cap_copy(spawn_frame, frame);
        if (err_is_fail(err)) {
            // TODO: make debug printf
            printf("cap_copy failed for src_slot = %"PRIuCSLOT
                   ", dest_slot = %"PRIuCSLOT"\n",
                   frame.slot, spawn_frame.slot);
            return err_push(err, LIB_ERR_CAP_COPY);
        }
    }

    /* Map into my vspace */
    struct memobj *memobj = malloc(sizeof(struct memobj_anon));
    if (!memobj) {
        return LIB_ERR_MALLOC_FAIL;
    }
    struct vregion *vregion = malloc(sizeof(struct vregion));
    if (!vregion) {
        return LIB_ERR_MALLOC_FAIL;
    }

    // Create the objects
    err = memobj_create_anon((struct memobj_anon*)memobj, size, 0);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON);
    }
    err = vregion_map(vregion, get_current_vspace(), memobj, 0, size,
                      VREGION_FLAGS_READ_WRITE);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_MAP);
    }

    for (lvaddr_t offset = 0; offset < size; offset += sz) {
        sz = 1UL << log2floor(size - offset);
        struct capref frame = {
            .cnode = si->segcn,
            .slot = vspace_slot++,
        };
        genvaddr_t genvaddr = vspace_lvaddr_to_genvaddr(offset);
        err = memobj->f.fill(memobj, genvaddr, frame, sz);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_MEMOBJ_FILL);
        }
        err = memobj->f.pagefault(memobj, vregion, offset, 0);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "lib_err_memobj_pagefault_handler");
            return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
        }
    }

    /* Map into spawn vspace */
    struct memobj *spawn_memobj = NULL;
    struct vregion *spawn_vregion = NULL;
    err = spawn_vspace_map_anon_fixed_attr(si, base, size, &spawn_vregion,
                                           &spawn_memobj,
                                           elf_to_vregion_flags(flags));
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_VSPACE_MAP);
    }

    for (lvaddr_t offset = 0; offset < size; offset += sz) {
        sz = 1UL << log2floor(size - offset);
        struct capref spawn_frame = {
            .cnode = si->segcn,
            .slot = spawn_vspace_slot++,
        };
        genvaddr_t genvaddr = vspace_lvaddr_to_genvaddr(offset);
        err = memobj->f.fill(spawn_memobj, genvaddr, spawn_frame, sz);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_MEMOBJ_FILL);
        }
        err = spawn_memobj->f.pagefault(spawn_memobj, spawn_vregion, offset, 0);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "lib_err_memobj_pagefault_handler");
            return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
        }
    }

    si->vregion[si->vregions] = vregion;
    si->base[si->vregions++] = base;

    genvaddr_t genvaddr = vregion_get_base_addr(vregion) + base_offset;
    *retbase = (void*)vspace_genvaddr_to_lvaddr(genvaddr);

    return SYS_ERR_OK;
}

/**
 * \brief Load the elf image
 */
errval_t spawn_arch_load(struct spawninfo *si, lvaddr_t binary,
                         size_t binary_size, genvaddr_t *entry,
                         void **arch_load_info)
{
    errval_t err;

    // Reset the elfloader_slot
    si->elfload_slot = 0;
    si->vregions = 0;
    struct capref cnode_cap = {
        .cnode = si->rootcn,
        .slot = ROOTCN_SLOT_SEGCN,
    };
    // XXX: this code assumes that elf_load never needs more than 32 slots for
    // text frame capabilities.
    err = cnode_create_raw(cnode_cap, &si->segcn, DEFAULT_CNODE_SLOTS, NULL);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_CREATE_SEGCN);
    }

    // Load the binary
    si->tls_init_base = 0;
    si->tls_init_len = si->tls_total_len = 0;
    err = elf_load_tls(EM_HOST, elf_allocate, si, binary, binary_size, entry,
                       &si->tls_init_base, &si->tls_init_len,
                       &si->tls_total_len);
    if (err_is_fail(err)) {
        return err;
    }

    return SYS_ERR_OK;
}

void spawn_arch_set_registers(void *arch_load_info,
                              dispatcher_handle_t handle,
                              arch_registers_state_t *enabled_area,
                              arch_registers_state_t *disabled_area)
{
#if defined(__x86_64__)
    /* XXX: 1st argument to _start is the dispatcher pointer
     * see lib/crt/arch/x86_64/crt0.s */
    disabled_area->rdi = get_dispatcher_shared_generic(handle)->udisp;
#elif defined(__i386__)
    /* XXX: 1st argument to _start is the dispatcher pointer
     * see lib/crt/arch/x86_32/crt0.s */
    disabled_area->edi = get_dispatcher_shared_generic(handle)->udisp;
#endif
}
/**
 * \brief Page fault handler
 *
 * \param memobj  The memory object
 * \param vregion The associated vregion
 * \param offset  Offset into memory object of the page fault
 * \param type    The fault type
 */
static errval_t pagefault(struct memobj *memobj, struct vregion *vregion,
                          genvaddr_t offset, vm_fault_type_t type)
{
    errval_t err;

    assert(memobj->type == MEMOBJ_VFS);
    struct memobj_vfs *mv = (struct memobj_vfs *)memobj;
    struct memobj_anon *anon = &mv->anon;

    struct vspace *vspace = vregion_get_vspace(vregion);
    struct pmap *pmap = vspace_get_pmap(vspace);
    genvaddr_t vregion_base = vregion_get_base_addr(vregion);
    genvaddr_t vregion_off = vregion_get_offset(vregion);

    assert(vregion_off == 0); // not sure if we handle this correctly

    // Walk the ordered list to find the matching frame, but don't map it yet
    struct memobj_frame_list *walk = anon->frame_list;
    while (walk) {
        if (offset >= walk->offset && offset < walk->offset + walk->size) {
            break;
        }
        walk = walk->next;
    }
    if (walk == NULL) {
        return LIB_ERR_MEMOBJ_WRONG_OFFSET;
    }

    genvaddr_t map_offset = vregion_off + walk->offset;
    size_t nbytes = walk->size;

    // how much do we need to read from the file?
    if (map_offset >= mv->filesize) {
        // nothing
        goto do_map;
    } else if (map_offset + nbytes > mv->filesize) {
        // limit size of read to maximum mapping (rest is zero-filled)
        nbytes = mv->filesize - map_offset;
    }

#if 0
    debug_printf("fault at offset %lx, mapping at %lx-%lx from file data %lx-%lx\n",
                 offset, vregion_base + map_offset,
                 vregion_base + map_offset + walk->size,
                 map_offset + mv->offset,
                 map_offset + mv->offset + nbytes);
#endif

    // map frame writable at temporary location so that we can safely fill it
    void *buf;
    struct memobj *tmp_memobj = NULL;
    struct vregion *tmp_vregion = NULL;
    err = vspace_map_one_frame(&buf, walk->size, walk->frame,
                               &tmp_memobj, &tmp_vregion);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "error setting up temp mapping in mmap pagefault handler\n");
        return err; // XXX
    }

    // seek file handle
    err = vfs_seek(mv->vh, VFS_SEEK_SET, map_offset + mv->offset);
    if (err_is_fail(err)) {
        return err;
    }

    // read contents into frame
    size_t rsize, pos = 0;
    do {
        err = vfs_read(mv->vh, (char *)buf + pos, nbytes - pos, &rsize);
        if (err_is_fail(err)) {
            break;
        }
        pos += rsize;
    } while (rsize > 0 && pos < nbytes);

    // destroy temp mappings
    // FIXME: the API for tearing down mappings is really unclear! is this sufficient?
    err = vregion_destroy(tmp_vregion);
    assert(err_is_ok(err));
    err = memobj_destroy_one_frame(tmp_memobj);
    assert(err_is_ok(err));
    //free(tmp_vregion);
    //free(tmp_memobj);

do_map:
    // map at target address with appropriate flags
    err = pmap->f.map(pmap, vregion_base + map_offset, walk->frame, 0,
                      walk->size, vregion_get_flags(vregion), NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    return SYS_ERR_OK;
}
/**
 * \brief Create mappings
 *
 * \param state    The object metadata
 * \param req_size The required amount by the application
 * \param retbuf   Pointer to return the mapped buffer
 * \param retsize  The actual size returned
 *
 * This function returns a special error code if frame_create fails
 * due to the constraints of the memory server (amount or region of
 * memory). This is to facilitate retrying with different constraints.
 */
errval_t vspace_mmu_aware_map(struct vspace_mmu_aware *state, size_t req_size,
                              void **retbuf, size_t *retsize)
{
    errval_t err;
    struct capref frame;

    // Calculate how much still to map in
    size_t origsize = req_size;
    assert(state->mapoffset >= state->offset);
    if (state->mapoffset - state->offset > req_size) {
        req_size = 0;
    } else {
        req_size -= state->mapoffset - state->offset;
    }
    size_t alloc_size = ROUND_UP(req_size, BASE_PAGE_SIZE);
    size_t ret_size = 0;

    if (req_size > 0) {
#if __x86_64__
        if ((state->vregion.flags & VREGION_FLAGS_HUGE) &&
            (state->mapoffset & HUGE_PAGE_MASK) == 0) {
            // this is an opportunity to switch to 1G pages if requested.
            // we know that we can use large pages without jumping through hoops
            // if state->vregion.flags has VREGION_FLAGS_HUGE set and
            // mapoffset is aligned to at least HUGE_PAGE_SIZE.
            alloc_size = ROUND_UP(req_size, HUGE_PAGE_SIZE);

            // goto allocation directly so we can avoid nasty code interaction
            // between #if __x86_64__ and the size checks, we want to be able
            // to use 2M pages on x86_64 also. -SG, 2015-04-30.
            goto allocate;
        }
#endif
        if ((state->vregion.flags & VREGION_FLAGS_LARGE) &&
            (state->mapoffset & LARGE_PAGE_MASK) == 0) {
            // this is an opportunity to switch to 2M pages if requested.
            // we know that we can use large pages without jumping through hoops
            // if state->vregion.flags has VREGION_FLAGS_LARGE set and
            // mapoffset is aligned to at least LARGE_PAGE_SIZE.
            alloc_size = ROUND_UP(req_size, LARGE_PAGE_SIZE);
        }

        // Create frame of appropriate size
allocate:
        err = state->slot_alloc->alloc(state->slot_alloc, &frame);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_SLOT_ALLOC_NO_SPACE);
        }

        err = frame_create(frame, alloc_size, &ret_size);
        if (err_is_fail(err)) {
            if (err_no(err) == LIB_ERR_RAM_ALLOC_MS_CONSTRAINTS) {
                // we can only get 4k frames for now; retry with 4k
                if (alloc_size > BASE_PAGE_SIZE && req_size <= BASE_PAGE_SIZE) {
                    alloc_size = BASE_PAGE_SIZE;
                    goto allocate;
                }
                return err_push(err, LIB_ERR_FRAME_CREATE_MS_CONSTRAINTS);
            }
            return err_push(err, LIB_ERR_FRAME_CREATE);
        }
        assert(ret_size >= req_size);
        origsize += ret_size - req_size;
        req_size = ret_size;

        if (state->consumed + req_size > state->size) {
            err = cap_delete(frame);
            if (err_is_fail(err)) {
                debug_err(__FILE__, __func__, __LINE__, err,
                          "cap_delete failed");
            }
            state->slot_alloc->free(state->slot_alloc, frame);
            return LIB_ERR_VSPACE_MMU_AWARE_NO_SPACE;
        }

        // Map it in
        err = state->memobj.m.f.fill(&state->memobj.m, state->mapoffset,
                                     frame, req_size);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_MEMOBJ_FILL);
        }
        err = state->memobj.m.f.pagefault(&state->memobj.m, &state->vregion,
                                          state->mapoffset, 0);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
        }
    }

    // Return buffer
    genvaddr_t gvaddr = vregion_get_base_addr(&state->vregion) + state->offset;
    *retbuf = (void*)vspace_genvaddr_to_lvaddr(gvaddr);
    *retsize = origsize;

    state->mapoffset += req_size;
    state->offset += origsize;
    state->consumed += origsize;

    return SYS_ERR_OK;
}
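/*
 * Usage sketch (illustrative, not from the original source): growing an
 * mmu-aware region by one page and shrinking it again from the end, pairing
 * vspace_mmu_aware_map() with vspace_mmu_aware_unmap() shown earlier. The
 * helper name is hypothetical and `state` is assumed to be initialised
 * elsewhere.
 */
static errval_t example_grow_then_shrink(struct vspace_mmu_aware *state)
{
    void *buf;
    size_t retsize;
    errval_t err = vspace_mmu_aware_map(state, BASE_PAGE_SIZE, &buf, &retsize);
    if (err_is_fail(err)) {
        return err;
    }
    /* ... use [buf, buf + retsize) ... */

    // unmapping must come off the end of the region, which holds here because
    // we release exactly the range that was just mapped
    return vspace_mmu_aware_unmap(state, (lvaddr_t)buf, retsize);
}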
static errval_t elf_allocate(void *state, genvaddr_t base, size_t size,
                             uint32_t flags, void **retbase)
{
    errval_t err;
    struct spawninfo *si = state;

    // Increase size by space wasted on first page due to page-alignment
    size_t base_offset = BASE_PAGE_OFFSET(base);
    size += base_offset;
    base -= base_offset;
    // Page-align
    size = ROUND_UP(size, BASE_PAGE_SIZE);

    cslot_t vspace_slot = si->elfload_slot;

    // Allocate the frames
    size_t sz = 0;
    for (lpaddr_t offset = 0; offset < size; offset += sz) {
        sz = 1UL << log2floor(size - offset);
        struct capref frame = {
            .cnode = si->segcn,
            .slot = si->elfload_slot++,
        };
        err = frame_create(frame, sz, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_CREATE);
        }
    }

    cslot_t spawn_vspace_slot = si->elfload_slot;
    cslot_t new_slot_count = si->elfload_slot - vspace_slot;

    // create copies of the frame capabilities for spawn vspace
    for (int copy_idx = 0; copy_idx < new_slot_count; copy_idx++) {
        struct capref frame = {
            .cnode = si->segcn,
            .slot = vspace_slot + copy_idx,
        };
        struct capref spawn_frame = {
            .cnode = si->segcn,
            .slot = si->elfload_slot++,
        };
        err = cap_copy(spawn_frame, frame);
        if (err_is_fail(err)) {
            // TODO: make debug printf
            printf("cap_copy failed for src_slot = %"PRIuCSLOT
                   ", dest_slot = %"PRIuCSLOT"\n",
                   frame.slot, spawn_frame.slot);
            return err_push(err, LIB_ERR_CAP_COPY);
        }
    }

    /* Map into my vspace */
    struct memobj *memobj = malloc(sizeof(struct memobj_anon));
    if (!memobj) {
        return LIB_ERR_MALLOC_FAIL;
    }
    struct vregion *vregion = malloc(sizeof(struct vregion));
    if (!vregion) {
        return LIB_ERR_MALLOC_FAIL;
    }

    // Create the objects
    err = memobj_create_anon((struct memobj_anon*)memobj, size, 0);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON);
    }
    err = vregion_map(vregion, get_current_vspace(), memobj, 0, size,
                      VREGION_FLAGS_READ_WRITE);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_MAP);
    }

    for (lvaddr_t offset = 0; offset < size; offset += sz) {
        sz = 1UL << log2floor(size - offset);
        struct capref frame = {
            .cnode = si->segcn,
            .slot = vspace_slot++,
        };
        genvaddr_t genvaddr = vspace_lvaddr_to_genvaddr(offset);
        err = memobj->f.fill(memobj, genvaddr, frame, sz);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_MEMOBJ_FILL);
        }
        err = memobj->f.pagefault(memobj, vregion, offset, 0);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "lib_err_memobj_pagefault_handler");
            return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
        }
    }

    /* Map into spawn vspace */
    struct memobj *spawn_memobj = NULL;
    struct vregion *spawn_vregion = NULL;
    err = spawn_vspace_map_anon_fixed_attr(si, base, size, &spawn_vregion,
                                           &spawn_memobj,
                                           elf_to_vregion_flags(flags));
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_VSPACE_MAP);
    }

    for (lvaddr_t offset = 0; offset < size; offset += sz) {
        sz = 1UL << log2floor(size - offset);
        struct capref frame = {
            .cnode = si->segcn,
            .slot = spawn_vspace_slot++,
        };
        genvaddr_t genvaddr = vspace_lvaddr_to_genvaddr(offset);
        err = memobj->f.fill(spawn_memobj, genvaddr, frame, sz);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_MEMOBJ_FILL);
        }
        err = spawn_memobj->f.pagefault(spawn_memobj, spawn_vregion, offset, 0);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "lib_err_memobj_pagefault_handler");
            return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
        }
    }

    genvaddr_t genvaddr = vregion_get_base_addr(vregion) + base_offset;
    *retbase = (void*)vspace_genvaddr_to_lvaddr(genvaddr);

    return SYS_ERR_OK;
}

/**
 * \brief Load the elf image
 */
errval_t spawn_arch_load(struct spawninfo *si, lvaddr_t binary, size_t binary_size,
                         genvaddr_t *entry, void **arch_info)
{
    errval_t err;

    // Reset the elfloader_slot
    si->elfload_slot = 0;
    struct capref cnode_cap = {
        .cnode = si->rootcn,
        .slot = ROOTCN_SLOT_SEGCN,
    };
    err = cnode_create_raw(cnode_cap, &si->segcn, DEFAULT_CNODE_SLOTS, NULL);
    if (err_is_fail(err)) {
        return err_push(err, SPAWN_ERR_CREATE_SEGCN);
    }

    // TLS is NYI
    si->tls_init_base = 0;
    si->tls_init_len = si->tls_total_len = 0;

    // Load the binary
    err = elf_load(EM_HOST, elf_allocate, si, binary, binary_size, entry);
    if (err_is_fail(err)) {
        return err;
    }

    struct Elf32_Shdr* got_shdr =
        elf32_find_section_header_name(binary, binary_size, ".got");
    if (got_shdr) {
        *arch_info = (void*)got_shdr->sh_addr;
    } else {
        return SPAWN_ERR_LOAD;
    }

    return SYS_ERR_OK;
}

void spawn_arch_set_registers(void *arch_load_info,
                              dispatcher_handle_t handle,
                              arch_registers_state_t *enabled_area,
                              arch_registers_state_t *disabled_area)
{
    assert(arch_load_info != NULL);
    uintptr_t got_base = (uintptr_t)arch_load_info;

    struct dispatcher_shared_arm* disp_arm = get_dispatcher_shared_arm(handle);
    disp_arm->got_base = got_base;

    enabled_area->regs[REG_OFFSET(PIC_REGISTER)] = got_base;
    disabled_area->regs[REG_OFFSET(PIC_REGISTER)] = got_base;

#ifndef __ARM_ARCH_7M__ // armv7-m does not support these flags
    enabled_area->named.cpsr = CPSR_F_MASK | ARM_MODE_USR;
    disabled_area->named.cpsr = CPSR_F_MASK | ARM_MODE_USR;
#endif
}
/**
 * \brief Wrapper for creating and mapping a memory object
 * of type one frame with specific flags and a specific alignment
 */
errval_t vspace_map_one_frame_attr_aligned(void **retaddr, size_t size,
                                           struct capref frame,
                                           vregion_flags_t flags,
                                           size_t alignment,
                                           struct memobj **retmemobj,
                                           struct vregion **retvregion)
{
    errval_t err1, err2;
    struct memobj *memobj = NULL;
    struct vregion *vregion = NULL;

    size = ROUND_UP(size, BASE_PAGE_SIZE);

    // Allocate space
    memobj = calloc(1, sizeof(struct memobj_one_frame));
    if (!memobj) {
        err1 = LIB_ERR_MALLOC_FAIL;
        goto error;
    }
    vregion = calloc(1, sizeof(struct vregion));
    if (!vregion) {
        err1 = LIB_ERR_MALLOC_FAIL;
        goto error;
    }

    // Create mappings
    err1 = memobj_create_one_frame((struct memobj_one_frame*)memobj, size, 0);
    if (err_is_fail(err1)) {
        err1 = err_push(err1, LIB_ERR_MEMOBJ_CREATE_ONE_FRAME);
        goto error;
    }

    err1 = memobj->f.fill(memobj, 0, frame, size);
    if (err_is_fail(err1)) {
        err1 = err_push(err1, LIB_ERR_MEMOBJ_FILL);
        goto error;
    }

    err1 = vregion_map_aligned(vregion, get_current_vspace(), memobj, 0, size,
                               flags, alignment);
    if (err_is_fail(err1)) {
        err1 = err_push(err1, LIB_ERR_VREGION_MAP);
        goto error;
    }

    err1 = memobj->f.pagefault(memobj, vregion, 0, 0);
    if (err_is_fail(err1)) {
        err1 = err_push(err1, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
        goto error;
    }

    *retaddr = (void*)vspace_genvaddr_to_lvaddr(vregion_get_base_addr(vregion));

    if (retmemobj) {
        *retmemobj = memobj;
    }
    if (retvregion) {
        *retvregion = vregion;
    }

    return SYS_ERR_OK;

error:
    if (memobj) {
        err2 = memobj_destroy_one_frame(memobj);
        if (err_is_fail(err2)) {
            DEBUG_ERR(err2, "memobj_destroy_one_frame failed");
        }
    }
    if (vregion) {
        err2 = vregion_destroy(vregion);
        if (err_is_fail(err2)) {
            DEBUG_ERR(err2, "vregion_destroy failed");
        }
    }
    return err1;
}
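/*
 * Usage sketch (illustrative, not from the original source): allocating a
 * frame and mapping it with large-page alignment, the same pattern
 * vspace_mmu_aware_reset() uses further down in this listing. The helper
 * name is hypothetical; frame_alloc() and the wrapper's parameters are taken
 * from the code shown here.
 */
static errval_t example_map_frame_aligned(void **retaddr)
{
    struct capref frame;
    size_t size = LARGE_PAGE_SIZE;
    errval_t err = frame_alloc(&frame, size, &size);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_FRAME_ALLOC);
    }
    return vspace_map_one_frame_attr_aligned(retaddr, size, frame,
                                             VREGION_FLAGS_READ_WRITE,
                                             LARGE_PAGE_SIZE, NULL, NULL);
}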
/**
 * \brief allocates size bytes of memory, page-interleaved across the nodes
 *        specified in the nodemask
 *
 * \param size     size of the memory region in bytes
 * \param pagesize preferred page size to be used
 * \param nodemask subset of nodes to consider for allocation
 *
 * \returns pointer to the mapped memory region
 *
 * Should only be used for large areas consisting of multiple pages.
 * The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc_interleaved_subset(size_t size, size_t pagesize,
                                    struct bitmap *nodemask)
{
    errval_t err;

    /* clear out invalid bits */
    bitmap_clear_range(nodemask, numa_num_configured_nodes(),
                       bitmap_get_nbits(nodemask));

    /* get the number of nodes */
    nodeid_t nodes = bitmap_get_weight(nodemask);
    if (nodes == 0) {
        return NULL;
    }

    NUMA_DEBUG_ALLOC("allocating interleaved using %" PRIuNODEID " nodes\n",
                     nodes);

    assert(nodes <= numa_num_configured_nodes());

    vregion_flags_t flags;
    validate_page_size(&pagesize, &flags);
    size_t stride = pagesize;

    size_t node_size = size / nodes;
    node_size = (node_size + pagesize - 1) & ~(pagesize - 1);

    /* update total size as this may change due to rounding of node sizes */
    size = nodes * node_size;

    /*
     * XXX: we may want to keep track of numa alloced frames
     */

    struct memobj_numa *memobj = calloc(1, sizeof(struct memobj_numa));
    err = memobj_create_numa(memobj, size, 0, numa_num_configured_nodes(),
                             stride);
    if (err_is_fail(err)) {
        return NULL;
    }

    bitmap_bit_t node = bitmap_get_first(nodemask);
    nodeid_t node_idx = 0;
    while (node != BITMAP_BIT_NONE) {
        struct capref frame;
        err = numa_frame_alloc_on_node(&frame, node_size, (nodeid_t)node, NULL);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "numa_frame_alloc_on_node");
            goto out_err;
        }
        memobj->m.f.fill(&memobj->m, node_idx, frame, 0);
        ++node_idx;
        node = bitmap_get_next(nodemask, node);
    }

    struct vregion *vreg = calloc(1, sizeof(struct vregion));
    if (vreg == NULL) {
        goto out_err;
    }
    err = vregion_map_aligned(vreg, get_current_vspace(), &memobj->m, 0, size,
                              flags, pagesize);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "vregion_map_aligned");
        goto out_err;
    }

    err = memobj->m.f.pagefault(&memobj->m, vreg, 0, 0);
    if (err_is_fail(err)) {
        vregion_destroy(vreg);
        free(vreg);
        DEBUG_ERR(err, "memobj.m.f.pagefault");
        goto out_err;
    }

    // XXX - Is this right?
    return (void *)(uintptr_t)vregion_get_base_addr(vreg);

out_err:
    // return and delete the frames that were already filled in
    for (int i = 0; i < node_idx; ++i) {
        struct capref frame;
        memobj->m.f.unfill(&memobj->m, i, &frame, NULL);
        cap_delete(frame);
    }

    return NULL;
}
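/*
 * Usage sketch (illustrative, not from the original source): interleaving a
 * 16 MB buffer across a caller-provided node mask using base pages. The
 * helper name is hypothetical; the argument order (size, pagesize, nodemask)
 * matches the function above.
 */
void *example_alloc_interleaved(struct bitmap *nodemask)
{
    return numa_alloc_interleaved_subset(16UL * 1024 * 1024,
                                         BASE_PAGE_SIZE, nodemask);
}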
errval_t vspace_mmu_aware_reset(struct vspace_mmu_aware *state,
                                struct capref frame, size_t size)
{
    errval_t err;
    struct vregion *vregion;
    struct capref oldframe;
    void *vbuf;

    // create copy of new region
    err = slot_alloc(&oldframe);
    if (err_is_fail(err)) {
        return err;
    }
    err = cap_copy(oldframe, frame);
    if (err_is_fail(err)) {
        return err;
    }
    err = vspace_map_one_frame_attr_aligned(&vbuf, size, oldframe,
                                            VREGION_FLAGS_READ_WRITE | VREGION_FLAGS_LARGE,
                                            LARGE_PAGE_SIZE, NULL, &vregion);
    if (err_is_fail(err)) {
        return err;
    }

    // copy over data to new frame
    genvaddr_t gen_base = vregion_get_base_addr(&state->vregion);
    memcpy(vbuf, (void*)(lvaddr_t)gen_base, state->mapoffset);

    err = vregion_destroy(vregion);
    if (err_is_fail(err)) {
        return err;
    }

    genvaddr_t offset = 0;
    // Unmap backing frames for [0, size) in state.vregion
    do {
        err = state->memobj.m.f.unfill(&state->memobj.m, 0, &oldframe,
                                       &offset);
        if (err_is_fail(err) &&
            err_no(err) != LIB_ERR_MEMOBJ_UNFILL_TOO_HIGH_OFFSET) {
            return err_push(err, LIB_ERR_MEMOBJ_UNMAP_REGION);
        }

        struct frame_identity fi;
        // increase address
        err = invoke_frame_identify(oldframe, &fi);
        if (err_is_fail(err)) {
            return err;
        }
        offset += (1UL << fi.bits);

        err = cap_destroy(oldframe);
        if (err_is_fail(err)) {
            return err;
        }
    } while (offset < state->mapoffset);

    // Map new frame in
    err = state->memobj.m.f.fill(&state->memobj.m, 0, frame, size);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_MEMOBJ_FILL);
    }
    err = state->memobj.m.f.pagefault(&state->memobj.m, &state->vregion, 0, 0);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
    }

    state->mapoffset = size;
    return SYS_ERR_OK;
}
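/*
 * Usage sketch (illustrative, not from the original source): swapping in a
 * single larger frame as the new backing of an mmu-aware region, e.g. to
 * consolidate many small frames. The helper name is hypothetical; `newsize`
 * is assumed to be at least state->mapoffset so the memcpy above fits.
 */
static errval_t example_reset_backing(struct vspace_mmu_aware *state,
                                      size_t newsize)
{
    struct capref frame;
    errval_t err = frame_alloc(&frame, newsize, &newsize);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_FRAME_ALLOC);
    }
    return vspace_mmu_aware_reset(state, frame, newsize);
}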