/**
 * \brief Perform one step of the background distributed-delete process.
 *
 * Pops the cap at the front of the global `delete_head` list and tries to
 * dispose of it.  On success the list head is advanced (emptying both head
 * and tail pointers when the list becomes empty).
 *
 * \param ret_next Empty (ObjType_Null) slot that may receive a cap copied
 *                 out to another core (see caps_copyout_last).
 *
 * \return SYS_ERR_CAP_NOT_FOUND when the delete list is empty;
 *         SYS_ERR_CAP_LOCKED when the head cap is locked;
 *         SYS_ERR_DELETE_LAST_OWNED after copying out the last owned copy;
 *         otherwise the result of the cleanup/delete helpers.
 */
errval_t caps_delete_step(struct cte *ret_next)
{
    errval_t err = SYS_ERR_OK;

    assert(ret_next);
    assert(ret_next->cap.type == ObjType_Null);

    if (!delete_head) {
        // empty delete list implies empty tail as well
        assert(!delete_tail);
        return SYS_ERR_CAP_NOT_FOUND;
    }
    // everything on the delete list must be marked busy/in-delete
    assert(delete_head->mdbnode.in_delete == true);

    TRACE_CAP_MSG("performing delete step", delete_head);
    struct cte *cte = delete_head, *next = cte->delete_node.next;
    if (cte->mdbnode.locked) {
        err = SYS_ERR_CAP_LOCKED;
    }
    else if (distcap_is_foreign(cte) || has_copies(cte)) {
        // not the last copy system-wide: just drop this copy
        err = cleanup_copy(cte);
    }
    else if (cte->mdbnode.remote_copies) {
        // last local copy but remote copies exist: hand the cap to the
        // monitor via ret_next so ownership can be moved off-core
        err = caps_copyout_last(cte, ret_next);
        if (err_is_ok(err)) {
            // advance list head here, then report DELETE_LAST_OWNED;
            // the common advance below only runs for err_is_ok
            if (next) {
                delete_head = next;
            } else {
                delete_head = delete_tail = NULL;
            }
            err = SYS_ERR_DELETE_LAST_OWNED;
        }
        // else: leave cte on the list so the step can be retried
    }
    else {
        // XXX: need to clear delete_list flag because it's reused for
        // clear_list? -SG
        cte->delete_node.next = NULL;
        err = caps_delete_last(cte, ret_next);
        if (err_is_fail(err)) {
            TRACE_CAP_MSG("delete last failed", cte);
            // if delete_last fails, reinsert in delete list
            cte->delete_node.next = next;
        }
    }

    // common success path: unlink cte from the delete list
    if (err_is_ok(err)) {
        if (next) {
            delete_head = next;
        } else {
            delete_head = delete_tail = NULL;
        }
    }

    return err;
}
/// Create page mappings errval_t caps_copy_to_vnode(struct cte *dest_vnode_cte, cslot_t dest_slot, struct cte *src_cte, uintptr_t flags, uintptr_t offset, uintptr_t pte_count, struct cte *mapping_cte) { struct capability *src_cap = &src_cte->cap; struct capability *dest_cap = &dest_vnode_cte->cap; assert(mapping_cte->cap.type == ObjType_Null); mapping_handler_t handler_func = handler[dest_cap->type]; assert(handler_func != NULL); errval_t err; err = handler_func(dest_cap, dest_slot, src_cap, flags, offset, pte_count, mapping_cte); if (err_is_fail(err)) { assert(mapping_cte->cap.type == ObjType_Null); debug(SUBSYS_PAGING, "caps_copy_to_vnode: handler func returned %"PRIuERRV"\n", err); return err; } /* insert mapping cap into mdb */ err = mdb_insert(mapping_cte); if (err_is_fail(err)) { printk(LOG_ERR, "%s: mdb_insert: %"PRIuERRV"\n", __FUNCTION__, err); } TRACE_CAP_MSG("created", mapping_cte); return err; }
static void caps_mark_revoke_generic(struct cte *cte) { errval_t err; if (cte->cap.type == ObjType_Null) { return; } if (distcap_is_in_delete(cte)) { return; } TRACE_CAP_MSG("marking for revoke", cte); err = caps_try_delete(cte); if (err_no(err) == SYS_ERR_DELETE_LAST_OWNED) { cte->mdbnode.in_delete = true; //cte->delete_node.next_slot = 0; // insert into delete list if (!delete_tail) { assert(!delete_head); delete_head = delete_tail = cte; cte->delete_node.next = NULL; } else { assert(delete_head); assert(!delete_tail->delete_node.next); delete_tail->delete_node.next = cte; delete_tail = cte; cte->delete_node.next = NULL; } TRACE_CAP_MSG("inserted into delete list", cte); // because the monitors will perform a 2PC that deletes all foreign // copies before starting the delete steps, and because the in_delete // bit marks this cap as "busy" (see distcap_get_state), we can clear // the remote copies bit. cte->mdbnode.remote_copies = 0; } else if (err_is_fail(err)) { // some serious mojo went down in the cleanup voodoo panic("error while marking/deleting descendant cap for revoke:" " 0x%"PRIuERRV"\n", err); } }
/**
 * \brief Perform one step of the background clear process.
 *
 * The clear list holds CNodes and Dispatchers whose contained slots have
 * already been marked for deletion (see caps_delete_last); this step frees
 * the object at the head of that list.
 *
 * \param ret_ram_cap Optional empty slot to receive a reclaimed RAM cap
 *                    (passed through to cleanup_last).
 *
 * \return SYS_ERR_CAP_NOT_FOUND when the clear list is empty, otherwise
 *         the result of cleanup_last.
 */
errval_t caps_clear_step(struct cte *ret_ram_cap)
{
    errval_t err;
    // clearing only runs once the delete list is fully drained
    assert(!delete_head);
    assert(!delete_tail);

    if (!clear_head) {
        assert(!clear_tail);
        return SYS_ERR_CAP_NOT_FOUND;
    }
    // single-element list iff head == tail iff head has no successor
    assert((clear_head == clear_tail) == (!clear_head->delete_node.next));

    struct cte *cte = clear_head;

#ifndef NDEBUG
    // some sanity checks: every contained slot must be empty, or be a
    // CNode/Dispatcher that is itself marked in_delete
#define CHECK_SLOT(slot) do { \
    assert((slot)->cap.type == ObjType_Null \
           || (slot)->cap.type == ObjType_CNode \
           || (slot)->cap.type == ObjType_Dispatcher); \
    if ((slot)->cap.type != ObjType_Null) { \
        assert((slot)->mdbnode.in_delete); \
    } \
} while (0)

    if (cte->cap.type == ObjType_CNode) {
        for (cslot_t i = 0; i < (1<<cte->cap.u.cnode.bits); i++) {
            struct cte *slot = caps_locate_slot(cte->cap.u.cnode.cnode, i);
            CHECK_SLOT(slot);
        }
    } else if (cte->cap.type == ObjType_Dispatcher) {
        struct dcb *dcb = cte->cap.u.dispatcher.dcb;
        CHECK_SLOT(&dcb->cspace);
        CHECK_SLOT(&dcb->disp_cte);
    } else {
        panic("Non-CNode/Dispatcher cap type in clear list!");
    }

#undef CHECK_SLOT
#endif

    TRACE_CAP_MSG("caps_clear_step for", cte);
    // remember the successor before cleanup_last clobbers the cte
    struct cte *after = cte->delete_node.next;
    err = cleanup_last(cte, ret_ram_cap);
    if (err_is_ok(err)) {
        // advance the clear list only on success; on failure the head stays
        // so the step can be retried
        if (after) {
            clear_head = after;
        } else {
            clear_head = clear_tail = NULL;
        }
    }
    return err;
}
/**
 * \brief Initiate revocation of a capability.
 *
 * Revocation is always coordinated by the monitor; this entry point only
 * rejects locked caps and otherwise defers to the monitor protocol.
 */
errval_t caps_revoke(struct cte *cte)
{
    TRACE_CAP_MSG("revoking", cte);

    return cte->mdbnode.locked ? SYS_ERR_CAP_LOCKED
                               : SYS_ERR_RETRY_THROUGH_MONITOR;
}
/**
 * \brief Delete all copies of a foreign cap.
 *
 * Called on behalf of the owning core's monitor during a remote delete.
 * All local copies of \p cte are removed; since the cap is foreign, no
 * object cleanup is needed.
 *
 * \return SYS_ERR_DELETE_REMOTE_LOCAL if this core owns the cap,
 *         SYS_ERR_OK otherwise.
 */
errval_t caps_delete_foreigns(struct cte *cte)
{
    errval_t err;
    struct cte *next;
    if (cte->mdbnode.owner == my_core_id) {
        // caller asked us to delete "foreign" copies of a cap we own
        debug(SUBSYS_CAPS, "%s called on %d for %p, owner=%d\n",
                __FUNCTION__, my_core_id, cte, cte->mdbnode.owner);
        return SYS_ERR_DELETE_REMOTE_LOCAL;
    }
    assert(cte->mdbnode.owner != my_core_id);
    if (cte->mdbnode.in_delete) {
        printk(LOG_WARN,
               "foreign caps with in_delete set,"
               " this should not happen");
    }

    TRACE_CAP_MSG("del copies of", cte);

    // Delete all copies that sit next to cte in MDB order.  Each iteration
    // removes `next` from the MDB, so mdb_successor(cte) yields a fresh
    // element every time.
    // XXX: should we go predecessor as well?
    for (next = mdb_successor(cte);
         next && is_copy(&cte->cap, &next->cap);
         next = mdb_successor(cte))
    {
        // XXX: should this be == or != ?
        assert(next->mdbnode.owner != my_core_id);
        if (next->mdbnode.in_delete) {
            printk(LOG_WARN,
                   "foreign caps with in_delete set,"
                   " this should not happen");
        }
        err = cleanup_copy(next);
        if (err_is_fail(err)) {
            panic("error while deleting extra foreign copy for remote_delete:"
                  " %"PRIuERRV"\n", err);
        }
    }

    // The capabilities should all be foreign, by nature of the request.
    // Foreign capabilities are rarely locked, since they can be deleted
    // immediately. The only time a foreign capability is locked is during
    // move and retrieve operations. In either case, the lock on the same
    // capability must also be acquired on the owner for the operation to
    // succeed. Thus, we can safely unlock any capability here iff the
    // monitor guarentees that this operation is only executed when the
    // capability is locked on the owner.
    cte->mdbnode.locked = false;
    err = caps_try_delete(cte);
    if (err_is_fail(err)) {
        panic("error while deleting foreign copy for remote_delete:"
              " %"PRIuERRV"\n", err);
    }

    return SYS_ERR_OK;
}
/** * \brief Cleanup a cap copy but not the object represented by the cap */ static errval_t cleanup_copy(struct cte *cte) { errval_t err; TRACE_CAP_MSG("cleaning up copy", cte); struct capability *cap = &cte->cap; if (type_is_vnode(cap->type) || cap->type == ObjType_Frame || cap->type == ObjType_DevFrame) { unmap_capability(cte); } if (distcap_is_foreign(cte)) { TRACE_CAP_MSG("cleaning up non-owned copy", cte); if (cte->mdbnode.remote_copies || cte->mdbnode.remote_descs) { struct cte *ancestor = mdb_find_ancestor(cte); if (ancestor) { mdb_set_relations(ancestor, RRELS_DESC_BIT, RRELS_DESC_BIT); } } } err = mdb_remove(cte); if (err_is_fail(err)) { return err; } TRACE_CAP_MSG("cleaned up copy", cte); assert(!mdb_reachable(cte)); memset(cte, 0, sizeof(*cte)); return SYS_ERR_OK; }
/**
 * \brief Delete a capability from its slot.
 *
 * Locked caps cannot be deleted.  If a simple local delete is impossible
 * because this is the last owned copy, the error is annotated so the caller
 * retries via the monitor.
 */
errval_t caps_delete(struct cte *cte)
{
    TRACE_CAP_MSG("deleting", cte);

    if (cte->mdbnode.locked) {
        return SYS_ERR_CAP_LOCKED;
    }

    errval_t err = caps_try_delete(cte);
    if (err_no(err) == SYS_ERR_DELETE_LAST_OWNED) {
        // last owned copy: the monitor must coordinate the delete
        err = err_push(err, SYS_ERR_RETRY_THROUGH_MONITOR);
    }

    return err;
}
/**
 * \brief Monitor syscall: read/update a cap's remote-relation bits.
 *
 * Looks up the cap at (root_addr, cptr); if \p mask is non-zero, applies
 * \p relations under \p mask, then returns the resulting remote
 * copy/ancestor/descendant flags in the sysret value.
 */
struct sysret sys_monitor_remote_relations(capaddr_t root_addr, uint8_t root_bits,
                                           capaddr_t cptr, uint8_t bits,
                                           uint8_t relations, uint8_t mask)
{
    struct cte *cte;
    errval_t err = sys_double_lookup(root_addr, root_bits, cptr, bits, &cte);
    if (err_is_fail(err)) {
        printf("%s: error in double_lookup: %"PRIuERRV"\n", __FUNCTION__, err);
        return SYSRET(err);
    }

#ifdef TRACE_PMEM_CAPS
    if (caps_should_trace(&cte->cap)) {
        char buf[512];
        static const char chars[] = "~~01";
#define MK01(b) ((int)((b)!=0))
#define BITC(BIT) (chars[(2*MK01(mask & BIT)+MK01(relations & BIT))])
        snprintf(buf, 512, "set remote: c %c, a %c, d %c",
                 BITC(RRELS_COPY_BIT), BITC(RRELS_ANCS_BIT),
                 BITC(RRELS_DESC_BIT));
#undef BITC
#undef MK01
        TRACE_CAP_MSG(buf, cte);
    }
#endif

    if (mask) {
        mdb_set_relations(cte, relations, mask);
    }

    // report the resulting relation flags back to the caller
    relations = 0;
    if (cte->mdbnode.remote_copies) {
        relations |= RRELS_COPY_BIT;
    }
    if (cte->mdbnode.remote_ancs) {
        relations |= RRELS_ANCS_BIT;
    }
    if (cte->mdbnode.remote_descs) {
        relations |= RRELS_DESC_BIT;
    }

    return (struct sysret){ .error = SYS_ERR_OK, .value = relations };
}
static void clear_list_prepend(struct cte *cte) { // make sure we don't break delete list by inserting cte that hasn't been // removed from delete list into clear list assert(cte->delete_node.next == NULL); if (!clear_tail) { assert(!clear_head); clear_head = clear_tail = cte; cte->delete_node.next = NULL; } else { assert(clear_head); cte->delete_node.next = clear_head; clear_head = cte; } TRACE_CAP_MSG("inserted into clear list", cte); }
/// Create page mappings errval_t caps_copy_to_vnode(struct cte *dest_vnode_cte, cslot_t dest_slot, struct cte *src_cte, uintptr_t flags, uintptr_t offset, uintptr_t pte_count, struct cte *mapping_cte) { assert(type_is_vnode(dest_vnode_cte->cap.type)); assert(mapping_cte->cap.type == ObjType_Null); struct capability *src_cap = &src_cte->cap; struct capability *dest_cap = &dest_vnode_cte->cap; mapping_handler_t handler_func = handler[dest_cap->type]; assert(handler_func != NULL); cslot_t last_slot = dest_slot + pte_count; // TODO: PAE if (last_slot > X86_32_PTABLE_SIZE) { // requested map overlaps leaf page table debug(SUBSYS_CAPS, "caps_copy_to_vnode: requested mapping spans multiple leaf page tables\n"); return SYS_ERR_VM_RETRY_SINGLE; } errval_t r = handler_func(dest_cap, dest_slot, src_cap, flags, offset, pte_count, mapping_cte); if (err_is_fail(r)) { assert(mapping_cte->cap.type == ObjType_Null); debug(SUBSYS_PAGING, "caps_copy_to_vnode: handler func returned %d\n", r); return r; } /* insert mapping cap into mdb */ errval_t err = mdb_insert(mapping_cte); if (err_is_fail(err)) { printk(LOG_ERR, "%s: mdb_insert: %"PRIuERRV"\n", __FUNCTION__, err); } TRACE_CAP_MSG("created", mapping_cte); return err; }
/** * \brief Try a "simple" delete of a cap. If this fails, the monitor needs to * negotiate a delete across the system. */ static errval_t caps_try_delete(struct cte *cte) { TRACE_CAP_MSG("trying simple delete", cte); if (distcap_is_in_delete(cte) || cte->mdbnode.locked) { // locked or already in process of being deleted return SYS_ERR_CAP_LOCKED; } if (distcap_is_foreign(cte) || has_copies(cte)) { return cleanup_copy(cte); } else if (cte->mdbnode.remote_copies || cte->cap.type == ObjType_CNode || cte->cap.type == ObjType_Dispatcher) { return SYS_ERR_DELETE_LAST_OWNED; } else { return cleanup_last(cte, NULL); } }
/**
 * \brief Remove all page-table mappings backed by a memory cap.
 *
 * Walks the MDB successors of \p mem to find mapping caps that reference
 * it, clears the corresponding page-table entries, and deletes the mapping
 * caps.  Deletion is deferred onto a local list because deleting while
 * iterating would invalidate mdb_successor().  Finishes with a TLB flush.
 *
 * \param mem CTE of the mapped memory cap (Frame/DevFrame/VNode).
 */
errval_t unmap_capability(struct cte *mem)
{
    errval_t err;

    TRACE_CAP_MSG("unmapping", mem);

    genvaddr_t vaddr = 0;
    bool single_page_flush = false;
    int mapping_count = 0, unmap_count = 0;
    genpaddr_t faddr = get_address(&mem->cap);

    // iterate over all mappings associated with 'mem' and unmap them
    struct cte *next = mem;
    struct cte *to_delete = NULL;

    // mapping caps for a cap sit adjacent in MDB order (same address)
    while ((next = mdb_successor(next)) && get_address(&next->cap) == faddr) {
        TRACE_CAP_MSG("looking at", next);
        if (next->cap.type == get_mapping_type(mem->cap.type) &&
            next->cap.u.frame_mapping.cap == &mem->cap)
        {
            TRACE_CAP_MSG("cleaning up mapping", next);
            mapping_count ++;

            // do unmap
            struct Frame_Mapping *mapping = &next->cap.u.frame_mapping;
            struct cte *pgtable = mapping->ptable;
            if (!pgtable) {
                debug(SUBSYS_PAGING, "mapping->ptable == 0: just deleting mapping\n");
                // mem is not mapped, so just return
                goto delete_mapping;
            }
            if (!type_is_vnode(pgtable->cap.type)) {
                debug(SUBSYS_PAGING,
                        "mapping->ptable.type not vnode (%d): just deleting mapping\n",
                        mapping->ptable->cap.type);
                // mem is not mapped, so just return
                goto delete_mapping;
            }

            lpaddr_t ptable_lp = gen_phys_to_local_phys(get_address(&pgtable->cap));
            lvaddr_t ptable_lv = local_phys_to_mem(ptable_lp);
            cslot_t slot = mapping->entry;

            // unmap: clear the PTEs in the containing page table
            do_unmap(ptable_lv, slot, mapping->pte_count);
            unmap_count ++;

            // TLB flush?
            // a single single-page mapping can be flushed by vaddr; anything
            // more forces a full flush below
            if (unmap_count == 1) {
                err = compile_vaddr(pgtable, slot, &vaddr);
                if (err_is_ok(err) && mapping->pte_count == 1) {
                    single_page_flush = true;
                }
            }

delete_mapping:
            assert(!next->delete_node.next);
            // mark mapping cap for delete: cannot do delete here as it messes
            // up mdb_successor()
            next->delete_node.next = to_delete;
            to_delete = next;
        }
    }

    // delete mapping caps
    while (to_delete) {
        next = to_delete->delete_node.next;
        err = caps_delete(to_delete);
        if (err_is_fail(err)) {
            printk(LOG_NOTE, "caps_delete: %"PRIuERRV"\n", err);
        }
        to_delete = next;
    }

    TRACE_CAP_MSGF(mem, "unmapped %d/%d instances", unmap_count, mapping_count);

    // do TLB flush
    if (single_page_flush) {
        do_one_tlb_flush(vaddr);
    } else {
        do_full_tlb_flush();
    }

    return SYS_ERR_OK;
}
/**
 * \brief Delete the last copy of a cap in the entire system.
 * \bug Somewhere in the delete process, the remote_ancs property should be
 * propagated to (remote) immediate descendants.
 *
 * \param cte         Last copy of the capability.
 * \param ret_ram_cap Optional empty slot to receive a reclaimed RAM cap
 *                    (passed through to cleanup_last for simple objects).
 *
 * CNodes and Dispatchers cannot be freed immediately because they contain
 * further slots; their contents are marked for deletion and the container
 * is queued on the clear list instead.
 */
errval_t caps_delete_last(struct cte *cte, struct cte *ret_ram_cap)
{
    errval_t err;
    assert(!has_copies(cte));

    if (cte->mdbnode.remote_copies) {
        printk(LOG_WARN, "delete_last but remote_copies is set\n");
    }

    TRACE_CAP_MSG("deleting last", cte);

    // try simple delete
    // XXX: this really should always fail, enforce that? -MN
    // XXX: this is probably not the way we should enforce/check this -SG
    err = caps_try_delete(cte);
    if (err_no(err) != SYS_ERR_DELETE_LAST_OWNED &&
        err_no(err) != SYS_ERR_CAP_LOCKED) {
        return err;
    }

    // CNodes and dcbs contain further CTEs, so cannot simply be deleted
    // instead, we place them in a clear list, which is progressivly worked
    // through until each list element contains only ctes that point to
    // other CNodes or dcbs, at which point they are scheduled for final
    // deletion, which only happens when the clear lists are empty.

    if (cte->cap.type == ObjType_CNode) {
        debug(SUBSYS_CAPS, "deleting last copy of cnode: %p\n", cte);
        // Mark all non-Null slots for deletion
        for (cslot_t i = 0; i < (1<<cte->cap.u.cnode.bits); i++) {
            struct cte *slot = caps_locate_slot(cte->cap.u.cnode.cnode, i);
            caps_mark_revoke_generic(slot);
        }

        // must not still be linked into the delete list (unless we are its
        // head, in which case the caller advances the list)
        assert(cte->delete_node.next == NULL || delete_head == cte);
        cte->delete_node.next = NULL;
        clear_list_prepend(cte);

        return SYS_ERR_OK;
    }
    else if (cte->cap.type == ObjType_Dispatcher)
    {
        debug(SUBSYS_CAPS, "deleting last copy of dispatcher: %p\n", cte);
        struct capability *cap = &cte->cap;
        struct dcb *dcb = cap->u.dispatcher.dcb;

        // Remove from queue
        scheduler_remove(dcb);
        // Reset current if it was deleted
        if (dcb_current == dcb) {
            dcb_current = NULL;
        }

        // Remove from wakeup queue
        wakeup_remove(dcb);

        // Notify monitor
        if (monitor_ep.u.endpoint.listener == dcb) {
            printk(LOG_ERR, "monitor terminated; expect badness!\n");
            monitor_ep.u.endpoint.listener = NULL;
        } else if (monitor_ep.u.endpoint.listener != NULL) {
            uintptr_t payload = dcb->domain_id;
            err = lmp_deliver_payload(&monitor_ep, NULL, &payload, 1, false);
            if (err_is_fail(err)) {
                printk(LOG_NOTE, "while notifying monitor about domain exit: %"PRIuERRV".\n", err);
                printk(LOG_NOTE, "please add the console output to the following bug report: https://code.systems.ethz.ch/T78\n");
            }
            assert(err_is_ok(err));
        }

        // mark the dispatcher's own cspace root and dispframe for deletion
        caps_mark_revoke_generic(&dcb->cspace);
        caps_mark_revoke_generic(&dcb->disp_cte);
        assert(cte->delete_node.next == NULL || delete_head == cte);
        cte->delete_node.next = NULL;
        clear_list_prepend(cte);

        return SYS_ERR_OK;
    }
    else
    {
        // last copy, perform object cleanup
        return cleanup_last(cte, ret_ram_cap);
    }
}
/** * \brief Cleanup the last cap copy for an object and the object itself */ static errval_t cleanup_last(struct cte *cte, struct cte *ret_ram_cap) { errval_t err; TRACE_CAP_MSG("cleaning up last copy", cte); struct capability *cap = &cte->cap; assert(!has_copies(cte)); if (cte->mdbnode.remote_copies) { printk(LOG_WARN, "cleanup_last but remote_copies is set\n"); } if (ret_ram_cap && ret_ram_cap->cap.type != ObjType_Null) { return SYS_ERR_SLOT_IN_USE; } struct RAM ram = { .bits = 0 }; size_t len = sizeof(struct RAM) / sizeof(uintptr_t) + 1; if (!has_descendants(cte) && !has_ancestors(cte)) { // List all RAM-backed capabilities here // NB: ObjType_PhysAddr and ObjType_DevFrame caps are *not* RAM-backed! switch(cap->type) { case ObjType_RAM: ram.base = cap->u.ram.base; ram.bits = cap->u.ram.bits; break; case ObjType_Frame: ram.base = cap->u.frame.base; ram.bits = cap->u.frame.bits; break; case ObjType_CNode: ram.base = cap->u.cnode.cnode; ram.bits = cap->u.cnode.bits + OBJBITS_CTE; break; case ObjType_Dispatcher: // Convert to genpaddr ram.base = local_phys_to_gen_phys(mem_to_local_phys((lvaddr_t)cap->u.dispatcher.dcb)); ram.bits = OBJBITS_DISPATCHER; break; default: // Handle VNodes here if(type_is_vnode(cap->type)) { ram.base = get_address(cap); ram.bits = vnode_objbits(cap->type); } break; } } err = cleanup_copy(cte); if (err_is_fail(err)) { return err; } if(ram.bits > 0) { // Send back as RAM cap to monitor if (ret_ram_cap) { if (dcb_current != monitor_ep.u.endpoint.listener) { printk(LOG_WARN, "sending fresh ram cap to non-monitor?\n"); } assert(ret_ram_cap->cap.type == ObjType_Null); ret_ram_cap->cap.u.ram = ram; ret_ram_cap->cap.type = ObjType_RAM; err = mdb_insert(ret_ram_cap); TRACE_CAP_MSG("reclaimed", ret_ram_cap); assert(err_is_ok(err)); // note: this is a "success" code! 
err = SYS_ERR_RAM_CAP_CREATED; } else if (monitor_ep.type && monitor_ep.u.endpoint.listener != 0) { #ifdef TRACE_PMEM_CAPS struct cte ramcte; memset(&ramcte, 0, sizeof(ramcte)); ramcte.cap.u.ram = ram; ramcte.cap.type = ObjType_RAM; TRACE_CAP_MSG("reclaimed", ret_ram_cap); #endif // XXX: This looks pretty ugly. We need an interface. err = lmp_deliver_payload(&monitor_ep, NULL, (uintptr_t *)&ram, len, false); } else { printk(LOG_WARN, "dropping ram cap base %08"PRIxGENPADDR" bits %"PRIu8"\n", ram.base, ram.bits); } if (err_no(err) == SYS_ERR_LMP_BUF_OVERFLOW) { printk(LOG_WARN, "dropped ram cap base %08"PRIxGENPADDR" bits %"PRIu8"\n", ram.base, ram.bits); err = SYS_ERR_OK; } else { assert(err_is_ok(err)); } } return err; } /* * Mark phase of revoke mark & sweep */ static void caps_mark_revoke_copy(struct cte *cte) { errval_t err; err = caps_try_delete(cte); if (err_is_fail(err)) { // this should not happen as there is a copy of the cap panic("error while marking/deleting cap copy for revoke:" " 0x%"PRIuERRV"\n", err); } }