context_t act_init(context_t own_context, init_info_t* info, size_t init_base, size_t init_entry) {
    KERNEL_TRACE("init", "activation init");

    internel_if.message_send = kernel_seal(act_send_message_get_trampoline(), act_ref_type);
    internel_if.message_reply = kernel_seal(act_send_return_get_trampoline(), act_sync_ref_type);
    setup_syscall_interface(&internel_if);

    kernel_next_act = 0;

    // This is a dummy. Our first context has already been created
    reg_frame_t frame;
    bzero(&frame, sizeof(struct reg_frame));

    // Register the kernel (exception) activation
    act_t * kernel_act = &kernel_acts[0];
    act_register(&frame, &kernel_queue.queue, "kernel", status_terminated, NULL, cheri_getbase(cheri_getpcc()));
    /* The kernel context already exists and we set it here */
    kernel_act->context = own_context;

    // Create and register the init activation
    KERNEL_TRACE("act", "Retroactively creating init activation");
    /* Not a dummy here. We will subset our own c0/pcc for init. init is loaded directly after the kernel */
    bzero(&frame, sizeof(struct reg_frame));
    size_t length = cheri_getlen(cheri_getdefault()) - init_base;

    frame.cf_c0 = cheri_setbounds(cheri_setoffset(cheri_getdefault(), init_base), length);
    capability pcc = cheri_setbounds(cheri_setoffset(cheri_getpcc(), init_base), length);

    KERNEL_TRACE("act", "assuming init has virtual entry point %lx", init_entry);
    frame.cf_c12 = frame.cf_pcc = cheri_setoffset(pcc, init_entry);

    /* provide config info to init. c3 is the conventional register */
    frame.cf_c3 = info;

    act_t * init_act = &kernel_acts[namespace_num_boot];
    act_register_create(&frame, &init_queue.queue, "init", status_alive, NULL);

    /* The boot activation should be the current activation */
    sched_schedule(init_act);

    return init_act->context;
}
/*
 * Allocate more memory to the indicated bucket.
 */
static void
morecore(int bucket)
{
    char *buf;
    union overhead *op;
    size_t sz;          /* size of desired block */
    int amt;            /* amount to allocate */
    int nblks;          /* how many blocks we get */

    /*
     * sbrk_size <= 0 only for big, FLUFFY, requests (about
     * 2^30 bytes on a VAX, I think) or for a negative arg.
     */
    sz = 1 << (bucket + 3);
#ifdef MALLOC_DEBUG
    ASSERT(sz > 0);
#else
    if (sz <= 0)
        return;
#endif
    if (sz < pagesz) {
        amt = pagesz;
        nblks = amt / sz;
    } else {
        amt = sz + pagesz;
        nblks = 1;
    }
    if (amt > pagepool_end - pagepool_start)
        if (__morepages(amt / pagesz) == 0)
            return;

    /*
     * XXXRW: For now, depend on a global $c0 -- but shouldn't need to as
     * we could be deriving from heap.
     */
    buf = cheri_setoffset(cheri_getdefault(), pagepool_start);
    buf = cheri_setbounds(buf, amt);
    pagepool_start += amt;

    /*
     * Add new memory allocated to that on
     * free list for this hash bucket.
     */
    nextf[bucket] = op = cheri_setbounds(buf, sz);
    while (--nblks > 0) {
        op->ov_next = (union overhead *)cheri_setbounds(buf + sz, sz);
        buf += sz;
        op = op->ov_next;
    }
}
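/*
 * Illustration only (not part of the allocator above): morecore() carves
 * blocks of sz = 1 << (bucket + 3) bytes, i.e. bucket 0 holds 8-byte blocks,
 * bucket 1 holds 16-byte blocks, and so on.  A malloc front end would invert
 * that mapping roughly as sketched below; the helper name is hypothetical,
 * and the sketch ignores the union overhead header that the real allocator
 * also reserves inside each block.
 */
static int
size_to_bucket_sketch(size_t nbytes)
{
    size_t sz = 8;      /* smallest block: 1 << (0 + 3) bytes */
    int bucket = 0;

    /* Pick the smallest power-of-two block that fits the request. */
    while (sz < nbytes) {
        sz <<= 1;
        bucket++;
    }
    return (bucket);
}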
void bootloader_main(void) {
    /* Init hardware */
    hw_init();

    /* Initialize elf-loader environment */
    init_elf_loader();

    /* Load the nano kernel. Doing this will install exception vectors */
    boot_printf("Boot: loading nano kernel ...\n");
    nano_init_t * nano_init = (nano_init_t *)load_nano();
    // We have to rederive this as an executable cap
    nano_init = (nano_init_t *)cheri_setoffset(cheri_getpcc(), cheri_getoffset(nano_init));

    /* TODO: we could have some boot exception vectors if we want exception handling in boot. */
    /* These should be in ROM as a part of the boot image (i.e. make a couple more dedicated sections) */
    cp0_status_bev_set(0);

    boot_printf("Boot: loading kernel ...\n");
    size_t entry = load_kernel();

    boot_printf("Boot: loading init ...\n");
    boot_info_t *bi = load_init();

    size_t invalid_length = bi->init_end;
    capability phy_start = cheri_setbounds(cheri_setoffset(cheri_getdefault(), MIPS_KSEG0), invalid_length);

    /* Do we actually need this? */
    //boot_printf("Invalidating %p length %lx:\n", phy_start, invalid_length);
    //caches_invalidate(phy_start, invalid_length);

    register_t mem_size = bi->init_end - bi->nano_end;

    /* Jumps to the nano kernel init. This will completely destroy boot and so we can never return here.
     * All registers will be cleared apart from a specified few. mem_size of memory will be left unmanaged and the
     * rest will be returned as a reservation. The third argument is an extra argument to the kernel. */
    boot_printf("Jumping to nano kernel...\n");
    BOOT_PRINT_CAP(nano_init);
    nano_init(mem_size, entry, bi->init_begin - bi->kernel_begin, bi->init_entry);
}
/* XXXBD: should be done in sandbox_init(), but need access to argv[0]. */
int
sandbox_program_init(void)
{
    int fd = -1;
    int mib[4];

    mib[0] = CTL_KERN;
    mib[1] = KERN_PROC;
    mib[2] = KERN_PROC_PATHNAME;
    mib[3] = -1;
    char buf[MAXPATHLEN];
    size_t cb = sizeof(buf);

    /* XXXBD: do this with RTLD or hypothetical getexecfd(). */
    if ((sysctl(mib, 4, buf, &cb, NULL, 0) != -1) && cb > 0) {
        if ((fd = open(buf, O_RDONLY)) == -1)
            warn("%s: open %s (from kern.proc.pathname.(-1))",
                __func__, buf);
    }
    if (sandbox_parse_ccall_methods(fd, &main_provided_classes,
        &main_required_methods) == -1) {
        warn("%s: sandbox_parse_ccall_methods for main program",
            __func__);
        close(fd);
        return (-1);
    }
    if (sandbox_set_required_method_variables(cheri_getdefault(),
        main_required_methods) == -1) {
        warnx("%s: sandbox_set_required_method_variables for main "
            "program", __func__);
        return (-1);
    }

    /* XXXBD: cheri_system needs to do this. */
    cheri_system_vtable = sandbox_make_vtable(NULL, "_cheri_system_object",
        main_provided_classes);
    cheri_fd_vtable = sandbox_make_vtable(NULL, "cheri_fd",
        main_provided_classes);

    close(fd);
    return (0);
}
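/*
 * Usage sketch (hypothetical caller, not part of libcheri): the main program
 * would call sandbox_program_init() once, early, so that
 * main_provided_classes / main_required_methods are populated before any
 * sandbox classes are created and resolved against them (see
 * sandbox_class_new() below).
 */
static int
program_startup_sketch(void)
{
    if (sandbox_program_init() == -1) {
        warnx("sandbox_program_init failed");
        return (-1);
    }
    return (0);
}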
int
sandbox_object_load(struct sandbox_class *sbcp, struct sandbox_object *sbop)
{
    __capability void *basecap, *sbcap;
    struct sandbox_metadata *sbm;
    size_t length;
    int saved_errno;
    uint8_t *base;

    /*
     * Perform an initial reservation of space for the sandbox, but using
     * anonymous memory that is neither readable nor writable.  This
     * ensures there is space for all the various segments we will be
     * installing later.
     *
     * The rough sandbox memory map is as follows:
     *
     * K + 0x1000 [stack]
     * K          [guard page]
     * J + 0x1000 [heap]
     * J          [guard page]
     * 0x8000     [memory mapped binary] (SANDBOX_ENTRY)
     * 0x2000     [guard page]
     * 0x1000     [read-only sandbox metadata page]
     * 0x0000     [guard page]
     *
     * Address constants in sandbox.h must be synchronised with the layout
     * implemented here.  Location and contents of sandbox metadata is
     * part of the ABI.
     */
    length = sbcp->sbc_sandboxlen;
    base = sbop->sbo_mem = mmap(NULL, length, 0, MAP_ANON, -1, 0);
    if (sbop->sbo_mem == MAP_FAILED) {
        saved_errno = errno;
        warn("%s: mmap region", __func__);
        goto error;
    }

    /*
     * Skip guard page(s) to the base of the metadata structure.
     */
    base += SANDBOX_METADATA_BASE;
    length -= SANDBOX_METADATA_BASE;

    /*
     * Map metadata structure -- but can't fill it out until we have
     * calculated all the other addresses involved.
     */
    if ((sbm = mmap(base, METADATA_SIZE, PROT_READ | PROT_WRITE,
        MAP_ANON | MAP_FIXED, -1, 0)) == MAP_FAILED) {
        saved_errno = errno;
        warn("%s: mmap metadata", __func__);
        goto error;
    }

    /*
     * Skip forward to the mapping location for the binary -- in case we
     * add more metadata in the future.  Assert that we didn't bump into
     * the sandbox entry address.  This address is hard to change as it is
     * the address used in static linking for sandboxed code.
     */
    assert((register_t)base - (register_t)sbop->sbo_mem < SANDBOX_ENTRY);
    base = (void *)((register_t)sbop->sbo_mem + SANDBOX_ENTRY);
    length = sbcp->sbc_sandboxlen - SANDBOX_ENTRY;

    /*
     * Map program binary.
     */
    if (mmap(base, sbcp->sbc_stat.st_size, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_FIXED, sbcp->sbc_fd, 0) == MAP_FAILED) {
        saved_errno = errno;
        warn("%s: mmap %s", __func__, sbcp->sbc_path);
        goto error;
    }
    base += roundup2(sbcp->sbc_stat.st_size, PAGE_SIZE);
    length -= roundup2(sbcp->sbc_stat.st_size, PAGE_SIZE);

    /*
     * Skip guard page.
     */
    base += GUARD_PAGE_SIZE;
    length -= GUARD_PAGE_SIZE;

    /*
     * Heap.
     */
    sbop->sbo_heapbase = (register_t)base - (register_t)sbop->sbo_mem;
    sbop->sbo_heaplen = length - (GUARD_PAGE_SIZE + STACK_SIZE);
    if (mmap(base, sbop->sbo_heaplen, PROT_READ | PROT_WRITE,
        MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) {
        saved_errno = errno;
        warn("%s: mmap heap", __func__);
        goto error;
    }
    memset(base, 0, sbop->sbo_heaplen);
    base += sbop->sbo_heaplen;
    length -= sbop->sbo_heaplen;

    /*
     * Skip guard page.
     */
    base += GUARD_PAGE_SIZE;
    length -= GUARD_PAGE_SIZE;

    /*
     * Stack.
     */
    if (mmap(base, length, PROT_READ | PROT_WRITE,
        MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) {
        saved_errno = errno;
        warn("%s: mmap stack", __func__);
        goto error;
    }
    memset(base, 0, length);
    base += STACK_SIZE;
    length -= STACK_SIZE;

    /*
     * There should not be too much, nor too little space remaining.  0
     * is our Goldilocks number.
     */
    assert(length == 0);

    /*
     * Now that addresses are known, write out metadata for in-sandbox
     * use; then mprotect() so that it can't be modified by the sandbox.
     */
    sbm->sbm_heapbase = sbop->sbo_heapbase;
    sbm->sbm_heaplen = sbop->sbo_heaplen;
    if (mprotect(sbm, METADATA_SIZE, PROT_READ) < 0) {
        saved_errno = errno;
        warn("%s: mprotect metadata", __func__);
        goto error;
    }

    if (sbcp->sbc_sandbox_class_statp != NULL) {
        (void)sandbox_stat_object_register(
            &sbop->sbo_sandbox_object_statp,
            sbcp->sbc_sandbox_class_statp,
            SANDBOX_OBJECT_TYPE_POINTER, (uintptr_t)sbop->sbo_mem);
        SANDBOX_CLASS_ALLOC(sbcp->sbc_sandbox_class_statp);
    }

    /*
     * Construct a generic capability that describes the combined
     * data/code segment that we will seal.
     */
    basecap = cheri_ptrtype(sbop->sbo_mem, sbcp->sbc_sandboxlen,
        SANDBOX_ENTRY);

    /* Construct sealed code capability. */
    sbcap = cheri_andperm(basecap, CHERI_PERM_EXECUTE | CHERI_PERM_LOAD |
        CHERI_PERM_SEAL);
    sbop->sbo_cheri_object.co_codecap = cheri_sealcode(sbcap);

    /* Construct sealed data capability. */
    sbcap = cheri_andperm(basecap, CHERI_PERM_LOAD | CHERI_PERM_STORE |
        CHERI_PERM_LOAD_CAP | CHERI_PERM_STORE_CAP |
        CHERI_PERM_STORE_EPHEM_CAP);
    sbop->sbo_cheri_object.co_datacap = cheri_sealdata(sbcap, basecap);

    /*
     * Construct an object capability for the system class instance that
     * will be passed into the sandbox.  Its code capability is just our
     * $c0; the data capability is to the sandbox structure itself, which
     * allows the system class to identify which sandbox a request is
     * being issued from.
     *
     * Note that $c0 in the 'sandbox' will be set from $pcc, so leave a
     * full set of write/etc permissions on the code capability.
     */
    basecap = cheri_settype(cheri_getdefault(),
        (register_t)CHERI_CLASS_ENTRY(libcheri_system));
    sbop->sbo_cheri_system_object.co_codecap = cheri_sealcode(basecap);

    sbcap = cheri_ptr(sbop, sizeof(*sbop));
    sbcap = cheri_andperm(sbcap, CHERI_PERM_LOAD | CHERI_PERM_STORE |
        CHERI_PERM_LOAD_CAP | CHERI_PERM_STORE_CAP |
        CHERI_PERM_STORE_EPHEM_CAP);
    sbop->sbo_cheri_system_object.co_datacap = cheri_sealdata(sbcap, basecap);

    return (0);

error:
    if (sbop->sbo_mem != NULL)
        munmap(sbop->sbo_mem, sbcp->sbc_sandboxlen);
    errno = saved_errno;
    return (-1);
}
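/*
 * Illustration only (not part of libcheri): the base/length bookkeeping in
 * sandbox_object_load() above implies the closed form below for the heap
 * length, which is also why the final assert(length == 0) holds.  The helper
 * name and the binsize_rounded parameter (standing for
 * roundup2(sbc_stat.st_size, PAGE_SIZE)) are hypothetical; the constants are
 * the ones from sandbox.h used above.
 */
static size_t
sandbox_heaplen_sketch(size_t sandboxlen, size_t binsize_rounded)
{
    /* Everything after the binary, minus two guard pages and the stack. */
    return (sandboxlen - SANDBOX_ENTRY - binsize_rounded -
        2 * GUARD_PAGE_SIZE - STACK_SIZE);
}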
int
sandbox_class_new(const char *path, size_t maxmaplen,
    struct sandbox_class **sbcpp)
{
    char sandbox_basename[MAXPATHLEN];
    struct sandbox_class *sbcp;
    struct sandbox_class **new_sandbox_classes;
    int fd, saved_errno;
    size_t i;

    fd = open(path, O_RDONLY);
    if (fd == -1) {
        saved_errno = errno;
        warn("%s: open %s", __func__, path);
        errno = saved_errno;
        return (-1);
    }

    sbcp = calloc(1, sizeof(*sbcp));
    if (sbcp == NULL) {
        saved_errno = errno;
        warn("%s: malloc", __func__);
        close(fd);
        errno = saved_errno;
        return (-1);
    }
    sbcp->sbc_fd = fd;
    sbcp->sbc_path = strdup(path);
    if (sbcp->sbc_path == NULL) {
        saved_errno = errno;
        warn("%s: strdup %s", __func__, path);
        goto error;
    }

    if (fstat(sbcp->sbc_fd, &sbcp->sbc_stat) < 0) {
        saved_errno = errno;
        warn("%s: fstat %s", __func__, path);
        goto error;
    }

    /*
     * Parse the ELF and produce mappings for code and data.
     */
    if ((sbcp->sbc_codemap = sandbox_parse_elf64(fd,
        SANDBOX_LOADELF_CODE)) == NULL) {
        saved_errno = EINVAL;
        warnx("%s: sandbox_parse_elf64(CODE) failed for %s", __func__,
            path);
        goto error;
    }
    if ((sbcp->sbc_datamap = sandbox_parse_elf64(fd,
        SANDBOX_LOADELF_DATA)) == NULL) {
        saved_errno = EINVAL;
        warnx("%s: sandbox_parse_elf64(DATA) failed for %s", __func__,
            path);
        goto error;
    }

    /*
     * Don't allow sandbox binaries to request over maxmaplen of
     * either code or data.
     *
     * XXXBD: It would be nice to have some sort of default sane
     * value, but programs can have astonishing amounts of BSS
     * relative to file size.
     */
    if (maxmaplen > 0 &&
        sandbox_map_maxoffset(sbcp->sbc_codemap) > maxmaplen) {
        saved_errno = EINVAL;
        warnx("%s: %s code too large", __func__, path);
        goto error;
    }
    if (maxmaplen > 0 &&
        sandbox_map_maxoffset(sbcp->sbc_datamap) > maxmaplen) {
        saved_errno = EINVAL;
        warnx("%s: %s data too large", __func__, path);
        goto error;
    }

    /*
     * Initialise the class mapping: this will be the code capability used
     * by all sandboxes.  For now, we just map the code segment in exactly
     * the same way we do the data segment.  In the future, we will want
     * to initialise them differently.
     */
    if (sandbox_class_load(sbcp) < 0) {
        saved_errno = EINVAL;
        warnx("%s: sandbox_class_load() failed for %s", __func__, path);
        goto error;
    }

    /*
     * Resolve methods in other classes.
     */
    for (i = 0; i < num_sandbox_classes; i++) {
        /* XXXBD: Check there are no conflicting class names */
        if (sandbox_resolve_methods(sbcp->sbc_provided_classes,
            sandbox_classes[i]->sbc_required_methods) < 0) {
            saved_errno = EINVAL;
            warnx("%s: sandbox_resolve_methods() failed providing "
                "methods from %s to %s", __func__, path,
                sandbox_classes[i]->sbc_path);
            goto error;
        }
        if (sandbox_resolve_methods(
            sandbox_classes[i]->sbc_provided_classes,
            sbcp->sbc_required_methods) < 0) {
            saved_errno = EINVAL;
            warnx("%s: sandbox_resolve_methods() failed providing "
                "methods from %s to %s", __func__,
                sandbox_classes[i]->sbc_path, path);
            goto error;
        }
    }

    /*
     * XXXBD: failure to initialize main_*_methods should eventually
     * be impossible and trigger an assert.
     */
    if (main_provided_classes != NULL && main_required_methods != NULL) {
        if (sandbox_resolve_methods(sbcp->sbc_provided_classes,
            main_required_methods) < 0) {
            saved_errno = EINVAL;
            warnx("%s: sandbox_resolve_methods() failed providing "
                "methods from %s main program", __func__, path);
            goto error;
        }
        if (sandbox_resolve_methods(main_provided_classes,
            sbcp->sbc_required_methods) < 0) {
            saved_errno = EINVAL;
            warnx("%s: sandbox_resolve_methods() failed providing "
                "methods from main program to %s", __func__, path);
            goto error;
        }
    }

    /*
     * Update main program method variables.
     *
     * XXXBD: Doing this in every class is inefficient.
     */
    if (sandbox_set_required_method_variables(cheri_getdefault(),
        main_required_methods) == -1) {
        warnx("%s: sandbox_set_required_method_variables for main "
            "program", __func__);
        return (-1);
    }

    /*
     * Register the class on the list of classes.
     */
    if (max_sandbox_classes == 0) {
        max_sandbox_classes = 4;
        if ((sandbox_classes = calloc(max_sandbox_classes,
            sizeof(*sandbox_classes))) == NULL) {
            saved_errno = errno;
            warn("%s: calloc sandbox_classes array", __func__);
            goto error;
        }
    }

    if (num_sandbox_classes >= max_sandbox_classes) {
        if ((new_sandbox_classes = realloc(sandbox_classes,
            max_sandbox_classes * 2 * sizeof(*sandbox_classes))) == NULL) {
            saved_errno = errno;
            warn("%s: realloc sandbox_classes array", __func__);
            goto error;
        }
        /* realloc() has taken ownership of the old array. */
        sandbox_classes = new_sandbox_classes;
        max_sandbox_classes *= 2;
    }
    sandbox_classes[num_sandbox_classes++] = sbcp;

    /*
     * Register the class/object for statistics; also register a single
     * "noname" method to catch statistics for unnamed or overflow
     * methods.
     *
     * NB: We use the base address of the sandbox's $c0 as the 'name' of
     * the object, since this is most useful for comparison to capability
     * values.  However, you could also see an argument for using 'sb'
     * itself here.
     */
    (void)sandbox_stat_class_register(&sbcp->sbc_sandbox_class_statp,
        basename_r(path, sandbox_basename));
    (void)sandbox_stat_method_register(&sbcp->sbc_sandbox_method_nonamep,
        sbcp->sbc_sandbox_class_statp, "<noname>");

    *sbcpp = sbcp;
    return (0);

error:
    if (sbcp->sbc_path != NULL)
        free(sbcp->sbc_path);
    close(sbcp->sbc_fd);
    free(sbcp);
    errno = saved_errno;
    return (-1);
}
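/*
 * Usage sketch (hypothetical caller, not part of libcheri): creating a class
 * from a sandbox binary using the signature shown above.  The wrapper name
 * and the 16 MiB map limit are illustrative values, not taken from the code
 * above.
 */
static struct sandbox_class *
class_new_sketch(const char *path)
{
    struct sandbox_class *sbcp;

    if (sandbox_class_new(path, 16 * 1024 * 1024, &sbcp) == -1) {
        warn("sandbox_class_new: %s", path);
        return (NULL);
    }
    return (sbcp);
}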