/*
 * Forward write_c() on a cheri_fd to the underlying file descriptor.
 */
struct cheri_fd_ret
cheri_fd_write(__capability const void *buf_c, size_t nbytes)
{
	struct cheri_fd_ret ret;
	__capability struct cheri_fd *cfp;
	void *buf;

	/* XXXRW: Object-capability user permission check on idc. */

	/* XXXRW: Change to check permissions directly and throw exception. */
	if (!(cheri_getperm(buf_c) & CHERI_PERM_LOAD)) {
		ret.cfr_retval0 = -1;
		ret.cfr_retval1 = EPROT;
		return (ret);
	}
	buf = (void *)buf_c;

	/* Check that the cheri_fd hasn't been revoked. */
	cfp = cheri_getidc();
	if (cfp->cf_fd == -1) {
		ret.cfr_retval0 = -1;
		ret.cfr_retval1 = EBADF;
		return (ret);
	}

	/* Forward to operating system. */
	ret.cfr_retval0 = write(cfp->cf_fd, buf,
	    min(nbytes, cheri_getlen(buf_c) - cheri_getoffset(buf_c)));
	ret.cfr_retval1 = (ret.cfr_retval0 < 0 ? errno : 0);
	return (ret);
}
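/*
 * Illustrative caller-side sketch (not part of the original source; the
 * helper name example_prepare_write_buf() is hypothetical).  Before handing
 * a buffer to cheri_fd_write() through an object invocation, the caller can
 * narrow the capability so the method is able to load only the intended
 * bytes, which is what the CHERI_PERM_LOAD check above relies on.
 */
static __capability const void *
example_prepare_write_buf(const char *msg, size_t len)
{

	/* Bound the capability to the message and strip all but LOAD. */
	return (cheri_andperm(cheri_setbounds(
	    (__capability const void *)msg, len), CHERI_PERM_LOAD));
}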
/*
 * Forward fstat() on a cheri_fd to the underlying file descriptor.
 */
struct cheri_fd_ret
cheri_fd_fstat(__capability struct stat *sb_c)
{
	struct cheri_fd_ret ret;
	__capability struct cheri_fd *cfp;
	struct stat *sb;

	/* XXXRW: Object-capability user permission check on idc. */

	/* XXXRW: Change to check permissions directly and throw exception. */
	if (!(cheri_getperm(sb_c) & CHERI_PERM_STORE) ||
	    !(cheri_getlen(sb_c) >= sizeof(*sb))) {
		ret.cfr_retval0 = -1;
		ret.cfr_retval1 = EPROT;
		return (ret);
	}
	sb = (void *)sb_c;

	/* Check that the cheri_fd hasn't been revoked. */
	cfp = cheri_getidc();
	if (cfp->cf_fd == -1) {
		ret.cfr_retval0 = -1;
		ret.cfr_retval1 = EBADF;
		return (ret);
	}

	/* Forward to operating system. */
	ret.cfr_retval0 = fstat(cfp->cf_fd, sb);
	ret.cfr_retval1 = (ret.cfr_retval0 < 0 ? errno : 0);
	return (ret);
}
void *
kernel_realloc(void *cp, size_t nbytes)
{
	size_t cur_space;	/* Space in the current bucket */
	size_t smaller_space;	/* Space in the next smaller bucket */
	union overhead *op;
	char *res;

	if (cp == NULL)
		return (kernel_malloc(nbytes));
	op = find_overhead(cp);
	if (op == NULL)
		return (NULL);
	cur_space = (1 << (op->ov_index + 3)) - sizeof(*op);

	/* avoid the copy if same size block */
	/*
	 * XXX-BD: Arguably we should be tracking the actual allocation,
	 * not just the bucket size, so that we can do a full malloc+memcpy
	 * when the caller has restricted the length of the pointer passed
	 * to realloc() but is growing the buffer within the current bucket.
	 *
	 * As it is, this code contains a leak where realloc recovers access
	 * to the contents in foo:
	 *	char *foo = malloc(10);
	 *	strcpy(foo, "abcdefghi");
	 *	foo = cheri_setbounds(foo, 5);
	 *	foo = realloc(foo, 10);
	 */
	smaller_space = (1 << (op->ov_index + 2)) - sizeof(*op);
	if (nbytes <= cur_space && nbytes > smaller_space)
		return (cheri_andperm(cheri_setbounds(op + 1, nbytes),
		    cheri_getperm(cp)));

	if ((res = kernel_malloc(nbytes)) == NULL)
		return (NULL);
	/*
	 * Only copy data the caller had access to, even if this is less
	 * than the size of the original allocation.  This risks surprise
	 * for some programmers, but to do otherwise risks information leaks.
	 */
	memcpy(res, cp,
	    (nbytes <= cheri_getlen(cp)) ? nbytes : cheri_getlen(cp));
	res = cheri_andperm(res, cheri_getperm(cp));
	kernel_free(cp);
	return (res);
}
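/*
 * Compilable sketch of the leak described in the comment above (the
 * example_realloc_leak() helper is hypothetical and assumes only the
 * kernel_malloc()/kernel_realloc()/kernel_free() interfaces shown in this
 * file).  Even though the caller narrowed foo to 5 bytes, an in-bucket
 * realloc() re-derives bounds from the allocator's wider internal
 * capability, so bytes 5..9 of the original string become readable again.
 */
void
example_realloc_leak(void)
{
	char *foo = kernel_malloc(10);

	strcpy(foo, "abcdefghi");
	foo = cheri_setbounds(foo, 5);	/* Caller restricts its own view... */
	foo = kernel_realloc(foo, 10);	/* ...but regains all 10 bytes. */
	kernel_free(foo);
}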
register_t
cheritest_libcheri_userfn_getstack(void)
{
	struct cheri_stack cs;
	struct cheri_stack_frame *csfp;
	u_int stack_depth;
	int retval;

	retval = sysarch(CHERI_GET_STACK, &cs);
	if (retval != 0)
		cheritest_failure_err("sysarch(CHERI_GET_STACK) failed");

	/* Does stack layout look sensible enough to continue? */
	if ((cs.cs_tsize % CHERI_FRAME_SIZE) != 0)
		cheritest_failure_errx(
		    "stack size (%ld) not a multiple of frame size",
		    cs.cs_tsize);
	stack_depth = cs.cs_tsize / CHERI_FRAME_SIZE;

	if ((cs.cs_tsp % CHERI_FRAME_SIZE) != 0)
		cheritest_failure_errx(
		    "stack pointer (%ld) not a multiple of frame size",
		    cs.cs_tsp);

	/* Validate that two stack frames are found. */
	if (cs.cs_tsp != cs.cs_tsize - (register_t)(2 * CHERI_FRAME_SIZE))
		cheritest_failure_errx("stack contains %d frames; expected "
		    "2", (cs.cs_tsize - (2 * CHERI_FRAME_SIZE)) /
		    CHERI_FRAME_SIZE);

	/* Validate that the first is a saved ambient context. */
	csfp = &cs.cs_frames[stack_depth - 1];
	if (cheri_getbase(csfp->csf_pcc) != cheri_getbase(cheri_getpcc()) ||
	    cheri_getlen(csfp->csf_pcc) != cheri_getlen(cheri_getpcc()))
		cheritest_failure_errx("frame 0: not global code cap");

	/* Validate that the second is cheritest_objectp. */
	csfp = &cs.cs_frames[stack_depth - 2];
	if ((cheri_getbase(csfp->csf_pcc) != cheri_getbase(
	    sandbox_object_getobject(cheritest_objectp).co_codecap)) ||
	    cheri_getlen(csfp->csf_pcc) != cheri_getlen(
	    sandbox_object_getobject(cheritest_objectp).co_codecap))
		cheritest_failure_errx("frame 1: not sandbox code cap");
	return (0);
}
static void
sb_read_fn(png_structp png_ptr, png_bytep data, png_size_t length)
{
	void *io_ptr = png_get_io_ptr(png_ptr);

#if 0
	printf("in sb_read_fn, data base 0x%jx offset 0x%jx length 0x%zx "
	    "(min len 0x%zx)\n", cheri_getbase(data), cheri_getoffset(data),
	    cheri_getlen(data), length);
#endif

	libpng_sb_read_callback(io_ptr, cheri_setlen(data, length), length);
}
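/*
 * Sketch of how the callback above might be hooked up (the
 * sb_install_read_fn() wrapper is hypothetical; png_set_read_fn() is the
 * standard libpng registration routine, and io_state is whatever object
 * libpng_sb_read_callback() expects to be handed back).
 */
static void
sb_install_read_fn(png_structp png_ptr, void *io_state)
{

	/* Route all of libpng's read requests through sb_read_fn(). */
	png_set_read_fn(png_ptr, io_state, sb_read_fn);
}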
/*
 * Check for high-precision bounds for a variety of small object sizes,
 * allocated from the stack.  These should be precise regardless of capability
 * compression, as the allocator promises to align things suitably.  Test both
 * static and dynamic allocation.
 */
static void
test_bounds_precise(__capability void *c, size_t expected_len)
{
	size_t len, offset;

	/* Confirm precise lower bound: offset of zero. */
	offset = cheri_getoffset(c);
	if (offset != 0)
		cheritest_failure_errx("offset (%jd) not zero", offset);

	/* Confirm precise upper bound: length of expected size for type. */
	len = cheri_getlen(c);
	if (len != expected_len)
		cheritest_failure_errx("length (%jd) not expected %jd", len,
		    expected_len);
	cheritest_success();
}
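/*
 * Hypothetical caller sketch (example_bounds_stack_uint32() is not from the
 * original test suite): take a capability to a small stack allocation and
 * confirm that its bounds are exactly the object's size, as the comment
 * above describes for stack-allocated objects.
 */
static void
example_bounds_stack_uint32(void)
{
	uint32_t u32;
	__capability uint32_t *u32p = (__capability uint32_t *)&u32;

	test_bounds_precise(u32p, sizeof(u32));
}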
context_t act_init(context_t own_context, init_info_t *info, size_t init_base,
		   size_t init_entry)
{
	KERNEL_TRACE("init", "activation init");

	internel_if.message_send =
	    kernel_seal(act_send_message_get_trampoline(), act_ref_type);
	internel_if.message_reply =
	    kernel_seal(act_send_return_get_trampoline(), act_sync_ref_type);
	setup_syscall_interface(&internel_if);

	kernel_next_act = 0;

	// This is a dummy. Our first context has already been created.
	reg_frame_t frame;
	bzero(&frame, sizeof(struct reg_frame));

	// Register the kernel (exception) activation
	act_t *kernel_act = &kernel_acts[0];
	act_register(&frame, &kernel_queue.queue, "kernel", status_terminated,
	    NULL, cheri_getbase(cheri_getpcc()));
	/* The kernel context already exists and we set it here */
	kernel_act->context = own_context;

	// Create and register the init activation
	KERNEL_TRACE("act", "Retroactively creating init activation");
	/* Not a dummy here. We will subset our own c0/pcc for init.
	 * init is loaded directly after the kernel. */
	bzero(&frame, sizeof(struct reg_frame));
	size_t length = cheri_getlen(cheri_getdefault()) - init_base;

	frame.cf_c0 = cheri_setbounds(
	    cheri_setoffset(cheri_getdefault(), init_base), length);
	capability pcc = cheri_setbounds(
	    cheri_setoffset(cheri_getpcc(), init_base), length);

	KERNEL_TRACE("act", "assuming init has virtual entry point %lx",
	    init_entry);
	frame.cf_c12 = frame.cf_pcc = cheri_setoffset(pcc, init_entry);

	/* provide config info to init. c3 is the conventional register */
	frame.cf_c3 = info;

	act_t *init_act = &kernel_acts[namespace_num_boot];
	act_register_create(&frame, &init_queue.queue, "init", status_alive,
	    NULL);

	/* The boot activation should be the current activation */
	sched_schedule(init_act);

	return init_act->context;
}
/*
 * Unwind the trusted stack by the specified number of frames (or all).
 */
int
cheri_stack_unwind(ucontext_t *uap, register_t ret, u_int op,
    u_int num_frames)
{
	struct cheri_frame *cfp;
	struct cheri_stack cs;
	struct cheri_stack_frame *csfp;
	u_int stack_size, stack_frames;
	register_t saved_mcreg0;

	if (op != CHERI_STACK_UNWIND_OP_N &&
	    op != CHERI_STACK_UNWIND_OP_ALL) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Request to unwind zero frames is a no-op: no state transformation
	 * is needed.
	 */
	if ((op == CHERI_STACK_UNWIND_OP_N) && (num_frames == 0))
		return (0);

	/*
	 * Retrieve trusted stack and validate before attempting to unwind.
	 */
	if (sysarch(CHERI_GET_STACK, &cs) != 0)
		return (-1);
	if ((cs.cs_tsize % CHERI_FRAME_SIZE) != 0 ||
	    (cs.cs_tsp > cs.cs_tsize) ||
	    (cs.cs_tsp % CHERI_FRAME_SIZE) != 0) {
		errno = ERANGE;
		return (-1);
	}

	/*
	 * See if there is room on the stack for that much unwinding.
	 */
	stack_size = cs.cs_tsize / CHERI_FRAME_SIZE;
	stack_frames = (cs.cs_tsize - cs.cs_tsp) / CHERI_FRAME_SIZE;
	if (op == CHERI_STACK_UNWIND_OP_ALL)
		num_frames = stack_frames;
	if ((num_frames < 0) || (stack_frames < num_frames)) {
		errno = ERANGE;
		return (-1);
	}

	/*
	 * Restore state from the last frame being unwound.
	 */
	csfp = &cs.cs_frames[stack_size - (stack_frames - num_frames) - 1];
#if 0
	/* Make sure we will be returning to ambient authority. */
	if (cheri_getbase(csfp->csf_pcc) != cheri_getbase(cheri_getpcc()) ||
	    cheri_getlen(csfp->csf_pcc) != cheri_getlen(cheri_getpcc()))
		return (-1);
#endif

	/*
	 * Pop the desired number of frames off the trusted stack.
	 */
	cs.cs_tsp += num_frames * CHERI_FRAME_SIZE;
	assert(cs.cs_tsp <= cs.cs_tsize);

#ifdef __CHERI_PURE_CAPABILITY__
	cfp = &uap->uc_mcontext.mc_cheriframe;
#else
	cfp = (struct cheri_frame *)uap->uc_mcontext.mc_cp2state;
	if (cfp == NULL || uap->uc_mcontext.mc_cp2state_len != sizeof(*cfp)) {
		errno = ERANGE;
		return (-1);
	}
#endif

	/*
	 * Zero the capability register file, explicitly restoring $pcc and
	 * $idc from the last trusted-stack frame.
	 */
	memset(cfp, 0, sizeof(*cfp));
	cfp->cf_idc = csfp->csf_idc;
	cfp->cf_pcc = csfp->csf_pcc;

	/*
	 * Zero the general-purpose register file, restoring not only $pc but
	 * also the slot for $zero, which will hold a magic number across
	 * sigcode and sigreturn().  Also set a return value.
	 *
	 * XXXRW: The kernel unwinder sets V1 to the signal number?
	 */
	saved_mcreg0 = uap->uc_mcontext.mc_regs[0];
	memset(uap->uc_mcontext.mc_regs, 0,
	    sizeof(uap->uc_mcontext.mc_regs));
	uap->uc_mcontext.mc_regs[0] = saved_mcreg0;
	uap->uc_mcontext.mc_pc = cheri_getoffset(cfp->cf_pcc);
	uap->uc_mcontext.mc_regs[V0] = ret;

	/*
	 * Update kernel view of trusted stack.
	 */
	if (sysarch(CHERI_SET_STACK, &cs) != 0)
		return (-1);
	return (0);
}
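/*
 * Sketch of a typical caller (the example_sandbox_abort_handler() name and
 * its installation as a SA_SIGINFO handler are assumptions, not from the
 * original source): on a fault delivered while executing inside a sandbox,
 * unwind the entire trusted stack and arrange for the interrupted object
 * invocation to appear to return -1.
 */
#include <signal.h>
#include <stdlib.h>

static void
example_sandbox_abort_handler(int sig, siginfo_t *info, void *vuap)
{
	ucontext_t *uap = vuap;

	(void)sig;
	(void)info;
	if (cheri_stack_unwind(uap, -1, CHERI_STACK_UNWIND_OP_ALL, 0) != 0)
		abort();
}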
act_t *act_register(reg_frame_t *frame, queue_t *queue, const char *name,
		    status_e create_in_status, act_control_t *parent,
		    size_t base)
{
	(void)parent;
	KERNEL_TRACE("act", "Registering activation %s", name);

	if(kernel_next_act >= MAX_ACTIVATIONS) {
		kernel_panic("no act slot");
	}

	act_t *act = kernel_acts + kernel_next_act;

	act->image_base = base;

	//TODO bit of a hack: the kernel needs to know which namespace service to use
	if(kernel_next_act == namespace_num_namespace) {
		KERNEL_TRACE("act", "found namespace");
		ns_ref = act_create_sealed_ref(act);
	}

#ifndef __LITE__
	/* set name */
	kernel_assert(ACT_NAME_MAX_LEN > 0);
	int name_len = 0;
	if(VCAP(name, 1, VCAP_R)) {
		name_len = imin(cheri_getlen(name), ACT_NAME_MAX_LEN - 1);
	}
	for(int i = 0; i < name_len; i++) {
		char c = name[i];
		act->name[i] = c; /* todo: sanitize the name if we do not trust it */
	}
	act->name[name_len] = '\0';
#endif

	/* set status */
	act->status = create_in_status;

	/* Some "documentation" for the interface between the kernel and
	 * activation start.
	 *
	 * These fields are set up by the caller of act_register:
	 *
	 *   a0  : user GP argument (goes to main)
	 *   c3  : user Cap argument (goes to main)
	 *
	 * These fields are set up by act_register itself, although the queue
	 * is an argument to the function:
	 *
	 *   c21 : self control reference
	 *   c23 : namespace reference (may be null for init and namespace)
	 *   c24 : kernel interface table
	 *   c25 : queue
	 */

	/* set namespace */
	frame->cf_c21 = (capability)act_create_sealed_ctrl_ref(act);
	frame->cf_c23 = (capability)ns_ref;
	frame->cf_c24 = (capability)get_if();
	frame->cf_c25 = (capability)queue;

	/* set queue */
	msg_queue_init(act, queue);

	/* set expected sequence to not expecting */
	act->sync_state.sync_token = 0;
	act->sync_state.sync_condition = 0;

	/* set scheduling status */
	sched_create(act);

	/* update next_act */
	kernel_next_act++;

	KERNEL_TRACE("register", "image base of %s is %lx", act->name,
	    act->image_base);
	KERNEL_TRACE("act", "%s OK!", __func__);
	return act;
}