/*
 * Fill in *cp by handing the CHERI_CAP_KERN_* constants (permissions, base,
 * length and offset) to cheri_capability_set().
 */
static void
cheri_capability_set_kern(struct chericap *cp)
{

	cheri_capability_set(cp, CHERI_CAP_KERN_PERMS, CHERI_CAP_KERN_BASE,
	    CHERI_CAP_KERN_LENGTH, CHERI_CAP_KERN_OFFSET);
}
/*
 * Fill in *cp by handing the CHERI_CAP_USER_CODE_* constants (permissions,
 * object type, base, length and offset) to cheri_capability_set().
 * Presumably this is the initial userspace $pcc -- inferred from the name.
 */
static void
cheri_capability_set_user_pcc(struct chericap *cp)
{

	cheri_capability_set(cp, CHERI_CAP_USER_CODE_PERMS,
	    CHERI_CAP_USER_CODE_OTYPE, CHERI_CAP_USER_CODE_BASE,
	    CHERI_CAP_USER_CODE_LENGTH, CHERI_CAP_USER_CODE_OFFSET);
}
/*
 * Fill in *cp by handing the CHERI_CAP_USER_DATA_* constants (permissions,
 * object type, base, length and offset) to cheri_capability_set().
 * Presumably this is the initial userspace $c0 -- inferred from the name.
 */
static void
cheri_capability_set_user_c0(struct chericap *cp)
{

	cheri_capability_set(cp, CHERI_CAP_USER_DATA_PERMS,
	    CHERI_CAP_USER_DATA_OTYPE, CHERI_CAP_USER_DATA_BASE,
	    CHERI_CAP_USER_DATA_LENGTH, CHERI_CAP_USER_DATA_OFFSET);
}
/*
 * Fill in *cp by handing the CHERI_CAP_PRIV_* constants (permissions,
 * object type, base, length and offset) to cheri_capability_set().
 */
static void
cheri_capability_set_priv(struct chericap *cp)
{

	cheri_capability_set(cp, CHERI_CAP_PRIV_PERMS, CHERI_CAP_PRIV_OTYPE,
	    CHERI_CAP_PRIV_BASE, CHERI_CAP_PRIV_LENGTH,
	    CHERI_CAP_PRIV_OFFSET);
}
/*
 * Fill in *cp with the user code permissions, base and length, using
 * entry_addr as the final (offset) argument to cheri_capability_set().
 */
static void
cheri_capability_set_user_entry(struct chericap *cp, unsigned long entry_addr)
{

	/*
	 * Set the jump target register for the pure capability calling
	 * convention.
	 */
	cheri_capability_set(cp, CHERI_CAP_USER_CODE_PERMS,
	    CHERI_CAP_USER_CODE_BASE, CHERI_CAP_USER_CODE_LENGTH,
	    entry_addr);
}
/*
 * Fill in *cp with a user-code capability covering the signal code.
 *
 * The region is taken to end at se->sv_psstrings and to be *se->sv_szsigcode
 * bytes long; its start is rounded down to a multiple of
 * sizeof(struct chericap).  The capability is created with offset 0.
 */
static void
cheri_capability_set_user_sigcode(struct chericap *cp, struct sysentvec *se)
{
	int sigcode_len = *se->sv_szsigcode;
	uintptr_t sigcode_start;

	/* XXX: true for mips64 and mips64-cheriabi... */
	sigcode_start = rounddown2((uintptr_t)se->sv_psstrings - sigcode_len,
	    sizeof(struct chericap));

	cheri_capability_set(cp, CHERI_CAP_USER_CODE_PERMS,
	    CHERI_CAP_USER_CODE_OTYPE, (void *)sigcode_start, sigcode_len, 0);
}
/*
 * Set up the initial register state of a thread for a freshly exec'd
 * CheriABI image.
 *
 * td    - thread whose trapframe, PCB and MD fields are (re)initialised.
 * imgp  - image parameters; only imgp->entry_addr is consumed here.
 * stack - initial top-of-stack address; must be a multiple of
 *         sizeof(struct chericap).
 */
static void
cheriabi_exec_setregs(struct thread *td, struct image_params *imgp,
    u_long stack)
{
	struct cheri_signal *sigstate;
	u_long stack_bottom, stack_span;

	/* Start from an all-zero trapframe. */
	bzero((caddr_t)td->td_frame, sizeof(*td->td_frame));

	KASSERT(stack % sizeof(struct chericap) == 0,
	    ("CheriABI stack pointer not properly aligned"));

	/* Per-process capability built from the CHERI_CAP_USER_MMAP_* values. */
	cheri_capability_set(&td->td_proc->p_md.md_cheri_mmap_cap,
	    CHERI_CAP_USER_MMAP_PERMS, CHERI_CAP_USER_MMAP_BASE,
	    CHERI_CAP_USER_MMAP_LENGTH, CHERI_CAP_USER_MMAP_OFFSET);

	td->td_frame->pc = imgp->entry_addr;
	td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE |
	    (mips_rd_status() & MIPS_SR_INT_MASK) |
	    MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_COP_2_BIT;

	cheri_exec_setregs(td, imgp->entry_addr);
	cheri_stack_init(td->td_pcb);

	/*
	 * $c3 carries a pointer to the struct cheriabi_execdata found at the
	 * top of the stack.
	 *
	 * XXXBD: should likely be read only
	 */
	cheri_capability_set(&td->td_frame->c3, CHERI_CAP_USER_DATA_PERMS,
	    (void *)stack, sizeof(struct cheriabi_execdata), 0);

	/*
	 * Bound the stack capability to the largest region this process may
	 * use, and make sp relative to that capability.
	 *
	 * XXXBD: 8MB should be the process stack limit.
	 */
	CTASSERT(CHERI_CAP_USER_DATA_BASE == 0);
	stack_bottom = USRSTACK - (1024 * 1024 * 8);
	KASSERT(stack > stack_bottom,
	    ("top of stack 0x%lx is below stack base 0x%lx", stack,
	    stack_bottom));
	stack_span = stack - stack_bottom;
	cheri_capability_set(&td->td_frame->stc, CHERI_CAP_USER_DATA_PERMS,
	    (void *)stack_bottom, stack_span, 0);
	td->td_frame->sp = stack_span;

	/*
	 * The signal stack capability gets the same bounds; the default
	 * installed by cheri_exec_setregs() spans the whole address space.
	 */
	sigstate = &td->td_pcb->pcb_cherisignal;
	cheri_capability_set(&sigstate->csig_stc, CHERI_CAP_USER_DATA_PERMS,
	    (void *)stack_bottom, stack_span, 0);
	/* XXX: set sp for signal stack! */

	/* Forget any FPU state belonging to the previous image. */
	td->td_md.md_flags &= ~MDTD_FPUSED;
	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, (struct thread *)0);

	td->td_md.md_ss_addr = 0;
	td->td_md.md_tls_tcb_offset = TLS_TP_OFFSET + TLS_TCB_SIZE_C;
}
/*
 * The CheriABI version of sendsig(9) largely borrows from the MIPS version,
 * and it is important to keep them in sync.  It differs primarily in that it
 * must also be aware of user stack-handling ABIs, so is also sensitive to our
 * (fluctuating) design choices in how $stc and $sp interact.  The current
 * design uses ($stc + $sp) for stack-relative references, so early on we have
 * to calculate a 'relocated' version of $sp that we can then use for
 * MIPS-style access.
 *
 * This code, as with the CHERI-aware MIPS code, makes a privilege
 * determination in order to decide whether to trust the stack exposed by the
 * user code for the purposes of signal handling.  We must use the alternative
 * stack if there is any indication that using the user thread's stack state
 * might violate the userspace compartmentalisation model.
 *
 * catcher - handler address installed into the thread's pc.
 * ksi     - signal information; ksi_signo and ksi_code are consumed.
 * mask    - signal mask saved into the outgoing ucontext.
 *
 * Called with the process lock and psp->ps_mtx held (both asserted); both
 * are dropped around the copy-out and re-acquired before returning.  The
 * process is terminated via sigexit(SIGILL) if the frame cannot be copied
 * out, or if a sandboxed thread has no alternative stack to deliver on.
 */
static void
cheriabi_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *regs;
	struct sigacts *psp;
	struct sigframe_c sf, *sfp;
	uintptr_t stackbase;
	vm_offset_t sp;
	int cheri_is_sandboxed;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	regs = td->td_frame;

	/*
	 * In CheriABI, $sp is $stc relative, so calculate a relocation base
	 * that must be combined with regs->sp from this point onwards.
	 * Unfortunately, we won't retain bounds and permissions information
	 * (as is the case elsewhere in CheriABI).  While 'stackbase'
	 * suggests that $stc's offset isn't included, in practice it will be,
	 * although we may reasonably assume that it will be zero.
	 *
	 * If it turns out we will be delivering to the alternative signal
	 * stack, we'll recalculate stackbase later.
	 */
	CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &td->td_pcb->pcb_regs.stc, 0);
	CHERI_CTOPTR(stackbase, CHERI_CR_CTEMP0, CHERI_CR_KDC);
	oonstack = sigonstack(stackbase + regs->sp);

	/*
	 * CHERI affects signal delivery in the following ways:
	 *
	 * (1) Additional capability-coprocessor state is exposed via
	 *     extensions to the context frame placed on the stack.
	 *
	 * (2) If the user $pcc doesn't include CHERI_PERM_SYSCALL, then we
	 *     consider user state to be 'sandboxed' and therefore to require
	 *     special delivery handling which includes a domain-switch to the
	 *     thread's context-switch domain.  (This is done by
	 *     cheri_sendsig()).
	 *
	 * (3) If an alternative signal stack is not defined, and we are in a
	 *     'sandboxed' state, then we have two choices: (a) if the signal
	 *     is of type SA_SANDBOX_UNWIND, we will automatically unwind the
	 *     trusted stack by one frame; (b) otherwise, we will terminate
	 *     the process unconditionally.
	 */
	cheri_is_sandboxed = cheri_signal_sandboxed(td);

	/*
	 * We provide the ability to drop into the debugger in two different
	 * circumstances: (1) if the code running is sandboxed; and (2) if the
	 * fault is a CHERI protection fault.  Handle both here for the
	 * non-unwind case.  Do this before we rewrite any general-purpose or
	 * capability register state for the thread.
	 */
#if DDB
	if (cheri_is_sandboxed && security_cheri_debugger_on_sandbox_signal)
		kdb_enter(KDB_WHY_CHERI, "Signal delivery to CHERI sandbox");
	else if (sig == SIGPROT && security_cheri_debugger_on_sigprot)
		kdb_enter(KDB_WHY_CHERI,
		    "SIGPROT delivered outside sandbox");
#endif

	/*
	 * If a thread is running sandboxed, we can't rely on $sp which may
	 * not point at a valid stack in the ambient context, or even be
	 * maliciously manipulated.  We must therefore always use the
	 * alternative stack.  We are also therefore unable to tell whether we
	 * are on the alternative stack, so must clear 'oonstack' here.
	 *
	 * XXXRW: This requires significant further thinking; however, the net
	 * upshot is that it is not a good idea to do an object-capability
	 * invoke() from a signal handler, as with so many other things in
	 * life.
	 */
	if (cheri_is_sandboxed != 0)
		oonstack = 0;

	/* save user context */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
#if 0
	/*
	 * XXX-BD: stack_t type differs and we can't just fake a capability.
	 * We don't restore the value so what purpose does it serve?
	 */
	sf.sf_uc.uc_stack = td->td_sigstk;
#endif
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_pc = regs->pc;
	sf.sf_uc.uc_mcontext.mullo = regs->mullo;
	sf.sf_uc.uc_mcontext.mulhi = regs->mulhi;
	cheri_capability_copy(&sf.sf_uc.uc_mcontext.mc_tls,
	    &td->td_md.md_tls_cap);
	sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC;  /* magic number */
	/* Slot 0 holds the magic; the remaining slots are copied from ast on. */
	bcopy((void *)&regs->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1],
	    sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t));
	sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED;
#if defined(CPU_HAVEFPU)
	if (sf.sf_uc.uc_mcontext.mc_fpused) {
		/* if FPU has current state, save it first */
		if (td == PCPU_GET(fpcurthread))
			MipsSaveCurFPState(td);
		bcopy((void *)&td->td_frame->f0,
		    (void *)sf.sf_uc.uc_mcontext.mc_fpregs,
		    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
	}
#endif
	/* XXXRW: sf.sf_uc.uc_mcontext.sr seems never to be set? */
	sf.sf_uc.uc_mcontext.cause = regs->cause;
	cheri_trapframe_to_cheriframe(&td->td_pcb->pcb_regs,
	    &sf.sf_uc.uc_mcontext.mc_cheriframe);

	/*
	 * Allocate and validate space for the signal handler context.  For
	 * CheriABI purposes, 'sp' from this point forward is relocated
	 * relative to any pertinent stack capability.  For an alternative
	 * signal context, we need to recalculate stackbase for later use in
	 * calculating a new $sp for the signal-handling context.
	 *
	 * XXXRW: It seems like it would be nice to do both the regular and
	 * alternative stack calculations in the same place.  However, we need
	 * oonstack sooner.  We should clean this up later.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		stackbase = (vm_offset_t)td->td_sigstk.ss_sp;
		sp = (vm_offset_t)(stackbase + td->td_sigstk.ss_size);
	} else {
		/*
		 * Signals delivered when a CHERI sandbox is present must be
		 * delivered on the alternative stack rather than a local one.
		 * If an alternative stack isn't present, then terminate or
		 * risk leaking capabilities (and control) to the sandbox (or
		 * just crashing the sandbox).
		 */
		if (cheri_is_sandboxed) {
			mtx_unlock(&psp->ps_mtx);
			printf("pid %d, tid %d: signal in sandbox without "
			    "alternative stack defined\n", td->td_proc->p_pid,
			    td->td_tid);
			sigexit(td, SIGILL);
			/* NOTREACHED */
		}
		sp = (vm_offset_t)(stackbase + regs->sp);
	}
	sp -= sizeof(struct sigframe_c);
	/* For CHERI, keep the stack pointer capability aligned. */
	sp &= ~(CHERICAP_SIZE - 1);
	sfp = (void *)sp;

	/* Build the argument list for the signal handler. */
	regs->a0 = sig;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/*
		 * Signal handler installed with SA_SIGINFO.
		 *
		 * XXXRW: We would ideally synthesise these from the
		 * user-originated stack capability, rather than $kdc, to be
		 * on the safe side.
		 */
		cheri_capability_set(&regs->c3, CHERI_CAP_USER_DATA_PERMS,
		    (void *)(intptr_t)&sfp->sf_si, sizeof(sfp->sf_si), 0);
		cheri_capability_set(&regs->c4, CHERI_CAP_USER_DATA_PERMS,
		    (void *)(intptr_t)&sfp->sf_uc, sizeof(sfp->sf_uc), 0);
		/* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = ksi->ksi_code;
		/*
		 * Write out badvaddr, but don't create a valid capability
		 * since that might allow privilege amplification.
		 *
		 * XXX-BD: This probably isn't the right method.
		 * XXX-BD: Do we want to set base or offset?
		 *
		 * XXXRW: I think there's some argument that anything
		 * receiving this signal is fairly privileged.  But we could
		 * generate a $ddc-relative (or $pcc-relative) capability, if
		 * possible.  (Using versions of $ddc and $pcc for the
		 * signal-handling context rather than that which caused the
		 * signal).  I'd be tempted to deliver badvaddr as the offset
		 * of that capability.  If badvaddr is not in range, then we
		 * should just deliver an untagged NULL-derived version
		 * (perhaps)?
		 */
		*((uintptr_t *)&sf.sf_si.si_addr) =
		    (uintptr_t)(void *)regs->badvaddr;
	}
	/*
	 * XXX: No support for undocumented arguments to old style handlers.
	 */

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyoutcap(&sf, (void *)sfp, sizeof(sf)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		PROC_LOCK(p);
		printf("pid %d, tid %d: could not copy out sigframe\n",
		    td->td_proc->p_pid, td->td_tid);
		sigexit(td, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Re-acquire process locks necessary to access suitable pcb fields.
	 * However, arguably, these operations should be atomic with the
	 * initial inspection of 'psp'.
	 */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);

	/*
	 * Install CHERI signal-delivery register state for handler to run
	 * in.  As we don't install this in the CHERI frame on the user stack,
	 * it will (generally) be removed automatically on sigreturn().
	 */
	/* XXX-BD: this isn't quite right */
	cheri_sendsig(td);

	/*
	 * Note that $sp must be installed relative to $stc, so re-subtract
	 * the stack base here.
	 */
	regs->pc = (register_t)(intptr_t)catcher;
	regs->sp = (register_t)((intptr_t)sfp - stackbase);

	cheri_capability_copy(&regs->c12, &psp->ps_sigcap[_SIG_IDX(sig)]);
	cheri_capability_copy(&regs->c17,
	    &td->td_pcb->pcb_cherisignal.csig_sigcode);
}
/*
 * Deliver a signal to the current thread for CheriABI: build a
 * struct sigframe_c holding the interrupted context, copy it out to the
 * chosen user stack, and redirect the thread to the handler.
 *
 * catcher - handler address installed into the thread's pc.
 * ksi     - signal information; ksi_signo and ksi_code are consumed.
 * mask    - signal mask saved into the outgoing ucontext.
 *
 * Called with the process lock and psp->ps_mtx held (both asserted); both
 * are dropped around the copy-out and re-acquired before returning.  The
 * process is terminated via sigexit(SIGILL) if the frame cannot be copied
 * out, or if a sandboxed thread has no alternative stack to deliver on.
 */
static void
cheriabi_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *regs;
	struct cheri_frame *capreg;
	struct sigacts *psp;
	struct sigframe_c sf, *sfp;
	vm_offset_t sp;
	int cheri_is_sandboxed;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	regs = td->td_frame;
	capreg = &td->td_pcb->pcb_cheriframe;
	oonstack = sigonstack(regs->sp);

	/*
	 * CHERI affects signal delivery in the following ways:
	 *
	 * (1) Additional capability-coprocessor state is exposed via
	 *     extensions to the context frame placed on the stack.
	 *
	 * (2) If the user $pcc doesn't include CHERI_PERM_SYSCALL, then we
	 *     consider user state to be 'sandboxed' and therefore to require
	 *     special delivery handling which includes a domain-switch to the
	 *     thread's context-switch domain.  (This is done by
	 *     cheri_sendsig()).
	 *
	 * (3) If an alternative signal stack is not defined, and we are in a
	 *     'sandboxed' state, then we have two choices: (a) if the signal
	 *     is of type SA_SANDBOX_UNWIND, we will automatically unwind the
	 *     trusted stack by one frame; (b) otherwise, we will terminate
	 *     the process unconditionally.
	 */
	cheri_is_sandboxed = cheri_signal_sandboxed(td);

	/*
	 * We provide the ability to drop into the debugger in two different
	 * circumstances: (1) if the code running is sandboxed; and (2) if the
	 * fault is a CHERI protection fault.  Handle both here for the
	 * non-unwind case.  Do this before we rewrite any general-purpose or
	 * capability register state for the thread.
	 */
#if DDB
	if (cheri_is_sandboxed && security_cheri_debugger_on_sandbox_signal)
		kdb_enter(KDB_WHY_CHERI, "Signal delivery to CHERI sandbox");
	else if (sig == SIGPROT && security_cheri_debugger_on_sigprot)
		kdb_enter(KDB_WHY_CHERI,
		    "SIGPROT delivered outside sandbox");
#endif

	/*
	 * If a thread is running sandboxed, we can't rely on $sp which may
	 * not point at a valid stack in the ambient context, or even be
	 * maliciously manipulated.  We must therefore always use the
	 * alternative stack.  We are also therefore unable to tell whether we
	 * are on the alternative stack, so must clear 'oonstack' here.
	 *
	 * XXXRW: This requires significant further thinking; however, the net
	 * upshot is that it is not a good idea to do an object-capability
	 * invoke() from a signal handler, as with so many other things in
	 * life.
	 */
	if (cheri_is_sandboxed != 0)
		oonstack = 0;

	/*
	 * save user context
	 *
	 * Clear the whole local frame (sizeof(sf), i.e. struct sigframe_c)
	 * so that every byte later copied out to userspace is initialised.
	 */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
#if 0
	/*
	 * XXX-BD: stack_t type differs and we can't just fake a capability.
	 * We don't restore the value so what purpose does it serve?
	 */
	sf.sf_uc.uc_stack = td->td_sigstk;
#endif
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_pc = regs->pc;
	sf.sf_uc.uc_mcontext.mullo = regs->mullo;
	sf.sf_uc.uc_mcontext.mulhi = regs->mulhi;
#if 0
	/* XXX-BD: what actually makes sense here? */
	sf.sf_uc.uc_mcontext.mc_tls = td->td_md.md_tls;
#endif
	sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC;  /* magic number */
	/* Slot 0 holds the magic; the remaining slots are copied from ast on. */
	bcopy((void *)&regs->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1],
	    sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t));
	sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED;
	if (sf.sf_uc.uc_mcontext.mc_fpused) {
		/* if FPU has current state, save it first */
		if (td == PCPU_GET(fpcurthread))
			MipsSaveCurFPState(td);
		bcopy((void *)&td->td_frame->f0,
		    (void *)sf.sf_uc.uc_mcontext.mc_fpregs,
		    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
	}
	/* XXXRW: sf.sf_uc.uc_mcontext.sr seems never to be set? */
	sf.sf_uc.uc_mcontext.cause = regs->cause;
	cheri_memcpy(&sf.sf_uc.uc_mcontext.mc_cheriframe,
	    &td->td_pcb->pcb_cheriframe, sizeof(struct cheri_frame));

	/* Allocate and validate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sp = (vm_offset_t)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size);
	} else {
		/*
		 * Signals delivered when a CHERI sandbox is present must be
		 * delivered on the alternative stack rather than a local one.
		 * If an alternative stack isn't present, then terminate or
		 * risk leaking capabilities (and control) to the sandbox (or
		 * just crashing the sandbox).
		 */
		if (cheri_is_sandboxed) {
			mtx_unlock(&psp->ps_mtx);
			printf("pid %d, tid %d: signal in sandbox without "
			    "alternative stack defined\n", td->td_proc->p_pid,
			    td->td_tid);
			sigexit(td, SIGILL);
			/* NOTREACHED */
		}
		sp = (vm_offset_t)regs->sp;
	}
	sp -= sizeof(struct sigframe_c);
	/* For CHERI, keep the stack pointer capability aligned. */
	sp &= ~(CHERICAP_SIZE - 1);
	sfp = (void *)sp;

	/* Build the argument list for the signal handler. */
	regs->a0 = sig;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		cheri_capability_set(&capreg->cf_c3,
		    CHERI_CAP_USER_DATA_PERMS, CHERI_CAP_USER_DATA_OTYPE,
		    (void *)(intptr_t)&sfp->sf_si, sizeof(sfp->sf_si), 0);
		cheri_capability_set(&capreg->cf_c4,
		    CHERI_CAP_USER_DATA_PERMS, CHERI_CAP_USER_DATA_OTYPE,
		    (void *)(intptr_t)&sfp->sf_uc, sizeof(sfp->sf_uc), 0);
		/* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = ksi->ksi_code;
		/*
		 * Write out badvaddr, but don't create a valid capability
		 * since that might allow privilege amplification.
		 *
		 * XXX-BD: This probably isn't the right method.
		 * XXX-BD: Do we want to set base or offset?
		 */
		*((uintptr_t *)&sf.sf_si.si_addr) =
		    (uintptr_t)(void *)regs->badvaddr;
	}
	/*
	 * XXX: No support for undocumented arguments to old style handlers.
	 */

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyoutcap(&sf, sfp, sizeof(sf)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Install CHERI signal-delivery register state for handler to run
	 * in.  As we don't install this in the CHERI frame on the user stack,
	 * it will (generally) be removed automatically on sigreturn().
	 */
	/* XXX-BD: this isn't quite right */
	cheri_sendsig(td);

	regs->pc = (register_t)(intptr_t)catcher;
	regs->sp = (register_t)(intptr_t)sfp;

	/*
	 * NOTE(review): psp->ps_sigcap is read here before ps_mtx is
	 * re-taken below; confirm whether the relock should precede these
	 * copies.
	 */
	cheri_capability_copy(&capreg->cf_c12,
	    &psp->ps_sigcap[_SIG_IDX(sig)]);
	cheri_capability_copy(&capreg->cf_c17,
	    &td->td_pcb->pcb_cherisignal.csig_sigcode);

	/* Return to the caller with the locks held, as on entry. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
static void cheriabi_set_syscall_retval(struct thread *td, int error) { struct trapframe *locr0 = td->td_frame; struct cheri_frame *capreg = &td->td_pcb->pcb_cheriframe; register_t a0; unsigned int code; struct sysentvec *se; code = locr0->v0; a0 = locr0->a0; if (code == CHERIABI_SYS_syscall || code == CHERIABI_SYS___syscall) { code = locr0->a0; a0 = locr0->a1; } se = td->td_proc->p_sysent; /* * When programs start up, they pass through the return path * (maybe via execve?). When this happens, code is an absurd * and out of range value. */ if (code > se->sv_size) code = 0; switch (error) { case 0: locr0->v0 = td->td_retval[0]; locr0->v1 = td->td_retval[1]; locr0->a3 = 0; if (!CHERIABI_SYS_argmap[code].sam_return_ptr) break; switch (code) { case CHERIABI_SYS_mmap: /* * Assuming no one has stomped on it, a0 is the length * requested. * * XXX: In a compressed capability world, we will need * to round up out allocations to a representable size, * not just the end of the page and return that * capability instead. Note well: this will violate * POSIX which assumes fixed page sizes and page * granularity allocations and probably will break * existing code. * * XXXRW: How should we decide what permissions are * appropriate here -- based on the MAP_ arguments? * Perhaps combined with any permissions found in the * optionally passed originating capability? For now, * return permissions appropriate for either data or * code use, and userspace will need to mask them off * as desired. */ if ((void *)td->td_retval[0] == MAP_FAILED) /* XXXBD: is this really what we want? 
*/ cheri_capability_set(&capreg->cf_c3, CHERI_CAP_USER_DATA_PERMS, NULL, 0, 0, -1); else cheri_capability_set(&capreg->cf_c3, CHERI_CAP_USER_DATA_PERMS | CHERI_CAP_USER_CODE_PERMS, NULL, (void *)td->td_retval[0], roundup2((size_t)a0, PAGE_SIZE), 0); break; default: panic("%s: unsupported syscall (%u) returning pointer", __func__, code); } break; case ERESTART: locr0->pc = td->td_pcb->pcb_tpc; break; case EJUSTRETURN: break; /* nothing to do */ default: locr0->v0 = error; locr0->a3 = 1; } }