/*
 * Install a user-supplied CheriABI machine context on thread 'td'.
 *
 * The context is accepted only if mc_regs[0] holds UCONTEXT_MAGIC; otherwise
 * a diagnostic is printed and EINVAL is returned.  On success the trapframe
 * is populated from the saved CHERI frame and register array, the FP-used
 * flag, pc, mullo/mulhi and the TLS capability are restored, and 0 is
 * returned.
 */
static int
cheriabi_set_mcontext(struct thread *td, mcontext_c_t *mcp)
{
	struct trapframe *frame;
	int tls_tagged;

	if (mcp->mc_regs[0] != UCONTEXT_MAGIC) {
		printf("mcp->mc_regs[0] != UCONTEXT_MAGIC\n");
		return (EINVAL);
	}

	frame = td->td_frame;

	/*
	 * Populate the trapframe from the saved CHERI frame, then copy the
	 * saved register array over the frame starting at 'zero'.
	 */
	cheri_trapframe_from_cheriframe(frame, &mcp->mc_cheriframe);
	bcopy((void *)&mcp->mc_regs, (void *)&frame->zero,
	    sizeof(mcp->mc_regs));

	/*
	 * Only MDTD_FPUSED is taken from the user context.  On QEMU Malta
	 * builds the thread's existing MDTD_QTRACE bit is carried over.
	 */
#ifdef CPU_QEMU_MALTA
	td->td_md.md_flags = (mcp->mc_fpused & MDTD_FPUSED) |
	    (td->td_md.md_flags & MDTD_QTRACE);
#else
	td->td_md.md_flags = (mcp->mc_fpused & MDTD_FPUSED);
#endif

	if (mcp->mc_fpused) {
		bcopy((void *)&mcp->mc_fpregs, (void *)&frame->f0,
		    sizeof(mcp->mc_fpregs));
	}

	frame->pc = mcp->mc_pc;
	frame->mullo = mcp->mullo;
	frame->mulhi = mcp->mulhi;

	/*
	 * Keep the TLS capability itself, and derive the plain md_tls
	 * pointer from it when it is tagged; an untagged capability yields
	 * a NULL md_tls.
	 */
	cheri_capability_copy(&td->td_md.md_tls_cap, &mcp->mc_tls);
	CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &mcp->mc_tls, 0);
	CHERI_CGETTAG(tls_tagged, CHERI_CR_CTEMP0);
	if (!tls_tagged) {
		td->td_md.md_tls = NULL;
	} else {
		CHERI_CTOPTR(td->td_md.md_tls, CHERI_CR_CTEMP0, CHERI_CR_KDC);
	}

	/* Don't let the user set any bits in the status and cause registers. */
	return (0);
}
/*
 * The CheriABI version of sendsig(9) largely borrows from the MIPS version,
 * and it is important to keep them in sync.  It differs primarily in that it
 * must also be aware of user stack-handling ABIs, so is also sensitive to our
 * (fluctuating) design choices in how $stc and $sp interact.  The current
 * design uses ($stc + $sp) for stack-relative references, so early on we have
 * to calculate a 'relocated' version of $sp that we can then use for
 * MIPS-style access.
 *
 * This code, as with the CHERI-aware MIPS code, makes a privilege
 * determination in order to decide whether to trust the stack exposed by the
 * user code for the purposes of signal handling.  We must use the alternative
 * stack if there is any indication that using the user thread's stack state
 * might violate the userspace compartmentalisation model.
 *
 * Arguments:
 *   catcher - handler address; installed as the new pc.
 *   ksi     - signal information; ksi_signo and ksi_code are read.
 *   mask    - signal mask saved into the frame's uc_sigmask.
 *
 * Locking: the process lock and p_sigacts->ps_mtx must be held on entry
 * (both are asserted).  Both are dropped around the copy-out of the signal
 * frame and re-acquired before returning.  On failure paths the thread is
 * terminated via sigexit() with the process lock held.
 */
static void
cheriabi_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *regs;
	struct sigacts *psp;
	struct sigframe_c sf, *sfp;
	uintptr_t stackbase;
	vm_offset_t sp;
	int cheri_is_sandboxed;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	regs = td->td_frame;

	/*
	 * In CheriABI, $sp is $stc relative, so calculate a relocation base
	 * that must be combined with regs->sp from this point onwards.
	 * Unfortunately, we won't retain bounds and permissions information
	 * (as is the case elsewhere in CheriABI).  While 'stackbase'
	 * suggests that $stc's offset isn't included, in practice it will be,
	 * although we may reasonably assume that it will be zero.
	 *
	 * If it turns out we will be delivering to the alternative signal
	 * stack, we'll recalculate stackbase later.
	 */
	CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &td->td_pcb->pcb_regs.stc, 0);
	CHERI_CTOPTR(stackbase, CHERI_CR_CTEMP0, CHERI_CR_KDC);
	oonstack = sigonstack(stackbase + regs->sp);

	/*
	 * CHERI affects signal delivery in the following ways:
	 *
	 * (1) Additional capability-coprocessor state is exposed via
	 *     extensions to the context frame placed on the stack.
	 *
	 * (2) If the user $pcc doesn't include CHERI_PERM_SYSCALL, then we
	 *     consider user state to be 'sandboxed' and therefore to require
	 *     special delivery handling which includes a domain-switch to the
	 *     thread's context-switch domain.  (This is done by
	 *     cheri_sendsig()).
	 *
	 * (3) If an alternative signal stack is not defined, and we are in a
	 *     'sandboxed' state, then we have two choices: (a) if the signal
	 *     is of type SA_SANDBOX_UNWIND, we will automatically unwind the
	 *     trusted stack by one frame; (b) otherwise, we will terminate
	 *     the process unconditionally.
	 */
	cheri_is_sandboxed = cheri_signal_sandboxed(td);

	/*
	 * We provide the ability to drop into the debugger in two different
	 * circumstances: (1) if the code running is sandboxed; and (2) if the
	 * fault is a CHERI protection fault.  Handle both here for the
	 * non-unwind case.  Do this before we rewrite any general-purpose or
	 * capability register state for the thread.
	 */
#if DDB
	if (cheri_is_sandboxed && security_cheri_debugger_on_sandbox_signal)
		kdb_enter(KDB_WHY_CHERI, "Signal delivery to CHERI sandbox");
	else if (sig == SIGPROT && security_cheri_debugger_on_sigprot)
		kdb_enter(KDB_WHY_CHERI,
		    "SIGPROT delivered outside sandbox");
#endif

	/*
	 * If a thread is running sandboxed, we can't rely on $sp which may
	 * not point at a valid stack in the ambient context, or even be
	 * maliciously manipulated.  We must therefore always use the
	 * alternative stack.  We are also therefore unable to tell whether we
	 * are on the alternative stack, so must clear 'oonstack' here.
	 *
	 * XXXRW: This requires significant further thinking; however, the net
	 * upshot is that it is not a good idea to do an object-capability
	 * invoke() from a signal handler, as with so many other things in
	 * life.
	 */
	if (cheri_is_sandboxed != 0)
		oonstack = 0;

	/* save user context */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
#if 0
	/*
	 * XXX-BD: stack_t type differs and we can't just fake a capability.
	 * We don't restore the value so what purpose does it serve?
	 */
	sf.sf_uc.uc_stack = td->td_sigstk;
#endif
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_pc = regs->pc;
	sf.sf_uc.uc_mcontext.mullo = regs->mullo;
	sf.sf_uc.uc_mcontext.mulhi = regs->mulhi;
	cheri_capability_copy(&sf.sf_uc.uc_mcontext.mc_tls,
	    &td->td_md.md_tls_cap);
	sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC;  /* magic number */
	/* Slot 0 carries the magic, so the copy starts at 'ast' into slot 1. */
	bcopy((void *)&regs->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1],
	    sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t));
	sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED;
#if defined(CPU_HAVEFPU)
	if (sf.sf_uc.uc_mcontext.mc_fpused) {
		/* if FPU has current state, save it first */
		if (td == PCPU_GET(fpcurthread))
			MipsSaveCurFPState(td);
		bcopy((void *)&td->td_frame->f0,
		    (void *)sf.sf_uc.uc_mcontext.mc_fpregs,
		    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
	}
#endif
	/* XXXRW: sf.sf_uc.uc_mcontext.sr seems never to be set? */
	sf.sf_uc.uc_mcontext.cause = regs->cause;
	cheri_trapframe_to_cheriframe(&td->td_pcb->pcb_regs,
	    &sf.sf_uc.uc_mcontext.mc_cheriframe);

	/*
	 * Allocate and validate space for the signal handler context.  For
	 * CheriABI purposes, 'sp' from this point forward is relocated
	 * relative to any pertinent stack capability.  For an alternative
	 * signal context, we need to recalculate stackbase for later use in
	 * calculating a new $sp for the signal-handling context.
	 *
	 * XXXRW: It seems like it would be nice to do both the regular and
	 * alternative stack calculations in the same place.  However, we need
	 * oonstack sooner.  We should clean this up later.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		stackbase = (vm_offset_t)td->td_sigstk.ss_sp;
		sp = (vm_offset_t)(stackbase + td->td_sigstk.ss_size);
	} else {
		/*
		 * Signals delivered when a CHERI sandbox is present must be
		 * delivered on the alternative stack rather than a local one.
		 * If an alternative stack isn't present, then terminate or
		 * risk leaking capabilities (and control) to the sandbox (or
		 * just crashing the sandbox).
		 */
		if (cheri_is_sandboxed) {
			mtx_unlock(&psp->ps_mtx);
			printf("pid %d, tid %d: signal in sandbox without "
			    "alternative stack defined\n", td->td_proc->p_pid,
			    td->td_tid);
			sigexit(td, SIGILL);
			/* NOTREACHED */
		}
		sp = (vm_offset_t)(stackbase + regs->sp);
	}
	sp -= sizeof(struct sigframe_c);
	/* For CHERI, keep the stack pointer capability aligned. */
	sp &= ~(CHERICAP_SIZE - 1);
	sfp = (void *)sp;

	/* Build the argument list for the signal handler. */
	regs->a0 = sig;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/*
		 * Signal handler installed with SA_SIGINFO.
		 *
		 * XXXRW: We would ideally synthesise these from the
		 * user-originated stack capability, rather than $kdc, to be
		 * on the safe side.
		 */
		cheri_capability_set(&regs->c3, CHERI_CAP_USER_DATA_PERMS,
		    (void *)(intptr_t)&sfp->sf_si, sizeof(sfp->sf_si), 0);
		cheri_capability_set(&regs->c4, CHERI_CAP_USER_DATA_PERMS,
		    (void *)(intptr_t)&sfp->sf_uc, sizeof(sfp->sf_uc), 0);
		/* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = ksi->ksi_code;
		/*
		 * Write out badvaddr, but don't create a valid capability
		 * since that might allow privilege amplification.
		 *
		 * XXX-BD: This probably isn't the right method.
		 * XXX-BD: Do we want to set base or offset?
		 *
		 * XXXRW: I think there's some argument that anything
		 * receiving this signal is fairly privileged.  But we could
		 * generate a $ddc-relative (or $pcc-relative) capability, if
		 * possible.  (Using versions of $ddc and $pcc for the
		 * signal-handling context rather than that which caused the
		 * signal).  I'd be tempted to deliver badvaddr as the offset
		 * of that capability.  If badvaddr is not in range, then we
		 * should just deliver an untagged NULL-derived version
		 * (perhaps)?
		 */
		*((uintptr_t *)&sf.sf_si.si_addr) =
		    (uintptr_t)(void *)regs->badvaddr;
	}
	/*
	 * XXX: No support for undocumented arguments to old style handlers.
	 */

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyoutcap(&sf, (void *)sfp, sizeof(sf)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		PROC_LOCK(p);
		printf("pid %d, tid %d: could not copy out sigframe\n",
		    td->td_proc->p_pid, td->td_tid);
		sigexit(td, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Re-acquire process locks necessary to access suitable pcb fields.
	 * However, arguably, these operations should be atomic with the
	 * initial inspection of 'psp'.
	 */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);

	/*
	 * Install CHERI signal-delivery register state for handler to run
	 * in.  As we don't install this in the CHERI frame on the user stack,
	 * it will (generally) be removed automatically on sigreturn().
	 */
	/* XXX-BD: this isn't quite right */
	cheri_sendsig(td);

	/*
	 * Note that $sp must be installed relative to $stc, so re-subtract
	 * the stack base here.
	 */
	regs->pc = (register_t)(intptr_t)catcher;
	regs->sp = (register_t)((intptr_t)sfp - stackbase);

	cheri_capability_copy(&regs->c12, &psp->ps_sigcap[_SIG_IDX(sig)]);
	cheri_capability_copy(&regs->c17,
	    &td->td_pcb->pcb_cherisignal.csig_sigcode);
}
static int cheriabi_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct trapframe *locr0 = td->td_frame; /* aka td->td_pcb->pcv_regs */ struct sysentvec *se; #ifdef OLD_ARG_HANDLING register_t intargs[8]; uintptr_t ptrargs[8]; u_int tag; int i, isaved, psaved, curint, curptr, nintargs, nptrargs; #endif int error; error = 0; bzero(sa->args, sizeof(sa->args)); /* compute next PC after syscall instruction */ td->td_pcb->pcb_tpc = sa->trapframe->pc; /* Remember if restart */ if (DELAYBRANCH(sa->trapframe->cause)) /* Check BD bit */ locr0->pc = MipsEmulateBranch(locr0, sa->trapframe->pc, 0, 0); else locr0->pc += sizeof(int); sa->code = locr0->v0; se = td->td_proc->p_sysent; if (se->sv_mask) sa->code &= se->sv_mask; if (sa->code >= se->sv_size) sa->callp = &se->sv_table[0]; else sa->callp = &se->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; #ifndef OLD_ARG_HANDLING error = cheriabi_dispatch_fill_uap(td, sa->code, sa->args); #else intargs[0] = locr0->a0; intargs[1] = locr0->a1; intargs[2] = locr0->a2; intargs[3] = locr0->a3; intargs[4] = locr0->a4; intargs[5] = locr0->a5; intargs[6] = locr0->a6; intargs[7] = locr0->a7; isaved = 8; #if defined(CPU_CHERI_CHERI0) || defined (CPU_CHERI_CHERI8) || defined(CPU_CHERI_CHERI16) #error CHERIABI does not support fewer than 8 argument registers #endif /* * XXXBD: We should ideally use a user capability rather than $kdc * to generate the pointers, but then we have to answer: which one? * * XXXRW: The kernel cannot distinguish between pointers with tags vs. * untagged (possible) integers, which is problematic when a * system-call argument is an intptr_t. We used to just use CToPtr * here, but this caused untagged integer arguments to be lost. Now * we pick one of CToPtr and CToInt based on the tag -- but this is * not really ideal. Instead, we'd prefer that the kernel could * differentiate between the two explicitly using tagged capabilities, * which we're not yet ready to do. 
*/ CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c3, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[0], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[0], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c4, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[1], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[1], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c5, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[2], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[2], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c6, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[3], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[3], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c7, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[4], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[4], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c8, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[5], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[5], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c9, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[6], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[6], CHERI_CR_CTEMP0); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &locr0->c10, 0); CHERI_CGETTAG(tag, CHERI_CR_CTEMP0); if (tag) CHERI_CTOPTR(ptrargs[7], CHERI_CR_CTEMP0, CHERI_CR_KDC); else CHERI_CTOINT(ptrargs[7], CHERI_CR_CTEMP0); psaved = 8; #ifdef TRAP_DEBUG if (trap_debug) printf("SYSCALL #%d pid:%u\n", sa->code, td->td_proc->p_pid); #endif nptrargs = bitcount(CHERIABI_SYS_argmap[sa->code].sam_ptrmask); nintargs = sa->narg - nptrargs; KASSERT(nintargs <= isaved, ("SYSCALL #%u pid:%u, nintargs (%u) > isaved (%u).\n", sa->code, td->td_proc->p_pid, 
nintargs, isaved)); KASSERT(nptrargs <= psaved, ("SYSCALL #%u pid:%u, nptrargs (%u) > psaved (%u).\n", sa->code, td->td_proc->p_pid, nptrargs, psaved)); /* * Check each argument to see if it is a pointer and pop an argument * off the appropriate list. */ curint = curptr = 0; for (i = 0; i < sa->narg; i++) sa->args[i] = (CHERIABI_SYS_argmap[sa->code].sam_ptrmask & 1 << i) ? ptrargs[curptr++] : intargs[curint++]; #endif /* OLD_ARG_HANDLING */ td->td_retval[0] = 0; td->td_retval[1] = locr0->v1; return (error); }
static int cheriabi_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct trapframe *locr0 = td->td_frame; /* aka td->td_pcb->pcv_regs */ struct cheri_frame *capreg = &td->td_pcb->pcb_cheriframe; register_t intargs[8]; uintptr_t ptrargs[8]; struct sysentvec *se; int error, i, isaved, psaved, curint, curptr, nintargs, nptrargs; error = 0; bzero(sa->args, sizeof(sa->args)); /* compute next PC after syscall instruction */ td->td_pcb->pcb_tpc = sa->trapframe->pc; /* Remember if restart */ if (DELAYBRANCH(sa->trapframe->cause)) /* Check BD bit */ locr0->pc = MipsEmulateBranch(locr0, sa->trapframe->pc, 0, 0); else locr0->pc += sizeof(int); sa->code = locr0->v0; switch (sa->code) { case CHERIABI_SYS___syscall: case CHERIABI_SYS_syscall: /* * This is an indirect syscall, in which the code is the first * argument. */ sa->code = locr0->a0; intargs[0] = locr0->a1; intargs[1] = locr0->a2; intargs[2] = locr0->a3; intargs[3] = locr0->a4; intargs[4] = locr0->a5; intargs[5] = locr0->a6; intargs[6] = locr0->a7; isaved = 7; break; default: /* * A direct syscall, arguments are just parameters to the syscall. */ intargs[0] = locr0->a0; intargs[1] = locr0->a1; intargs[2] = locr0->a2; intargs[3] = locr0->a3; intargs[4] = locr0->a4; intargs[5] = locr0->a5; intargs[6] = locr0->a6; intargs[7] = locr0->a7; isaved = 8; break; } #if defined(CPU_CHERI_CHERI0) || defined (CPU_CHERI_CHERI8) || defined(CPU_CHERI_CHERI16) #error CHERIABI does not support fewer than 8 argument registers #endif /* * XXXBD: we should idealy use a user capability rather than KDC * to generate the pointers, but then we have to answer: which one? 
*/ CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c3, 0); CHERI_CTOPTR(ptrargs[0], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c4, 0); CHERI_CTOPTR(ptrargs[1], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c5, 0); CHERI_CTOPTR(ptrargs[2], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c6, 0); CHERI_CTOPTR(ptrargs[3], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c7, 0); CHERI_CTOPTR(ptrargs[4], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c8, 0); CHERI_CTOPTR(ptrargs[5], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c9, 0); CHERI_CTOPTR(ptrargs[6], CHERI_CR_CTEMP0, CHERI_CR_KDC); CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &capreg->cf_c10, 0); CHERI_CTOPTR(ptrargs[7], CHERI_CR_CTEMP0, CHERI_CR_KDC); psaved = 8; #ifdef TRAP_DEBUG if (trap_debug) printf("SYSCALL #%d pid:%u\n", sa->code, td->td_proc->p_pid); #endif se = td->td_proc->p_sysent; /* * XXX * Shouldn't this go before switching on the code? */ if (se->sv_mask) sa->code &= se->sv_mask; if (sa->code >= se->sv_size) sa->callp = &se->sv_table[0]; else sa->callp = &se->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; nptrargs = bitcount(CHERIABI_SYS_argmap[sa->code].sam_ptrmask); nintargs = sa->narg - nintargs; KASSERT(nintargs <= isaved, ("SYSCALL #%u pid:%u, nintargs (%u) > isaved (%u).\n", sa->code, td->td_proc->p_pid, nintargs, isaved)); KASSERT(nptrargs <= psaved, ("SYSCALL #%u pid:%u, nptrargs (%u) > psaved (%u).\n", sa->code, td->td_proc->p_pid, nptrargs, psaved)); /* * Check each argument to see if it is a pointer and pop an argument * off the appropriate list. */ curint = curptr = 0; for (i = 0; i < sa->narg; i++) sa->args[i] = (CHERIABI_SYS_argmap[sa->code].sam_ptrmask & 1 << i) ? 
ptrargs[curptr++] : intargs[curint++]; td->td_retval[0] = 0; td->td_retval[1] = locr0->v1; return (error); }