/*
 * When a BNDSTX instruction attempts to save bounds to a bounds
 * table, it will first attempt to look up the table in the
 * first-level bounds directory.  If it does not find a table in
 * the directory, a #BR is generated and we get here in order to
 * allocate a new table.
 *
 * With 32-bit mode, the size of BD is 4MB, and the size of each
 * bound table is 16KB.  With 64-bit mode, the size of BD is 2GB,
 * and the size of each bound table is 4MB.
 */
static int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
{
	unsigned long bd_entry, bd_base;
	struct bndcsr *bndcsr;

	bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
	if (!bndcsr)
		return -EINVAL;
	/*
	 * Mask off the preserve and enable bits
	 */
	bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
	/*
	 * The hardware provides the address of the missing or invalid
	 * entry via BNDSTATUS, so we don't have to go look it up.
	 */
	bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
	/*
	 * Make sure the directory entry is within where we think
	 * the directory is.
	 */
	if ((bd_entry < bd_base) ||
	    (bd_entry >= bd_base + MPX_BD_SIZE_BYTES))
		return -EINVAL;

	return allocate_bt((long __user *)bd_entry);
}
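/*
 * Illustrative sketch only -- the EX_* macro names below are made up
 * for this example; the real constants live in the kernel's MPX
 * headers.  The sizes quoted in the comment above follow directly
 * from the MPX layout: in 64-bit mode the bounds directory holds
 * 2^28 8-byte entries and each bounds table holds 2^17 32-byte
 * entries; in 32-bit mode the directory holds 2^20 4-byte entries
 * and each table holds 2^10 16-byte entries.
 */
#define EX_BD_SIZE_64	((1UL << 28) * 8)	/* 2GB bounds directory */
#define EX_BT_SIZE_64	((1UL << 17) * 32)	/* 4MB per bounds table */
#define EX_BD_SIZE_32	((1UL << 20) * 4)	/* 4MB bounds directory */
#define EX_BT_SIZE_32	((1UL << 10) * 16)	/* 16KB per bounds table */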
static void __user *task_get_bounds_dir(struct task_struct *tsk)
{
	struct bndcsr *bndcsr;

	if (!cpu_feature_enabled(X86_FEATURE_MPX))
		return MPX_INVALID_BOUNDS_DIR;

	/*
	 * The bounds directory pointer is stored in a register
	 * only accessible if we first do an xsave.
	 */
	fpu_save_init(&tsk->thread.fpu);
	bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR);
	if (!bndcsr)
		return MPX_INVALID_BOUNDS_DIR;

	/*
	 * Make sure the register looks valid by checking the
	 * enable bit.
	 */
	if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
		return MPX_INVALID_BOUNDS_DIR;

	/*
	 * Lastly, mask off the low bits used for configuration
	 * flags, and return the address of the bounds directory.
	 */
	return (void __user *)(unsigned long)
		(bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
}
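/*
 * A minimal caller sketch, not taken from the function above: the
 * name example_enable_mpx() and its details are assumptions modeled
 * on the kernel's mpx_enable_management().  It records the directory
 * base in the mm so later #BR faults can sanity-check directory
 * entries against it.
 */
static int example_enable_mpx(struct task_struct *tsk)
{
	void __user *bd_base;

	down_write(&tsk->mm->mmap_sem);
	bd_base = task_get_bounds_dir(tsk);
	tsk->mm->bd_addr = bd_base;	/* MPX_INVALID_BOUNDS_DIR if disabled */
	up_write(&tsk->mm->mmap_sem);

	return (bd_base == MPX_INVALID_BOUNDS_DIR) ? -ENXIO : 0;
}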
/*
 * This will go out and modify the XSAVE buffer so that PKRU is
 * set to a particular state for access to 'pkey'.
 *
 * PKRU state does affect kernel access to user memory.  We do
 * not modify PKRU *itself* here, only the XSAVE state that will
 * be restored into PKRU when we return back to userspace.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
		unsigned long init_val)
{
	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
	struct pkru_state *old_pkru_state;
	struct pkru_state new_pkru_state;
	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
	u32 new_pkru_bits = 0;

	/*
	 * This check implies XSAVE support.  OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!boot_cpu_has(X86_FEATURE_OSPKE))
		return -EINVAL;

	/* Set the bits we need in PKRU */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits into the correct place in PKRU for pkey. */
	new_pkru_bits <<= pkey_shift;

	/* Locate old copy of the state in the xsave buffer */
	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);

	/*
	 * When state is not in the buffer, it is in the init
	 * state, set it manually.  Otherwise, copy out the old
	 * state.
	 */
	if (!old_pkru_state)
		new_pkru_state.pkru = 0;
	else
		new_pkru_state.pkru = old_pkru_state->pkru;

	/* mask off any old bits in place */
	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
	/* Set the newly-requested bits */
	new_pkru_state.pkru |= new_pkru_bits;

	/*
	 * We could theoretically live without zeroing pkru.pad.
	 * The current XSAVE feature state definition says that
	 * only bytes 0->3 are used.  But we do not want to
	 * chance leaking kernel stack out to userspace in case a
	 * memcpy() of the whole xsave buffer was done.
	 *
	 * They're in the same cacheline anyway.
	 */
	new_pkru_state.pad = 0;

	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state,
			sizeof(new_pkru_state));

	return 0;
}
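/*
 * Illustrative only -- the helper name and main() below are made up
 * for this example and are not kernel code.  Each pkey owns two
 * adjacent PKRU bits: access-disable at bit pkey*2 and write-disable
 * at bit pkey*2 + 1, which is exactly the shift-and-mask dance the
 * function above performs.
 */
#include <stdio.h>

#define EX_PKRU_BITS_PER_PKEY	2
#define EX_PKRU_AD_BIT		0x1	/* access-disable */
#define EX_PKRU_WD_BIT		0x2	/* write-disable */

static unsigned int ex_pkru_bits(int pkey, int disable_access,
				 int disable_write)
{
	unsigned int bits = 0;

	if (disable_access)
		bits |= EX_PKRU_AD_BIT;
	if (disable_write)
		bits |= EX_PKRU_WD_BIT;
	/* Shift the per-key bits to this pkey's slot in PKRU. */
	return bits << (pkey * EX_PKRU_BITS_PER_PKEY);
}

int main(void)
{
	/* pkey 5, write disabled: 0x2 << 10 == 0x800, i.e. bit 11 set */
	printf("%#x\n", ex_pkru_bits(5, 0, 1));
	return 0;
}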
/*
 * This wraps up the common operations that need to occur when retrieving
 * data from xsave state.  It first ensures that the current task was
 * using the FPU and retrieves the data into a buffer.  It then calculates
 * the offset of the requested field in the buffer.
 *
 * This function is safe to call whether the FPU is in use or not.
 *
 * Note that this only works on the current task.
 *
 * Inputs:
 *	@xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP,
 *	XSTATE_SSE, etc...)
 * Output:
 *	address of the state in the xsave area or NULL if the state
 *	is not present or is in its 'init state'.
 */
const void *get_xsave_field_ptr(int xsave_state)
{
	struct fpu *fpu = &current->thread.fpu;

	if (!fpu->fpstate_active)
		return NULL;
	/*
	 * fpu__save() takes the CPU's xstate registers
	 * and saves them off to the 'fpu' memory buffer.
	 */
	fpu__save(fpu);

	return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
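/*
 * A minimal usage sketch; example_read_bndcsr() is a hypothetical
 * caller, not kernel code.  It fetches the current task's BNDCSR out
 * of the xsave area.  A NULL return means the feature state is absent
 * or still in its init state, so there is nothing to read.
 */
static int example_read_bndcsr(void)
{
	const struct bndcsr *bndcsr;

	bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
	if (!bndcsr)
		return -ENOENT;

	pr_debug("bndcfgu=%llx bndstatus=%llx\n",
		 bndcsr->bndcfgu, bndcsr->bndstatus);
	return 0;
}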
/*
 * If a bounds overflow occurs then a #BR is generated.  This
 * function decodes the MPX instruction to get the violation
 * address and sets that address in the extended struct siginfo.
 *
 * Note that this is not a super precise way of doing this.
 * Userspace could have, by the time we get here, written
 * anything it wants into the instructions.  We cannot
 * trust anything about it.  They might not be valid
 * instructions or might encode invalid registers, etc...
 *
 * The caller is expected to kfree() the returned siginfo_t.
 */
siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
				struct xsave_struct *xsave_buf)
{
	struct bndreg *bndregs, *bndreg;
	siginfo_t *info = NULL;
	struct insn insn;
	uint8_t bndregno;
	int err;

	err = mpx_insn_decode(&insn, regs);
	if (err)
		goto err_out;

	/*
	 * We know at this point that we are only dealing with
	 * MPX instructions.
	 */
	insn_get_modrm(&insn);
	bndregno = X86_MODRM_REG(insn.modrm.value);
	if (bndregno > 3) {
		err = -EINVAL;
		goto err_out;
	}
	/* get the bndregs _area_ of the xsave structure */
	bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS);
	if (!bndregs) {
		err = -EINVAL;
		goto err_out;
	}
	/* now go select the individual register in the set of 4 */
	bndreg = &bndregs[bndregno];

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		err = -ENOMEM;
		goto err_out;
	}
	/*
	 * The registers are always 64-bit, but the upper 32
	 * bits are ignored in 32-bit mode.  Also, note that the
	 * upper bounds are architecturally represented in 1's
	 * complement form.
	 *
	 * The 'unsigned long' cast is because the compiler
	 * complains when casting from integers to different-size
	 * pointers.
	 */
	info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound;
	info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound;
	info->si_addr_lsb = 0;
	info->si_signo = SIGSEGV;
	info->si_errno = 0;
	info->si_code = SEGV_BNDERR;
	info->si_addr = mpx_get_addr_ref(&insn, regs);
	/*
	 * We were not able to extract an address from the instruction,
	 * probably because there was something invalid in it.
	 */
	if (info->si_addr == (void *)-1) {
		err = -EINVAL;
		goto err_out;
	}

	return info;

err_out:
	/* info might be NULL, but kfree() handles that */
	kfree(info);
	return ERR_PTR(err);
}
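/*
 * A sketch of the expected caller, modeled on the kernel's #BR trap
 * handler; the function name here is hypothetical.  It delivers the
 * decoded bounds to userspace as SIGSEGV/SEGV_BNDERR, then frees the
 * siginfo as the comment above requires.
 */
static void example_handle_bounds_fault(struct pt_regs *regs,
					struct xsave_struct *xsave_buf)
{
	siginfo_t *info;

	info = mpx_generate_siginfo(regs, xsave_buf);
	if (IS_ERR(info)) {
		/* Decode failed; fall back to a plain SIGSEGV. */
		force_sig(SIGSEGV, current);
		return;
	}

	force_sig_info(SIGSEGV, info, current);
	kfree(info);
}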