/*
 * can_probe() - report whether @paddr falls on an instruction boundary.
 *
 * Decoding starts at @paddr minus the offset reported by
 * kallsyms_lookup_size_offset() and advances one instruction at a time.
 *
 * Returns 1 when the walk lands exactly on @paddr.  Returns 0 when the
 * lookup fails, when a decoded instruction still starts with the breakpoint
 * opcode, or when the walk steps past @paddr.
 */
static int __kprobes can_probe(unsigned long paddr)
{
	kprobe_opcode_t saved[MAX_INSN_SIZE];
	unsigned long sym_off = 0;
	unsigned long cur;
	struct insn decoded;

	if (!kallsyms_lookup_size_offset(paddr, NULL, &sym_off))
		return 0;

	for (cur = paddr - sym_off; cur < paddr; cur += decoded.length) {
		/*
		 * Another kprobe may have replaced the instruction with a
		 * breakpoint; in that case the original bytes are placed in
		 * our buffer and decoded from there.  Jump optimization turns
		 * the breakpoint into a relative jump, but since a relative
		 * jump is an ordinary instruction it is decoded as is when
		 * no kprobe is present.
		 */
		unsigned long text = recover_probed_instruction(saved, cur);

		kernel_insn_init(&decoded, (void *)text);
		insn_get_length(&decoded);

		/*
		 * A breakpoint that survives recovery was presumably planted
		 * by some other debugging subsystem; it cannot be recovered.
		 */
		if (decoded.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
	}

	return cur == paddr;
}
/*
 * __copy_instruction() - copy one instruction from @src to @dest.
 *
 * The instruction is first passed through recover_probed_instruction() so
 * that the bytes decoded are the ones that function hands back.  On 64-bit
 * x86, if the instruction uses %rip-relative addressing, the 32-bit
 * displacement in the copy is rewritten so that it still refers to the same
 * target when executed from @dest.
 *
 * Writes into @dest are bracketed by pax_open_kernel()/pax_close_kernel().
 *
 * Returns the length of the copied instruction, or 0 when recovery fails,
 * when the instruction still starts with the breakpoint opcode, or when the
 * adjusted displacement does not fit in a signed 32-bit value.
 */
int __copy_instruction(u8 *dest, u8 *src)
{
	kprobe_opcode_t saved[MAX_INSN_SIZE];
	struct insn decoded;
	unsigned long text;
	int len;

	text = recover_probed_instruction(saved, (unsigned long)src);
	if (!text)
		return 0;

	kernel_insn_init(&decoded, (void *)text, MAX_INSN_SIZE);
	insn_get_length(&decoded);
	len = decoded.length;

	/* Some other subsystem placed a breakpoint here; recovery failed. */
	if (decoded.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
		return 0;

	pax_open_kernel();
	memcpy(dest, decoded.kaddr, len);
	pax_close_kernel();

#ifdef CONFIG_X86_64
	if (insn_rip_relative(&decoded)) {
		s64 delta;
		u8 *disp_field;

		/* Re-decode from the copy so offsets refer to @dest. */
		kernel_insn_init(&decoded, dest, len);
		insn_get_displacement(&decoded);

		/*
		 * Compensate for the distance between where the instruction
		 * originally lived and where its copy will execute.  The
		 * result has to be representable as a signed 32-bit value:
		 * the CPU sign-extends it to 64 bits before adding it to
		 * %rip, and that sum must equal what the original
		 * displacement would have produced.
		 */
		delta = src + (s64) decoded.displacement.value - dest;
		if (delta != (s64) (s32) delta) {
			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", delta);
			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, decoded.displacement.value);
			return 0;
		}

		disp_field = dest + insn_offset_displacement(&decoded);
		pax_open_kernel();
		*(s32 *) disp_field = (s32) delta;
		pax_close_kernel();
	}
#endif
	return len;
}
/*
 * __copy_instruction() - copy one instruction, undoing kprobes' changes.
 *
 * @dest: buffer that receives MAX_INSN_SIZE bytes and is then decoded
 * @src:  address the instruction is taken from
 * @real: address the copy will finally run at; a %rip-relative displacement
 *        is adjusted relative to @real, not @dest
 * @insn: decoder state, initialised on @dest by this function
 *
 * Returns the length of the copied instruction, or 0 on any failure.
 */
int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
{
	kprobe_opcode_t saved[MAX_INSN_SIZE];
	unsigned long text;

	text = recover_probed_instruction(saved, (unsigned long)src);
	if (!text)
		return 0;
	if (!insn)
		return 0;

	/* This reads kernel text directly when the address was not recovered. */
	if (probe_kernel_read(dest, (void *)text, MAX_INSN_SIZE))
		return 0;

	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
	insn_get_length(insn);

	/* Some other subsystem placed a breakpoint here; recovery failed. */
	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
		return 0;

	/* Exception-masking instructions must not be single-stepped. */
	if (insn_masking_exception(insn))
		return 0;

#ifdef CONFIG_X86_64
	/* RIP-relative addressing exists only on x86_64. */
	if (insn_rip_relative(insn)) {
		s64 delta;
		u8 *disp_field;

		/*
		 * Compensate for the distance between where the instruction
		 * originally lived and where its copy will execute.  The
		 * result has to be representable as a signed 32-bit value:
		 * the CPU sign-extends it to 64 bits before adding it to
		 * %rip, and that sum must equal what the original
		 * displacement would have produced.
		 */
		delta = src + (s64) insn->displacement.value - real;
		if (delta != (s64) (s32) delta) {
			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", delta);
			return 0;
		}

		disp_field = dest + insn_offset_displacement(insn);
		*(s32 *) disp_field = (s32) delta;
	}
#endif
	return insn->length;
}
/*
 * __copy_instruction() - copy one instruction from @src to @dest.
 *
 * @recover: when non-zero and the instruction at @src starts with the
 *           breakpoint opcode, recover_probed_instruction() is asked to
 *           fill a local buffer and the instruction is decoded from that
 *           buffer instead.
 *
 * On 64-bit x86 a %rip-relative displacement in the copy is rewritten so it
 * still refers to the same target when executed from @dest; a displacement
 * that no longer fits in 32 bits triggers BUG_ON().
 *
 * Returns the instruction length, or 0 when recover_probed_instruction()
 * reports a non-zero result.
 */
static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
{
	kprobe_opcode_t saved[MAX_INSN_SIZE];
	struct insn decoded;
	int err;

	kernel_insn_init(&decoded, src);
	if (recover) {
		insn_get_opcode(&decoded);
		if (decoded.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
			err = recover_probed_instruction(saved, (unsigned long)src);
			if (err)
				return 0;
			/* Decode the recovered bytes rather than the text. */
			kernel_insn_init(&decoded, saved);
		}
	}

	insn_get_length(&decoded);
	memcpy(dest, decoded.kaddr, decoded.length);

#ifdef CONFIG_X86_64
	if (insn_rip_relative(&decoded)) {
		s64 delta;
		u8 *disp_field;

		/* Re-decode from the copy so offsets refer to @dest. */
		kernel_insn_init(&decoded, dest);
		insn_get_displacement(&decoded);

		/*
		 * Compensate for the distance between where the instruction
		 * originally lived and where its copy will execute.  The
		 * result has to be representable as a signed 32-bit value:
		 * the CPU sign-extends it to 64 bits before adding it to
		 * %rip, and that sum must equal what the original
		 * displacement would have produced.
		 */
		delta = src + (s64) decoded.displacement.value - dest;
		BUG_ON(delta != (s64) (s32) delta); /* Sanity check. */

		disp_field = dest + insn_offset_displacement(&decoded);
		*(s32 *) disp_field = (s32) delta;
	}
#endif
	return decoded.length;
}
static int mpx_insn_decode(struct insn *insn, struct pt_regs *regs) { unsigned char buf[MAX_INSN_SIZE]; int x86_64 = !test_thread_flag(TIF_IA32); int not_copied; int nr_copied; not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf)); nr_copied = sizeof(buf) - not_copied; /* * The decoder _should_ fail nicely if we pass it a short buffer. * But, let's not depend on that implementation detail. If we * did not get anything, just error out now. */ if (!nr_copied) return -EFAULT; insn_init(insn, buf, nr_copied, x86_64); insn_get_length(insn); /* * copy_from_user() tries to get as many bytes as we could see in * the largest possible instruction. If the instruction we are * after is shorter than that _and_ we attempt to copy from * something unreadable, we might get a short read. This is OK * as long as the read did not stop in the middle of the * instruction. Check to see if we got a partial instruction. */ if (nr_copied < insn->length) return -EFAULT; insn_get_opcode(insn); /* * We only _really_ need to decode bndcl/bndcn/bndcu * Error out on anything else. */ if (insn->opcode.bytes[0] != 0x0f) goto bad_opcode; if ((insn->opcode.bytes[1] != 0x1a) && (insn->opcode.bytes[1] != 0x1b)) goto bad_opcode; return 0; bad_opcode: return -EINVAL; }
int main(int argc, char **argv) { struct insn insn; int insns = 0; int errors = 0; unsigned long i; unsigned char insn_buf[MAX_INSN_SIZE * 2]; parse_args(argc, argv); /* */ memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); for (i = 0; i < iter_end; i++) { if (generate_insn(insn_buf) <= 0) break; if (i < iter_start) /* */ continue; /* */ insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.next_byte <= insn.kaddr || insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { /* */ dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); errors++; } else if (verbose && !insn_complete(&insn)) dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); else if (verbose >= 2) dump_insn(stdout, &insn); insns++; } fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); return errors ? 1 : 0; }
/*
 * can_probe() - report whether @paddr falls on an instruction boundary.
 *
 * Decoding starts at @paddr minus the offset reported by kallsyms_lookup()
 * and advances one instruction at a time.
 *
 * Returns 1 when the walk lands exactly on @paddr.  Returns 0 when the
 * lookup fails, when a breakpoint cannot be recovered, or when the walk
 * steps past @paddr.
 */
static int __kprobes can_probe(unsigned long paddr)
{
	kprobe_opcode_t saved[MAX_INSN_SIZE];
	unsigned long sym_off = 0;
	unsigned long cur;
	struct insn decoded;
	int err;

	if (!kallsyms_lookup(paddr, NULL, &sym_off, NULL, __dummy_buf))
		return 0;

	for (cur = paddr - sym_off; cur < paddr; cur += decoded.length) {
		kernel_insn_init(&decoded, (void *)cur);
		insn_get_opcode(&decoded);

		/*
		 * A breakpoint here may come from another kprobe, in which
		 * case the original instruction is restored into our buffer
		 * and decoded from there.
		 */
		if (decoded.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
			err = recover_probed_instruction(saved, cur);
			/*
			 * Failure presumably means some other debugging
			 * subsystem inserted the breakpoint; it cannot be
			 * recovered.
			 */
			if (err)
				return 0;
			kernel_insn_init(&decoded, saved);
		}

		insn_get_length(&decoded);
	}

	return cur == paddr;
}
/* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address * accordingly. (The contents of the scratch register will be saved * before we single-step the modified instruction, and restored * afterward.) * * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL * area typically lies beyond that area. At least for instructions * that store to memory, we can't execute the original instruction * and "fix things up" later, because the misdirected store could be * disastrous. * * Some useful facts about rip-relative instructions: * * - There's always a modrm byte. * - There's never a SIB byte. * - The displacement is always 4 bytes. */ static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) { u8 *cursor; u8 reg; if (mm->context.ia32_compat) return; auprobe->rip_rela_target_address = 0x0; if (!insn_rip_relative(insn)) return; /* * insn_rip_relative() would have decoded rex_prefix, modrm. * Clear REX.b bit (extension of MODRM.rm field): * we want to encode rax/rcx, not r8/r9. */ if (insn->rex_prefix.nbytes) { cursor = auprobe->insn + insn_offset_rex_prefix(insn); *cursor &= 0xfe; /* Clearing REX.B bit */ } /* * Point cursor at the modrm byte. The next 4 bytes are the * displacement. Beyond the displacement, for some instructions, * is the immediate operand. */ cursor = auprobe->insn + insn_offset_modrm(insn); insn_get_length(insn); /* * Convert from rip-relative addressing to indirect addressing * via a scratch register. Change the r/m field from 0x5 (%rip) * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. */ reg = MODRM_REG(insn); if (reg == 0) { /* * The register operand (if any) is either the A register * (%rax, %eax, etc.) 
or (if the 0x4 bit is set in the * REX prefix) %r8. In any case, we know the C register * is NOT the register operand, so we use %rcx (register * #1) for the scratch register. */ auprobe->fixups = UPROBE_FIX_RIP_CX; /* Change modrm from 00 000 101 to 00 000 001. */ *cursor = 0x1; } else { /* Use %rax (register #0) for the scratch register. */ auprobe->fixups = UPROBE_FIX_RIP_AX; /* Change modrm from 00 xxx 101 to 00 xxx 000 */ *cursor = (reg << 3); } /* Target address = address of next instruction + (signed) offset */ auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; /* Displacement field is gone; slide immediate field (if any) over. */ if (insn->immediate.nbytes) { cursor++; memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); } return; }
/* Decode and process the instruction ('c_insn') at * the address 'kaddr' - see the description of do_process_area for details. * * Check if we get past the end of the buffer [kaddr, end_kaddr) * * The function returns the length of the instruction in bytes. * 0 is returned in case of failure. */ static unsigned int do_process_insn(struct insn* c_insn, void* kaddr, void* end_kaddr, void** from_funcs, void** to_funcs, unsigned int nfuncs) { /* ptr to the 32-bit offset argument in the instruction */ u32* offset = NULL; /* address of the function being called */ void* addr = NULL; static const unsigned char op = 0xe8; /* 'call <offset>' */ int i; BUG_ON(from_funcs == NULL || to_funcs == NULL); /* Decode the instruction and populate 'insn' structure */ kernel_insn_init(c_insn, kaddr); insn_get_length(c_insn); if (c_insn->length == 0) { return 0; } if (kaddr + c_insn->length > end_kaddr) { /* Note: it is OK to stop at 'end_kaddr' but no further */ KEDR_MSG(COMPONENT_STRING "instruction decoder stopped past the end of the section.\n"); insn_get_opcode(c_insn); printk(KERN_ALERT COMPONENT_STRING "kaddr=%p, end_kaddr=%p, c_insn->length=%d, opcode=0x%x\n", (void*)kaddr, (void*)end_kaddr, (int)c_insn->length, (unsigned int)c_insn->opcode.value ); WARN_ON(1); } /* This call may be overkill as insn_get_length() probably has to decode * the instruction completely. * Still, to operate safely, we need insn_get_opcode() before we can access * c_insn->opcode. * The call is cheap anyway, no re-decoding is performed. */ insn_get_opcode(c_insn); if (c_insn->opcode.value != op) { /* Not a 'call' instruction, nothing to do. */ return c_insn->length; } /* [NB] For some reason, the decoder stores the argument of 'call' and 'jmp' * as 'immediate' rather than 'displacement' (as Intel manuals name it). * May be it is a bug, may be it is not. * Meanwhile, I'll call this value 'offset' to avoid confusion. 
*/ /* Call this before trying to access c_insn->immediate */ insn_get_immediate(c_insn); if (c_insn->immediate.nbytes != 4) { KEDR_MSG(COMPONENT_STRING "at 0x%p: " "opcode: 0x%x, " "immediate field is %u rather than 32 bits in size; " "insn.length = %u, insn.imm = %u, off_immed = %d\n", kaddr, (unsigned int)c_insn->opcode.value, 8 * (unsigned int)c_insn->immediate.nbytes, c_insn->length, (unsigned int)c_insn->immediate.value, insn_offset_immediate(c_insn)); WARN_ON(1); return c_insn->length; } offset = (u32*)(kaddr + insn_offset_immediate(c_insn)); addr = CALL_ADDR_FROM_OFFSET(kaddr, c_insn->length, *offset); /* Check if one of the functions of interest is called */ for (i = 0; i < nfuncs; ++i) { if (addr == from_funcs[i]) { /* Change the address of the function to be called */ BUG_ON(to_funcs[i] == NULL); KEDR_MSG(COMPONENT_STRING "at 0x%p: changing address 0x%p to 0x%p (displ: 0x%x to 0x%x)\n", kaddr, from_funcs[i], to_funcs[i], (unsigned int)(*offset), (unsigned int)CALL_OFFSET_FROM_ADDR( kaddr, c_insn->length, to_funcs[i]) ); *offset = CALL_OFFSET_FROM_ADDR( kaddr, c_insn->length, to_funcs[i] ); break; } } return c_insn->length; }
int symbol_hijack(struct kernsym *sym, const char *symbol_name, unsigned long *code) { int ret; unsigned long orig_addr; unsigned long dest_addr; unsigned long end_addr; u32 *poffset; struct insn insn; bool pte_ro; ret = find_symbol_address(sym, symbol_name); if (IN_ERR(ret)) return ret; if (*(u8 *)sym->addr == OP_JMP_REL32) { printk(PKPRE "error: %s already appears to be hijacked\n", symbol_name); return -EFAULT; } sym->new_addr = malloc(sym->size); if (sym->new_addr == NULL) { printk(PKPRE "Failed to allocate buffer of size %lu for %s\n", sym->size, sym->name); return -ENOMEM; } memset(sym->new_addr, 0, (size_t)sym->size); if (sym->size < OP_JMP_SIZE) { ret = -EFAULT; goto out_error; } orig_addr = (unsigned long)sym->addr; dest_addr = (unsigned long)sym->new_addr; end_addr = orig_addr + sym->size; while (end_addr > orig_addr && *(u8 *)(end_addr - 1) == '\0') --end_addr; if (orig_addr == end_addr) { printk(PKPRE "A spurious symbol \"%s\" (address: %p) seems to contain only zeros\n", sym->name, sym->addr); ret = -EILSEQ; goto out_error; } while (orig_addr < end_addr) { kernel_insn_init(&insn, (void *)orig_addr); insn_get_length(&insn); if (insn.length == 0) { printk(PKPRE "Failed to decode instruction at %p (%s+0x%lx)\n", (const void *)orig_addr, sym->name, orig_addr - (unsigned long)sym->addr); ret = -EILSEQ; goto out_error; } copy_and_fixup_insn(&insn, (void *)dest_addr, sym); orig_addr += insn.length; dest_addr += insn.length; } sym->new_size = dest_addr - (unsigned long)sym->new_addr; sym->run = sym->new_addr; set_addr_rw((unsigned long) sym->addr, &pte_ro); memcpy(&sym->orig_start_bytes[0], sym->addr, OP_JMP_SIZE); *(u8 *)sym->addr = OP_JMP_REL32; poffset = (u32 *)((unsigned long)sym->addr + 1); *poffset = CODE_OFFSET_FROM_ADDR((unsigned long)sym->addr, OP_JMP_SIZE, (unsigned long)code); set_addr_ro((unsigned long) sym->addr, pte_ro); sym->hijacked = true; return 0; out_error: malloc_free(sym->new_addr); return ret; }
int main(int argc, char **argv) { char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; unsigned char insn_buf[16]; struct insn insn; int insns = 0; int warnings = 0; parse_args(argc, argv); while (fgets(line, BUFSIZE, stdin)) { char copy[BUFSIZE], *s, *tab1, *tab2; int nb = 0; unsigned int b; if (line[0] == '<') { /* Symbol line */ strcpy(sym, line); continue; } insns++; memset(insn_buf, 0, 16); strcpy(copy, line); tab1 = strchr(copy, '\t'); if (!tab1) malformed_line(line, insns); s = tab1 + 1; s += strspn(s, " "); tab2 = strchr(s, '\t'); if (!tab2) malformed_line(line, insns); *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ while (s < tab2) { if (sscanf(s, "%x", &b) == 1) { insn_buf[nb++] = (unsigned char) b; s += 3; } else break; } /* Decode an instruction */ insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64); insn_get_length(&insn); if (insn.length != nb) { warnings++; pr_warn("Found an x86 instruction decoder bug, " "please report this.\n", sym); pr_warn("%s", line); pr_warn("objdump says %d bytes, but insn_get_length() " "says %d\n", nb, insn.length); if (verbose) dump_insn(stderr, &insn); } } if (warnings) pr_warn("Decoded and checked %d instructions with %d " "failures\n", insns, warnings); else fprintf(stdout, "%s: success: Decoded and checked %d" " instructions\n", prog, insns); return 0; }