/**
 * Translate the syscall @tracee is currently stopped on.  The stage
 * is deduced from @tracee->status: zero means the syscall is being
 * entered, anything else means it is being exited.  Nothing is done
 * if the registers of @tracee can't be fetched.
 */
void translate_syscall(Tracee *tracee)
{
	/* Sample the stage first, the handlers below may alter
	 * tracee->status.  */
	const bool entering = (tracee->status == 0);

	assert(tracee->exe != NULL);

	if (fetch_regs(tracee) < 0)
		return;

	if (!entering) {
		/* Sysexit: unless told otherwise, the original
		 * register values are restored once this stage is
		 * over.  */
		tracee->restore_original_regs = true;

		print_current_regs(tracee, 5, "sysexit start");
		translate_syscall_exit(tracee);
		print_current_regs(tracee, 4, "sysexit end");
	}
	else {
		/* Sysenter: the original register values are never
		 * restored once this stage is over.  */
		tracee->restore_original_regs = false;

		print_current_regs(tracee, 3, "sysenter start");

		/* Keep a copy of the registers both before and after
		 * the translation.  */
		save_current_regs(tracee, ORIGINAL);
		translate_syscall_enter(tracee);
		print_current_regs(tracee, 5, "sysenter end");
		save_current_regs(tracee, MODIFIED);

		/* When the tracee is restarted with PTRACE_CONT, the
		 * sysexit stage won't be hit (i.e. seccomp is enabled
		 * and there's nothing else to do), so the stack
		 * pointer has to be put back right now.  */
		if (tracee->restart_how == PTRACE_CONT) {
			tracee->status = 0;
			poke_reg(tracee, STACK_POINTER,
				peek_reg(tracee, ORIGINAL, STACK_POINTER));
		}
	}

	(void) push_regs(tracee);
}
/**
 * Translate the syscall @tracee is currently stopped on, either at
 * its enter stage or at its exit stage (as told by IS_IN_SYSENTER).
 * Syscalls chained by PRoot are not translated, extensions are only
 * notified about them.  Nothing is done if the registers of @tracee
 * can't be fetched.
 */
void translate_syscall(Tracee *tracee)
{
	/* Sample the stage first, tracee->status is updated below.  */
	const bool entering = IS_IN_SYSENTER(tracee);
	int status;

	assert(tracee->exe != NULL);

	status = fetch_regs(tracee);
	if (status < 0)
		return;

	if (!entering) {
		/* Sysexit: unless told otherwise, the original
		 * register values are restored once this stage is
		 * over.  */
		tracee->restore_original_regs = true;

		print_current_regs(tracee, 5, "sysexit start");

		/* A syscall chained by PRoot was not requested by the
		 * tracee: don't translate it, just tell extensions.  */
		if (tracee->chain.syscalls != NULL)
			(void) notify_extensions(tracee, SYSCALL_CHAINED_EXIT, 0, 0);
		else
			translate_syscall_exit(tracee);

		/* The next stop will be a sysenter stage.  */
		tracee->status = 0;

		/* Schedule the next chained syscall, if there is one.
		 * The chain is tested again on purpose since the
		 * calls above are free to modify it.  */
		if (tracee->chain.syscalls != NULL)
			chain_next_syscall(tracee);
	}
	else {
		/* Sysenter: the original register values are never
		 * restored once this stage is over.  */
		tracee->restore_original_regs = false;

		print_current_regs(tracee, 3, "sysenter start");

		if (tracee->chain.syscalls != NULL) {
			/* A syscall chained by PRoot was not requested
			 * by the tracee: don't translate it, just tell
			 * extensions, and always stop at its exit
			 * stage.  */
			status = notify_extensions(tracee, SYSCALL_CHAINED_ENTER, 0, 0);
			tracee->restart_how = PTRACE_SYSCALL;
		}
		else {
			/* Keep a copy of the registers both before and
			 * after the translation.  */
			save_current_regs(tracee, ORIGINAL);
			status = translate_syscall_enter(tracee);
			save_current_regs(tracee, MODIFIED);
		}

		/* Remember the outcome for the sysexit stage.  If the
		 * translation/extension reported an error, the actual
		 * syscall is cancelled and the error becomes its
		 * result.  */
		if (status >= 0)
			tracee->status = 1;
		else {
			set_sysnum(tracee, PR_void);
			poke_reg(tracee, SYSARG_RESULT, (word_t) status);
			tracee->status = status;
		}

		/* When the tracee is restarted with PTRACE_CONT, the
		 * sysexit stage won't be hit (i.e. seccomp is enabled
		 * and there's nothing else to do), so the stack
		 * pointer has to be put back right now.  */
		if (tracee->restart_how == PTRACE_CONT) {
			tracee->status = 0;
			poke_reg(tracee, STACK_POINTER,
				peek_reg(tracee, ORIGINAL, STACK_POINTER));
		}
	}

	(void) push_regs(tracee);

	/* Trace the registers as they were pushed to the tracee.  */
	print_current_regs(tracee, entering ? 5 : 4,
			entering ? "sysenter end" : "sysexit end");
}
/**
 * Start the loading of @tracee.  This function returns no error since
 * it's either too late to do anything useful (the calling process is
 * already replaced) or the error reported by the kernel
 * (syscall_result < 0) will be propagated as-is.
 *
 * Two situations are handled:
 *
 * - the tracee notifies that a ptraced load is done: its registers
 *   are set up for the program startup and nothing else is done;
 *
 * - execve(2) just returned: if it succeeded, the new
 *   "/proc/self/exe" is committed, the heap information is reset,
 *   and the load script is transferred into the tracee's memory.
 */
void translate_execve_exit(Tracee *tracee)
{
	word_t syscall_result;
	int status;

	if (IS_NOTIFICATION_PTRACED_LOAD_DONE(tracee)) {
		/* Be sure not to confuse the ptracer with an
		 * unexpected syscall/returned value.  */
		poke_reg(tracee, SYSARG_RESULT, 0);
		set_sysnum(tracee, PR_execve);

		/* According to most ABIs, all registers have
		 * undefined values at program startup except:
		 *
		 * - the stack pointer
		 * - the instruction pointer
		 * - the rtld_fini pointer
		 * - the state flags
		 *
		 * The stack and instruction pointers are taken from
		 * the second and third syscall arguments as they were
		 * originally passed.
		 */
		poke_reg(tracee, STACK_POINTER, peek_reg(tracee, ORIGINAL, SYSARG_2));
		poke_reg(tracee, INSTR_POINTER, peek_reg(tracee, ORIGINAL, SYSARG_3));
		poke_reg(tracee, RTLD_FINI, 0);
		poke_reg(tracee, STATE_FLAGS, 0);

		/* Record the current register values as the ORIGINAL
		 * ones and flag the registers as changed, so that the
		 * values set above are the ones in effect at the end
		 * of this stage.  */
		save_current_regs(tracee, ORIGINAL);
		tracee->_regs_were_changed = true;

		/* This is required to make GDB work correctly under
		 * PRoot, however it deserves to be used
		 * unconditionally.  */
		(void) bind_proc_pid_auxv(tracee);

		/* If the PTRACE_O_TRACEEXEC option is *not* in effect
		 * for the execing tracee, the kernel delivers an
		 * extra SIGTRAP to the tracee after execve(2)
		 * *returns*.  This is an ordinary signal (similar to
		 * one which can be generated by "kill -TRAP"), not a
		 * special kind of ptrace-stop.  Employing
		 * PTRACE_GETSIGINFO for this signal returns si_code
		 * set to 0 (SI_USER).  This signal may be blocked by
		 * signal mask, and thus may be delivered (much)
		 * later. -- man 2 ptrace
		 *
		 * This signal is delayed so far since the program was
		 * not fully loaded yet; GDB would get "invalid
		 * address" errors otherwise.  Its delivery is
		 * best-effort: there is nothing useful to do here if
		 * kill(2) fails.  */
		if ((tracee->as_ptracee.options & PTRACE_O_TRACEEXEC) == 0)
			(void) kill(tracee->pid, SIGTRAP);

		return;
	}

	/* A failed execve(2) is reported to the tracee as-is.  */
	syscall_result = peek_reg(tracee, CURRENT, SYSARG_RESULT);
	if ((int) syscall_result < 0)
		return;

	/* Execve happened; commit the new "/proc/self/exe".  */
	if (tracee->new_exe != NULL) {
		(void) talloc_unlink(tracee, tracee->exe);
		tracee->exe = talloc_reference(tracee, tracee->new_exe);
		talloc_set_name_const(tracee->exe, "$exe");
	}

	/* New processes have no heap.  memset() is used rather than
	 * the legacy bzero(), which was removed from POSIX.1-2008.  */
	memset(tracee->heap, 0, sizeof(Heap));

	/* Transfer the load script to the loader.  A failure can only
	 * be reported, the calling process is already replaced.  */
	status = transfer_load_script(tracee);
	if (status < 0)
		note(tracee, ERROR, INTERNAL, "can't transfer load script: %s", strerror(-status));
}
/**
 * Build the load script described by @tracee->load_info, then copy
 * it -- followed by the path strings it refers to -- right below the
 * current stack pointer of @tracee.  On success, the stack pointer
 * and the first user argument of @tracee both point to the start of
 * the script and 0 is returned.  Otherwise -ENOMEM or the negative
 * status reported by write_data() is returned.
 */
static int transfer_load_script(Tracee *tracee)
{
	const word_t initial_sp = peek_reg(tracee, CURRENT, STACK_POINTER);
	static word_t page_size = 0;
	static word_t page_mask = 0;

	const bool has_interp = (tracee->load_info->interp != NULL);
	bool exec_stack;

	size_t user_path_size;
	size_t interp_path_size;
	size_t raw_path_size;
	size_t padding;
	size_t strings_size;
	size_t script_size;
	size_t total_size;

	word_t user_path_addr;
	word_t interp_path_addr;
	word_t raw_path_addr;
	word_t script_addr;
	word_t entry_point;

	LoadStatement *stmt;
	void *script;
	void *out;
	int status;

	/* The page size is queried only once; 4 KiB is assumed when
	 * it can't be known.  */
	if (page_size == 0) {
		page_size = sysconf(_SC_PAGE_SIZE);
		if ((int) page_size <= 0)
			page_size = 0x1000;
		page_mask = ~(page_size - 1);
	}

	/* The stack has to be executable as soon as either the
	 * program or its interpreter asks for it.  */
	exec_stack = (tracee->load_info->needs_executable_stack
		|| (has_interp && tracee->load_info->interp->needs_executable_stack));

	/* The "open" statements and the "start" statement embed the
	 * addresses of some strings.  To generate the script in a
	 * single pass, these strings are stored in a "strings area"
	 * that ends at the current stack pointer -- the only address
	 * known so far.  Sizes include the terminating NUL byte; a
	 * size of 0 means the string is not stored.  */
	user_path_size = strlen(tracee->load_info->user_path) + 1;

	interp_path_size = 0;
	if (has_interp)
		interp_path_size = strlen(tracee->load_info->interp->user_path) + 1;

	/* The raw path is not duplicated when it is the very same
	 * string as the user path.  */
	raw_path_size = 0;
	if (tracee->load_info->raw_path != tracee->load_info->user_path)
		raw_path_size = strlen(tracee->load_info->raw_path) + 1;

	/* Pad the end of the strings area so that its start -- that
	 * is, the end of the load script -- lies on a word boundary,
	 * for the sake of performance.  */
	padding = (initial_sp - user_path_size - interp_path_size - raw_path_size)
		% sizeof_word(tracee);
	strings_size = user_path_size + interp_path_size + raw_path_size + padding;

	user_path_addr   = initial_sp - strings_size;
	interp_path_addr = user_path_addr + user_path_size;
	if (raw_path_size != 0)
		raw_path_addr = interp_path_addr + interp_path_size;
	else
		raw_path_addr = user_path_addr;

	/* Size of the load script: one "open" and as many "mmap" as
	 * there are mappings, for the program and for its interpreter
	 * if any, then an optional "make_stack_exec", then "start".  */
	script_size = LOAD_STATEMENT_SIZE(*stmt, open)
		+ (LOAD_STATEMENT_SIZE(*stmt, mmap)
			* talloc_array_length(tracee->load_info->mappings));
	if (has_interp)
		script_size += LOAD_STATEMENT_SIZE(*stmt, open)
			+ (LOAD_STATEMENT_SIZE(*stmt, mmap)
				* talloc_array_length(tracee->load_info->interp->mappings));
	if (exec_stack)
		script_size += LOAD_STATEMENT_SIZE(*stmt, make_stack_exec);
	script_size += LOAD_STATEMENT_SIZE(*stmt, start);

	/* A single zeroed buffer holds the load script immediately
	 * followed by the strings area.  */
	total_size = script_size + strings_size;
	script = talloc_zero_size(tracee->ctx, total_size);
	if (script == NULL)
		return -ENOMEM;

	out = script;

	/* Statement: open the program...  */
	stmt = out;
	stmt->action = LOAD_ACTION_OPEN;
	stmt->open.string_address = user_path_addr;
	out += LOAD_STATEMENT_SIZE(*stmt, open);

	/* ... statements: and map it.  */
	out = transcript_mappings(out, tracee->load_info->mappings);

	if (!has_interp)
		entry_point = ELF_FIELD(tracee->load_info->elf_header, entry);
	else {
		/* Statement: open the interpreter...  */
		stmt = out;
		stmt->action = LOAD_ACTION_OPEN_NEXT;
		stmt->open.string_address = interp_path_addr;
		out += LOAD_STATEMENT_SIZE(*stmt, open);

		/* ... statements: and map it.  */
		out = transcript_mappings(out, tracee->load_info->interp->mappings);

		/* The execution starts in the interpreter then.  */
		entry_point = ELF_FIELD(tracee->load_info->interp->elf_header, entry);
	}

	if (exec_stack) {
		/* Statement: make the stack executable, from the start
		 * of the page the stack pointer belongs to.  */
		stmt = out;
		stmt->action = LOAD_ACTION_MAKE_STACK_EXEC;
		stmt->make_stack_exec.start = initial_sp & page_mask;
		out += LOAD_STATEMENT_SIZE(*stmt, make_stack_exec);
	}

	/* Statement: start the program.  The action slightly differs
	 * when the tracee is itself ptraced.  */
	stmt = out;
	stmt->action = (tracee->as_ptracee.ptracer != NULL
			? LOAD_ACTION_START_TRACED
			: LOAD_ACTION_START);

	stmt->start.stack_pointer = initial_sp;
	stmt->start.entry_point   = entry_point;
	stmt->start.at_phent  = ELF_FIELD(tracee->load_info->elf_header, phentsize);
	stmt->start.at_phnum  = ELF_FIELD(tracee->load_info->elf_header, phnum);
	stmt->start.at_entry  = ELF_FIELD(tracee->load_info->elf_header, entry);
	stmt->start.at_phdr   = ELF_FIELD(tracee->load_info->elf_header, phoff)
				+ tracee->load_info->mappings[0].addr;
	stmt->start.at_execfn = raw_path_addr;
	out += LOAD_STATEMENT_SIZE(*stmt, start);

	/* The script must be exactly as large as computed above.  */
	assert((uintptr_t) out - (uintptr_t) script == script_size);

	/* In 32-on-64 mode the script is expected as 32-bit values:
	 * narrow every 64-bit value of the script in place, packing
	 * the results at the start of the buffer.  */
	if (is_32on64_mode(tracee)) {
		int word = 0;

		while (script + word * sizeof(uint64_t) < out) {
			((uint32_t *) script)[word] = ((uint64_t *) script)[word];
			word++;
		}
	}

	/* Append the strings right after the load script, in the
	 * order their addresses were computed.  */
	memcpy(out, tracee->load_info->user_path, user_path_size);
	out += user_path_size;

	if (interp_path_size != 0) {
		memcpy(out, tracee->load_info->interp->user_path, interp_path_size);
		out += interp_path_size;
	}

	if (raw_path_size != 0) {
		memcpy(out, tracee->load_info->raw_path, raw_path_size);
		out += raw_path_size;
	}

	/* Once the padding is skipped, the whole buffer must have
	 * been consumed.  */
	out += padding;
	assert((uintptr_t) out - (uintptr_t) script == total_size);

	/* Make room in the tracee's stack for the whole buffer, and
	 * make the first user argument point to it.  Updating the
	 * stack pointer by hand is safe since this is the execve
	 * sysexit stage.  It has to be done before the transfer
	 * though, since the kernel might not allow page faults below
	 * the stack pointer.  */
	script_addr = initial_sp - total_size;
	poke_reg(tracee, STACK_POINTER, script_addr);
	poke_reg(tracee, USERARG_1, script_addr);

	/* Transfer the script and the strings in one go.  */
	status = write_data(tracee, script_addr, script, total_size);
	if (status < 0)
		return status;

	/* The tracee's stack now looks like this:
	 *
	 * +-------------------+ <- initial stack pointer (higher address)
	 * | padding           |
	 * +-------------------+
	 * | raw path          |    (only if distinct from the user path)
	 * +-------------------+
	 * | interp. user path |    (only if there is an interpreter)
	 * +-------------------+
	 * | user path         |
	 * +-------------------+
	 * | start             |
	 * +-------------------+
	 * | make stack exec   |    (only if required)
	 * +-------------------+
	 * | interp. mmaps     |    (only if there is an interpreter)
	 * +-------------------+
	 * | open next         |    (only if there is an interpreter)
	 * +-------------------+
	 * | program mmaps     |
	 * +-------------------+
	 * | open              |
	 * +-------------------+ <- stack pointer, userarg1 (word aligned)
	 */

	/* This is the sysexit stage: record the current register
	 * values as the ORIGINAL ones so they are used as-is at the
	 * end of this stage.  */
	save_current_regs(tracee, ORIGINAL);
	tracee->_regs_were_changed = true;

	return 0;
}