void translate_brk_exit(Tracee *tracee) { word_t result; word_t sysnum; int tracee_errno; assert(heap_offset > 0); sysnum = get_sysnum(tracee, MODIFIED); result = peek_reg(tracee, CURRENT, SYSARG_RESULT); tracee_errno = (int) result; switch (sysnum) { case PR_void: poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size); break; case PR_mmap: case PR_mmap2: /* On error, mmap(2) returns -errno (the last 4k is * reserved for this), whereas brk(2) returns the * previous value. */ if (tracee_errno < 0 && tracee_errno > -4096) { poke_reg(tracee, SYSARG_RESULT, 0); break; } tracee->heap->base = result + heap_offset; tracee->heap->size = 0; poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size); break; case PR_mremap: /* On error, mremap(2) returns -errno (the last 4k is * reserved this), whereas brk(2) returns the previous * value. */ if ( (tracee_errno < 0 && tracee_errno > -4096) || (tracee->heap->base != result + heap_offset)) { poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size); break; } tracee->heap->size = peek_reg(tracee, MODIFIED, SYSARG_3) - heap_offset; poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size); break; default: assert(0); } DEBUG_BRK("brk() = 0x%lx\n", peek_reg(tracee, CURRENT, SYSARG_RESULT)); }
/** * Allocate @size bytes in the @tracee's memory space. This function * returns the address of the allocated memory in the @tracee's memory * space, otherwise 0 if an error occured. */ word_t alloc_mem(Tracee *tracee, ssize_t size) { word_t stack_pointer; /* Get the current value of the stack pointer from the tracee's * USER area. */ stack_pointer = peek_reg(tracee, CURRENT, STACK_POINTER); /* Some ABIs specify an amount of bytes after the stack * pointer that shall not be used by anything but the compiler * (for optimization purpose). */ if (stack_pointer == peek_reg(tracee, ORIGINAL, STACK_POINTER)) size += RED_ZONE_SIZE; /* Sanity check. */ if ( (size > 0 && stack_pointer <= size) || (size < 0 && stack_pointer >= ULONG_MAX + size)) { notice(tracee, WARNING, INTERNAL, "integer under/overflow detected in %s", __FUNCTION__); return 0; } /* Remember the stack grows downward. */ stack_pointer -= size; /* Set the new value of the stack pointer in the tracee's USER * area. */ poke_reg(tracee, STACK_POINTER, stack_pointer); return stack_pointer; }
/** * Modify the current syscall of @tracee as described by @modif * regarding the given @config. This function returns whether the * syscall was modified or not. */ static bool modify_syscall(Tracee *tracee, const Config *config, const Modif *modif) { size_t i, j; assert(config != NULL); if (!needs_kompat(config, modif->expected_release)) return false; set_sysnum(tracee, modif->new_sysarg_num); /* Shift syscall arguments. */ for (i = 0; i < MAX_ARG_SHIFT; i++) { Reg sysarg = modif->shifts[i].sysarg; size_t nb_args = modif->shifts[i].nb_args; int offset = modif->shifts[i].offset; for (j = 0; j < nb_args; j++) { word_t arg = peek_reg(tracee, CURRENT, sysarg + j); poke_reg(tracee, sysarg + j + offset, arg); } } return true; }
/** * Modify the current syscall of @tracee as described by @modif * regarding the given @config. This function returns whether the * syscall was modified or not. */ static bool modify_syscall(Tracee *tracee, const Config *config, const Modif *modif) { size_t i, j; word_t syscall; assert(config != NULL); if (!needs_kompat(config, modif->expected_release)) return false; /* Check if this syscall is supported on this architecture. */ syscall = detranslate_sysnum(get_abi(tracee), modif->new_sysarg_num); if (syscall == SYSCALL_AVOIDER) return false; set_sysnum(tracee, modif->new_sysarg_num); /* Shift syscall arguments. */ for (i = 0; i < MAX_ARG_SHIFT; i++) { Reg sysarg = modif->shifts[i].sysarg; size_t nb_args = modif->shifts[i].nb_args; int offset = modif->shifts[i].offset; for (j = 0; j < nb_args; j++) { word_t arg = peek_reg(tracee, CURRENT, sysarg + j); poke_reg(tracee, sysarg + j + offset, arg); } } return true; }
/** * Remove @discarded_flags from the given @tracee's @sysarg register * if the actual kernel release is not compatible with the * @expected_release. */ static void discard_fd_flags(Tracee *tracee, const Config *config, int discarded_flags, int expected_release, Reg sysarg) { word_t flags; if (!needs_kompat(config, expected_release)) return; flags = peek_reg(tracee, CURRENT, sysarg); poke_reg(tracee, sysarg, flags & ~discarded_flags); }
void translate_syscall(Tracee *tracee) { const bool is_enter_stage = (tracee->status == 0); int status; assert(tracee->exe != NULL); status = fetch_regs(tracee); if (status < 0) return; if (is_enter_stage) { /* Never restore original register values at the end * of this stage. */ tracee->restore_original_regs = false; print_current_regs(tracee, 3, "sysenter start"); save_current_regs(tracee, ORIGINAL); translate_syscall_enter(tracee); print_current_regs(tracee, 5, "sysenter end"); save_current_regs(tracee, MODIFIED); /* Restore tracee's stack pointer now if it won't hit * the sysexit stage (i.e. when seccomp is enabled and * there's nothing else to do). */ if (tracee->restart_how == PTRACE_CONT) { tracee->status = 0; poke_reg(tracee, STACK_POINTER, peek_reg(tracee, ORIGINAL, STACK_POINTER)); } } else { /* By default, restore original register values at the * end of this stage. */ tracee->restore_original_regs = true; print_current_regs(tracee, 5, "sysexit start"); translate_syscall_exit(tracee); print_current_regs(tracee, 4, "sysexit end"); } (void) push_regs(tracee); }
/** * Copy @size bytes of the data pointed to by @tracer_ptr into a * @tracee's memory block and make the @reg argument of the current * syscall points to this new block. This function returns -errno if * an error occured, otherwise 0. */ static int set_sysarg_data(Tracee *tracee, const void *tracer_ptr, word_t size, Reg reg) { word_t tracee_ptr; int status; /* Allocate space into the tracee's memory to host the new data. */ tracee_ptr = alloc_mem(tracee, size); if (tracee_ptr == 0) return -EFAULT; /* Copy the new data into the previously allocated space. */ status = write_data(tracee, tracee_ptr, tracer_ptr, size); if (status < 0) return status; /* Make this argument point to the new data. */ poke_reg(tracee, reg, tracee_ptr); return 0; }
/** * Allocate @size bytes in the @tracee's memory space. This function * returns the address of the allocated memory in the @tracee's memory * space, otherwise 0 if an error occured. */ word_t alloc_mem(Tracee *tracee, ssize_t size) { word_t stack_pointer; /* This function should be called in sysenter only since the * stack pointer is systematically restored at the end of * sysexit (except for execve, but in this case the stack * pointer should be handled with care since it is used by the * process to retrieve argc, argv, envp, and auxv). */ assert(IS_IN_SYSENTER(tracee)); /* Get the current value of the stack pointer from the tracee's * USER area. */ stack_pointer = peek_reg(tracee, CURRENT, STACK_POINTER); /* Some ABIs specify an amount of bytes after the stack * pointer that shall not be used by anything but the compiler * (for optimization purpose). */ if (stack_pointer == peek_reg(tracee, ORIGINAL, STACK_POINTER)) size += RED_ZONE_SIZE; /* Sanity check. */ if ( (size > 0 && stack_pointer <= (word_t) size) || (size < 0 && stack_pointer >= ULONG_MAX + size)) { note(tracee, WARNING, INTERNAL, "integer under/overflow detected in %s", __FUNCTION__); return 0; } /* Remember the stack grows downward. */ stack_pointer -= size; /* Set the new value of the stack pointer in the tracee's USER * area. */ poke_reg(tracee, STACK_POINTER, stack_pointer); return stack_pointer; }
/** * Translate the output arguments of the current @tracee's syscall in * the @tracee->pid process area. This function sets the result of * this syscall to @tracee->status if an error occured previously * during the translation, that is, if @tracee->status is less than 0. */ void translate_syscall_exit(Tracee *tracee) { word_t syscall_number; word_t syscall_result; int status; status = notify_extensions(tracee, SYSCALL_EXIT_START, 0, 0); if (status < 0) { poke_reg(tracee, SYSARG_RESULT, (word_t) status); goto end; } if (status > 0) return; /* Set the tracee's errno if an error occured previously during * the translation. */ if (tracee->status < 0) { poke_reg(tracee, SYSARG_RESULT, (word_t) tracee->status); goto end; } /* Translate output arguments: * - break: update the syscall result register with "status" * - goto end: nothing else to do. */ syscall_number = get_sysnum(tracee, ORIGINAL); syscall_result = peek_reg(tracee, CURRENT, SYSARG_RESULT); switch (syscall_number) { case PR_brk: translate_brk_exit(tracee); goto end; case PR_getcwd: { char path[PATH_MAX]; size_t new_size; size_t size; word_t output; size = (size_t) peek_reg(tracee, ORIGINAL, SYSARG_2); if (size == 0) { status = -EINVAL; break; } /* Ensure cwd still exists. */ status = translate_path(tracee, path, AT_FDCWD, ".", false); if (status < 0) break; new_size = strlen(tracee->fs->cwd) + 1; if (size < new_size) { status = -ERANGE; break; } /* Overwrite the path. */ output = peek_reg(tracee, ORIGINAL, SYSARG_1); status = write_data(tracee, output, tracee->fs->cwd, new_size); if (status < 0) break; /* The value of "status" is used to update the returned value * in translate_syscall_exit(). */ status = new_size; break; } case PR_accept: case PR_accept4: /* Nothing special to do if no sockaddr was specified. */ if (peek_reg(tracee, ORIGINAL, SYSARG_2) == 0) goto end; /* Fall through. */ case PR_getsockname: case PR_getpeername: { word_t sock_addr; word_t size_addr; word_t max_size; /* Error reported by the kernel. */ if ((int) syscall_result < 0) goto end; sock_addr = peek_reg(tracee, ORIGINAL, SYSARG_2); size_addr = peek_reg(tracee, MODIFIED, SYSARG_3); max_size = peek_reg(tracee, MODIFIED, SYSARG_6); status = translate_socketcall_exit(tracee, sock_addr, size_addr, max_size); if (status < 0) break; /* Don't overwrite the syscall result. */ goto end; } #define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) #define POKE_WORD(addr, value) \ poke_word(tracee, addr, value); \ if (errno != 0) { \ status = -errno; \ break; \ } #define PEEK_WORD(addr) \ peek_word(tracee, addr); \ if (errno != 0) { \ status = -errno; \ break; \ } case PR_socketcall: { word_t args_addr; word_t sock_addr; word_t size_addr; word_t max_size; args_addr = peek_reg(tracee, ORIGINAL, SYSARG_2); switch (peek_reg(tracee, ORIGINAL, SYSARG_1)) { case SYS_ACCEPT: case SYS_ACCEPT4: /* Nothing special to do if no sockaddr was specified. */ sock_addr = PEEK_WORD(SYSARG_ADDR(2)); if (sock_addr == 0) goto end; /* Fall through. */ case SYS_GETSOCKNAME: case SYS_GETPEERNAME: /* Handle these cases below. */ status = 1; break; case SYS_BIND: case SYS_CONNECT: /* Restore the initial parameters: this memory was * overwritten at the enter stage. Remember: POKE_WORD * puts -errno in status and breaks if an error * occured. */ POKE_WORD(SYSARG_ADDR(2), peek_reg(tracee, MODIFIED, SYSARG_5)); POKE_WORD(SYSARG_ADDR(3), peek_reg(tracee, MODIFIED, SYSARG_6)); status = 0; break; default: status = 0; break; } /* Error reported by the kernel or there's nothing else to do. */ if ((int) syscall_result < 0 || status == 0) goto end; /* An error occured in SYS_BIND or SYS_CONNECT. */ if (status < 0) break; /* Remember: PEEK_WORD puts -errno in status and breaks if an * error occured. */ sock_addr = PEEK_WORD(SYSARG_ADDR(2)); size_addr = PEEK_WORD(SYSARG_ADDR(3)); max_size = peek_reg(tracee, MODIFIED, SYSARG_6); status = translate_socketcall_exit(tracee, sock_addr, size_addr, max_size); if (status < 0) break; /* Don't overwrite the syscall result. */ goto end; } #undef SYSARG_ADDR #undef PEEK_WORD #undef POKE_WORD case PR_fchdir: case PR_chdir: /* These syscalls are fully emulated, see enter.c for details * (like errors). */ status = 0; break; case PR_rename: case PR_renameat: { char old_path[PATH_MAX]; char new_path[PATH_MAX]; ssize_t old_length; ssize_t new_length; Comparison comparison; Reg old_reg; Reg new_reg; char *tmp; /* Error reported by the kernel. */ if ((int) syscall_result < 0) goto end; if (syscall_number == PR_rename) { old_reg = SYSARG_1; new_reg = SYSARG_2; } else { old_reg = SYSARG_2; new_reg = SYSARG_4; } /* Get the old path, then convert it to the same * "point-of-view" as tracee->fs->cwd (guest). */ status = read_path(tracee, old_path, peek_reg(tracee, MODIFIED, old_reg)); if (status < 0) break; status = detranslate_path(tracee, old_path, NULL); if (status < 0) break; old_length = (status > 0 ? status - 1 : (ssize_t) strlen(old_path)); /* Nothing special to do if the moved path is not the * current working directory. */ comparison = compare_paths(old_path, tracee->fs->cwd); if (comparison != PATH1_IS_PREFIX && comparison != PATHS_ARE_EQUAL) { status = 0; break; } /* Get the new path, then convert it to the same * "point-of-view" as tracee->fs->cwd (guest). */ status = read_path(tracee, new_path, peek_reg(tracee, MODIFIED, new_reg)); if (status < 0) break; status = detranslate_path(tracee, new_path, NULL); if (status < 0) break; new_length = (status > 0 ? status - 1 : (ssize_t) strlen(new_path)); /* Sanity check. */ if (strlen(tracee->fs->cwd) >= PATH_MAX) { status = 0; break; } strcpy(old_path, tracee->fs->cwd); /* Update the virtual current working directory. */ substitute_path_prefix(old_path, old_length, new_path, new_length); tmp = talloc_strdup(tracee->fs, old_path); if (tmp == NULL) { status = -ENOMEM; break; } TALLOC_FREE(tracee->fs->cwd); tracee->fs->cwd = tmp; status = 0; break; } case PR_readlink: case PR_readlinkat: { char referee[PATH_MAX]; char referer[PATH_MAX]; size_t old_size; size_t new_size; size_t max_size; word_t input; word_t output; /* Error reported by the kernel. */ if ((int) syscall_result < 0) goto end; old_size = syscall_result; if (syscall_number == PR_readlink) { output = peek_reg(tracee, ORIGINAL, SYSARG_2); max_size = peek_reg(tracee, ORIGINAL, SYSARG_3); input = peek_reg(tracee, MODIFIED, SYSARG_1); } else { output = peek_reg(tracee, ORIGINAL, SYSARG_3); max_size = peek_reg(tracee, ORIGINAL, SYSARG_4); input = peek_reg(tracee, MODIFIED, SYSARG_2); } if (max_size > PATH_MAX) max_size = PATH_MAX; if (max_size == 0) { status = -EINVAL; break; } /* The kernel does NOT put the NULL terminating byte for * readlink(2). */ status = read_data(tracee, referee, output, old_size); if (status < 0) break; referee[old_size] = '\0'; /* Not optimal but safe (path is fully translated). */ status = read_path(tracee, referer, input); if (status < 0) break; if (status >= PATH_MAX) { status = -ENAMETOOLONG; break; } status = detranslate_path(tracee, referee, referer); if (status < 0) break; /* The original path doesn't require any transformation, i.e * it is a symetric binding. */ if (status == 0) goto end; /* Overwrite the path. Note: the output buffer might be * initialized with zeros but it was updated with the kernel * result, and then with the detranslated result. This later * might be shorter than the former, so it's safier to add a * NULL terminating byte when possible. This problem was * exposed by IDA Demo 6.3. */ if ((size_t) status < max_size) { new_size = status - 1; status = write_data(tracee, output, referee, status); } else { new_size = max_size; status = write_data(tracee, output, referee, max_size); } if (status < 0) break; /* The value of "status" is used to update the returned value * in translate_syscall_exit(). */ status = new_size; break; } #if defined(ARCH_X86_64) case PR_uname: { struct utsname utsname; word_t address; size_t size; if (get_abi(tracee) != ABI_2) goto end; /* Error reported by the kernel. */ if ((int) syscall_result < 0) goto end; address = peek_reg(tracee, ORIGINAL, SYSARG_1); status = read_data(tracee, &utsname, address, sizeof(utsname)); if (status < 0) break; /* Some 32-bit programs like package managers can be * confused when the kernel reports "x86_64". */ size = sizeof(utsname.machine); strncpy(utsname.machine, "i686", size); utsname.machine[size - 1] = '\0'; status = write_data(tracee, address, &utsname, sizeof(utsname)); if (status < 0) break; status = 0; break; } #endif case PR_execve: translate_execve_exit(tracee); goto end; case PR_ptrace: status = translate_ptrace_exit(tracee); break; case PR_wait4: case PR_waitpid: if (tracee->as_ptracer.waits_in != WAITS_IN_PROOT) goto end; status = translate_wait_exit(tracee); break; case PR_setrlimit: case PR_prlimit64: /* Error reported by the kernel. */ if ((int) syscall_result < 0) goto end; status = translate_setrlimit_exit(tracee, syscall_number == PR_prlimit64); if (status < 0) break; /* Don't overwrite the syscall result. */ goto end; default: goto end; } poke_reg(tracee, SYSARG_RESULT, (word_t) status); end: status = notify_extensions(tracee, SYSCALL_EXIT_END, status, 0); if (status < 0) poke_reg(tracee, SYSARG_RESULT, (word_t) status); }
/** * Start the loading of @tracee. This function returns no error since * it's either too late to do anything useful (the calling process is * already replaced) or the error reported by the kernel * (syscall_result < 0) will be propagated as-is. */ void translate_execve_exit(Tracee *tracee) { word_t syscall_result; int status; if (IS_NOTIFICATION_PTRACED_LOAD_DONE(tracee)) { /* Be sure not to confuse the ptracer with an * unexpected syscall/returned value. */ poke_reg(tracee, SYSARG_RESULT, 0); set_sysnum(tracee, PR_execve); /* According to most ABIs, all registers have * undefined values at program startup except: * * - the stack pointer * - the instruction pointer * - the rtld_fini pointer * - the state flags */ poke_reg(tracee, STACK_POINTER, peek_reg(tracee, ORIGINAL, SYSARG_2)); poke_reg(tracee, INSTR_POINTER, peek_reg(tracee, ORIGINAL, SYSARG_3)); poke_reg(tracee, RTLD_FINI, 0); poke_reg(tracee, STATE_FLAGS, 0); /* Restore registers to their current values. */ save_current_regs(tracee, ORIGINAL); tracee->_regs_were_changed = true; /* This is is required to make GDB work correctly * under PRoot, however it deserves to be used * unconditionally. */ (void) bind_proc_pid_auxv(tracee); /* If the PTRACE_O_TRACEEXEC option is *not* in effect * for the execing tracee, the kernel delivers an * extra SIGTRAP to the tracee after execve(2) * *returns*. This is an ordinary signal (similar to * one which can be generated by "kill -TRAP"), not a * special kind of ptrace-stop. Employing * PTRACE_GETSIGINFO for this signal returns si_code * set to 0 (SI_USER). This signal may be blocked by * signal mask, and thus may be delivered (much) * later. -- man 2 ptrace * * This signal is delayed so far since the program was * not fully loaded yet; GDB would get "invalid * adress" errors otherwise. */ if ((tracee->as_ptracee.options & PTRACE_O_TRACEEXEC) == 0) kill(tracee->pid, SIGTRAP); return; } syscall_result = peek_reg(tracee, CURRENT, SYSARG_RESULT); if ((int) syscall_result < 0) return; /* Execve happened; commit the new "/proc/self/exe". */ if (tracee->new_exe != NULL) { (void) talloc_unlink(tracee, tracee->exe); tracee->exe = talloc_reference(tracee, tracee->new_exe); talloc_set_name_const(tracee->exe, "$exe"); } /* New processes have no heap. */ bzero(tracee->heap, sizeof(Heap)); /* Transfer the load script to the loader. */ status = transfer_load_script(tracee); if (status < 0) note(tracee, ERROR, INTERNAL, "can't transfer load script: %s", strerror(-status)); return; }
/** * Convert @tracee->load_info into a load script, then transfer this * latter into @tracee's memory. */ static int transfer_load_script(Tracee *tracee) { const word_t stack_pointer = peek_reg(tracee, CURRENT, STACK_POINTER); static word_t page_size = 0; static word_t page_mask = 0; word_t entry_point; size_t script_size; size_t strings_size; size_t string1_size; size_t string2_size; size_t string3_size; size_t padding_size; word_t string1_address; word_t string2_address; word_t string3_address; void *buffer; size_t buffer_size; bool needs_executable_stack; LoadStatement *statement; void *cursor; int status; if (page_size == 0) { page_size = sysconf(_SC_PAGE_SIZE); if ((int) page_size <= 0) page_size = 0x1000; page_mask = ~(page_size - 1); } needs_executable_stack = (tracee->load_info->needs_executable_stack || ( tracee->load_info->interp != NULL && tracee->load_info->interp->needs_executable_stack)); /* Strings addresses are required to generate the load script, * for "open" actions. Since I want to generate it in one * pass, these strings will be put right below the current * stack pointer -- the only known adresses so far -- in the * "strings area". */ string1_size = strlen(tracee->load_info->user_path) + 1; string2_size = (tracee->load_info->interp == NULL ? 0 : strlen(tracee->load_info->interp->user_path) + 1); string3_size = (tracee->load_info->raw_path == tracee->load_info->user_path ? 0 : strlen(tracee->load_info->raw_path) + 1); /* A padding will be appended at the end of the load script * (a.k.a "strings area") to ensure this latter is aligned to * a word boundary, for sake of performance. */ padding_size = (stack_pointer - string1_size - string2_size - string3_size) % sizeof_word(tracee); strings_size = string1_size + string2_size + string3_size + padding_size; string1_address = stack_pointer - strings_size; string2_address = stack_pointer - strings_size + string1_size; string3_address = (string3_size == 0 ? string1_address : stack_pointer - strings_size + string1_size + string2_size); /* Compute the size of the load script. */ script_size = LOAD_STATEMENT_SIZE(*statement, open) + (LOAD_STATEMENT_SIZE(*statement, mmap) * talloc_array_length(tracee->load_info->mappings)) + (tracee->load_info->interp == NULL ? 0 : LOAD_STATEMENT_SIZE(*statement, open) + (LOAD_STATEMENT_SIZE(*statement, mmap) * talloc_array_length(tracee->load_info->interp->mappings))) + (needs_executable_stack ? LOAD_STATEMENT_SIZE(*statement, make_stack_exec) : 0) + LOAD_STATEMENT_SIZE(*statement, start); /* Allocate enough room for both the load script and the * strings area. */ buffer_size = script_size + strings_size; buffer = talloc_zero_size(tracee->ctx, buffer_size); if (buffer == NULL) return -ENOMEM; cursor = buffer; /* Load script statement: open. */ statement = cursor; statement->action = LOAD_ACTION_OPEN; statement->open.string_address = string1_address; cursor += LOAD_STATEMENT_SIZE(*statement, open); /* Load script statements: mmap. */ cursor = transcript_mappings(cursor, tracee->load_info->mappings); if (tracee->load_info->interp != NULL) { /* Load script statement: open. */ statement = cursor; statement->action = LOAD_ACTION_OPEN_NEXT; statement->open.string_address = string2_address; cursor += LOAD_STATEMENT_SIZE(*statement, open); /* Load script statements: mmap. */ cursor = transcript_mappings(cursor, tracee->load_info->interp->mappings); entry_point = ELF_FIELD(tracee->load_info->interp->elf_header, entry); } else entry_point = ELF_FIELD(tracee->load_info->elf_header, entry); if (needs_executable_stack) { /* Load script statement: stack_exec. */ statement = cursor; statement->action = LOAD_ACTION_MAKE_STACK_EXEC; statement->make_stack_exec.start = stack_pointer & page_mask; cursor += LOAD_STATEMENT_SIZE(*statement, make_stack_exec); } /* Load script statement: start. */ statement = cursor; /* Start of the program slightly differs when ptraced. */ if (tracee->as_ptracee.ptracer != NULL) statement->action = LOAD_ACTION_START_TRACED; else statement->action = LOAD_ACTION_START; statement->start.stack_pointer = stack_pointer; statement->start.entry_point = entry_point; statement->start.at_phent = ELF_FIELD(tracee->load_info->elf_header, phentsize); statement->start.at_phnum = ELF_FIELD(tracee->load_info->elf_header, phnum); statement->start.at_entry = ELF_FIELD(tracee->load_info->elf_header, entry); statement->start.at_phdr = ELF_FIELD(tracee->load_info->elf_header, phoff) + tracee->load_info->mappings[0].addr; statement->start.at_execfn = string3_address; cursor += LOAD_STATEMENT_SIZE(*statement, start); /* Sanity check. */ assert((uintptr_t) cursor - (uintptr_t) buffer == script_size); /* Convert the load script to the expected format. */ if (is_32on64_mode(tracee)) { int i; for (i = 0; buffer + i * sizeof(uint64_t) < cursor; i++) ((uint32_t *) buffer)[i] = ((uint64_t *) buffer)[i]; } /* Concatenate the load script and the strings. */ memcpy(cursor, tracee->load_info->user_path, string1_size); cursor += string1_size; if (string2_size != 0) { memcpy(cursor, tracee->load_info->interp->user_path, string2_size); cursor += string2_size; } if (string3_size != 0) { memcpy(cursor, tracee->load_info->raw_path, string3_size); cursor += string3_size; } /* Sanity check. */ cursor += padding_size; assert((uintptr_t) cursor - (uintptr_t) buffer == buffer_size); /* Allocate enough room in tracee's memory for the load * script, and make the first user argument points to this * location. Note that it is safe to update the stack pointer * manually since we are in execve sysexit. However it should * be done before transfering data since the kernel might not * allow page faults below the stack pointer. */ poke_reg(tracee, STACK_POINTER, stack_pointer - buffer_size); poke_reg(tracee, USERARG_1, stack_pointer - buffer_size); /* Copy everything in the tracee's memory at once. */ status = write_data(tracee, stack_pointer - buffer_size, buffer, buffer_size); if (status < 0) return status; /* Tracee's stack content is now as follow: * * +------------+ <- initial stack pointer (higher address) * | padding | * +------------+ * | string3 | * +------------+ * | string2 | * +------------+ * | string1 | * +------------+ * | start | * +------------+ * | mmap anon | * +------------+ * | mmap file | * +------------+ * | open next | * +------------+ * | mmap anon. | * +------------+ * | mmap file | * +------------+ * | open | * +------------+ <- stack pointer, userarg1 (word aligned) */ /* Remember we are in the sysexit stage, so be sure the * current register values will be used as-is at the end. */ save_current_regs(tracee, ORIGINAL); tracee->_regs_were_changed = true; return 0; }
void pt_debugger_x64::result(long value) { poke_reg(RAX, value); }
word_t translate_brk_enter(Tracee *tracee) { word_t new_brk_address; size_t old_heap_size; size_t new_heap_size; if (heap_offset == 0) { heap_offset = sysconf(_SC_PAGE_SIZE); if ((int) heap_offset <= 0) heap_offset = 0x1000; } /* Non-fixed mmap pages might be placed right after the * emulated heap on some architectures. A solution is to * preallocate some space to ensure a minimal heap size. */ if (tracee->heap->prealloc_size == 0) tracee->heap->prealloc_size = MAX(PREALLOCATED_HEAP_SIZE, heap_offset); new_brk_address = peek_reg(tracee, CURRENT, SYSARG_1); DEBUG_BRK("brk(0x%lx)\n", new_brk_address); /* Allocate a new mapping for the emulated heap. */ if (tracee->heap->base == 0) { Sysnum sysnum; if (new_brk_address != 0) notice(tracee, WARNING, INTERNAL, "process %d is doing suspicious brk()", tracee->pid); /* I don't understand yet why mmap(2) fails (EFAULT) * on architectures that also have mmap2(2). Maybe * this former implies MAP_FIXED in such cases. */ sysnum = detranslate_sysnum(get_abi(tracee), PR_mmap2) != SYSCALL_AVOIDER ? PR_mmap2 : PR_mmap; set_sysnum(tracee, sysnum); poke_reg(tracee, SYSARG_1 /* address */, 0); poke_reg(tracee, SYSARG_2 /* length */, heap_offset + tracee->heap->prealloc_size); poke_reg(tracee, SYSARG_3 /* prot */, PROT_READ | PROT_WRITE); poke_reg(tracee, SYSARG_4 /* flags */, MAP_PRIVATE | MAP_ANONYMOUS); poke_reg(tracee, SYSARG_5 /* fd */, -1); poke_reg(tracee, SYSARG_6 /* offset */, 0); return 0; } /* The size of the heap can't be negative. */ if (new_brk_address < tracee->heap->base) { set_sysnum(tracee, PR_void); return 0; } new_heap_size = new_brk_address - tracee->heap->base; old_heap_size = tracee->heap->size; /* Clear the released memory in preallocated space, so it will be * in the expected state next time it will be reallocated. */ if (new_heap_size < old_heap_size && new_heap_size < tracee->heap->prealloc_size) { (void) clear_mem(tracee, tracee->heap->base + new_heap_size, MIN(old_heap_size, tracee->heap->prealloc_size) - new_heap_size); } /* No need to use mremap when both old size and new size are * in the preallocated space. */ if ( new_heap_size <= tracee->heap->prealloc_size && old_heap_size <= tracee->heap->prealloc_size) { tracee->heap->size = new_heap_size; set_sysnum(tracee, PR_void); return 0; } /* Ensure the preallocated space will never be released. */ new_heap_size = MAX(new_heap_size, tracee->heap->prealloc_size); old_heap_size = MAX(old_heap_size, tracee->heap->prealloc_size); /* Actually resizing. */ set_sysnum(tracee, PR_mremap); poke_reg(tracee, SYSARG_1 /* old_address */, tracee->heap->base - heap_offset); poke_reg(tracee, SYSARG_2 /* old_size */, old_heap_size + heap_offset); poke_reg(tracee, SYSARG_3 /* new_size */, new_heap_size + heap_offset); poke_reg(tracee, SYSARG_4 /* flags */, 0); poke_reg(tracee, SYSARG_5 /* new_address */, 0); return 0; }
static int handle_sysenter_end(Tracee *tracee, Config *config) { int status; int sysnum; sysnum = get_sysnum(tracee, CURRENT); switch(sysnum) { #define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) #define PEEK_WORD(addr, forced_errno) \ peek_word(tracee, addr); \ if (errno != 0) { \ status = forced_errno ?: -errno; \ break; \ } #define POKE_WORD(addr, value) \ poke_word(tracee, addr, value); \ if (errno != 0) { \ status = -errno; \ break; \ } case PR_socketcall: { word_t sockfd; word_t args_addr; word_t sock_addr_saved; word_t sock_addr; word_t size; word_t call; int is_bind_syscall = sysnum == PR_bind; call = peek_reg(tracee, CURRENT, SYSARG_1); is_bind_syscall = call == SYS_BIND; args_addr = peek_reg(tracee, CURRENT, SYSARG_2); switch(call) { case SYS_BIND: case SYS_CONNECT: { /* Remember: PEEK_WORD puts -errno in status and breaks if an * error occured. */ sockfd = PEEK_WORD(SYSARG_ADDR(1), 0); sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); size = PEEK_WORD(SYSARG_ADDR(3), 0); sock_addr_saved = sock_addr; status = translate_port(tracee, config, sockfd, &sock_addr, size, is_bind_syscall); if (status < 0) break; /* These parameters are used/restored at the exit stage. */ poke_reg(tracee, SYSARG_5, sock_addr_saved); poke_reg(tracee, SYSARG_6, size); /* Remember: POKE_WORD puts -errno in status and breaks if an * error occured. */ POKE_WORD(SYSARG_ADDR(2), sock_addr); POKE_WORD(SYSARG_ADDR(3), sizeof(struct sockaddr_un)); return 0; } case SYS_LISTEN: { word_t sockfd; if(!config->netcoop_mode || !config->need_to_check_new_port) return 0; /* we retrieve this one from the listen() system call */ sockfd = PEEK_WORD(SYSARG_ADDR(1), 0); status = prepare_getsockname_chained_syscall(tracee, config, sockfd, true); return status; } default: return 0; } break; } #undef SYSARG_ADDR #undef PEEK_WORD #undef POKE_WORD case PR_connect: case PR_bind: { int size; int is_bind_syscall; word_t sockfd, sock_addr; /* * Get the reg address of the socket, and the size of the structure. * Note that the sockaddr and addrlen are at the same position for all 4 of these syscalls. */ sockfd = peek_reg(tracee, CURRENT, SYSARG_1); sock_addr = peek_reg(tracee, CURRENT, SYSARG_2); size = (int) peek_reg(tracee, CURRENT, SYSARG_3); is_bind_syscall = sysnum == PR_bind; status = translate_port(tracee, config, sockfd, &sock_addr, size, is_bind_syscall); if (status < 0) { return status; } /* then we modify the syscall argument so that it uses the modified socket address */ poke_reg(tracee, SYSARG_2, sock_addr); //poke_reg(tracee, SYSARG_3, size); poke_reg(tracee, SYSARG_3, sizeof(struct sockaddr_un)); return 0; } case PR_listen: { word_t sockfd; if(!config->netcoop_mode || !config->need_to_check_new_port) return 0; /* we retrieve this one from the listen() system call */ sockfd = peek_reg(tracee, CURRENT, SYSARG_1); status = prepare_getsockname_chained_syscall(tracee, config, sockfd, false); return status; } default: return 0; } return 0; }
/** * Replace current @tracee's syscall with an older and compatible one * whenever it's required, i.e. when the syscall is supported by the * kernel as specified by @config->virtual_release but it isn't * supported by the actual kernel. */ static int handle_sysenter_end(Tracee *tracee, Config *config) { /* Note: syscalls like "openat" can be replaced by "open" since PRoot * has canonicalized "fd + path" into "path". */ switch (get_sysnum(tracee, ORIGINAL)) { case PR_accept4: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,28), .new_sysarg_num = PR_accept, .shifts = NONE }; modify_syscall(tracee, config, &modif); return 0; } case PR_dup3: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_dup2, .shifts = NONE }; /* "If oldfd equals newfd, then dup3() fails with the * error EINVAL" -- man dup3 */ if (peek_reg(tracee, CURRENT, SYSARG_1) == peek_reg(tracee, CURRENT, SYSARG_2)) return -EINVAL; modify_syscall(tracee, config, &modif); return 0; } case PR_epoll_create1: { bool modified; Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_epoll_create, .shifts = NONE }; /* "the size argument is ignored, but must be greater * than zero" -- man epoll_create */ modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_1, 1); return 0; } case PR_epoll_pwait: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,19), .new_sysarg_num = PR_epoll_wait, .shifts = NONE }; modify_syscall(tracee, config, &modif); return 0; } case PR_eventfd2: { bool modified; word_t flags; Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_eventfd, .shifts = NONE }; modified = modify_syscall(tracee, config, &modif); if (modified) { /* EFD_SEMAPHORE can't be emulated with eventfd. */ flags = peek_reg(tracee, CURRENT, SYSARG_2); if ((flags & EFD_SEMAPHORE) != 0) return -EINVAL; } return 0; } case PR_faccessat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_access, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modify_syscall(tracee, config, &modif); return 0; } case PR_fchmodat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_chmod, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modify_syscall(tracee, config, &modif); return 0; } case PR_fchownat: { word_t flags; Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 3, .offset = -1 } } }; flags = peek_reg(tracee, CURRENT, SYSARG_5); modif.new_sysarg_num = ((flags & AT_SYMLINK_NOFOLLOW) != 0 ? PR_lchown : PR_chown); modify_syscall(tracee, config, &modif); return 0; } case PR_fcntl: { word_t command; if (!needs_kompat(config, KERNEL_VERSION(2,6,24))) return 0; command = peek_reg(tracee, ORIGINAL, SYSARG_2); if (command == F_DUPFD_CLOEXEC) poke_reg(tracee, SYSARG_2, F_DUPFD); return 0; } case PR_newfstatat: case PR_fstatat64: { word_t flags; Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; flags = peek_reg(tracee, CURRENT, SYSARG_4); if ((flags & ~AT_SYMLINK_NOFOLLOW) != 0) return -EINVAL; /* Exposed by LTP. */ #if defined(ARCH_X86_64) if ((flags & AT_SYMLINK_NOFOLLOW) != 0) modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? PR_lstat : PR_lstat64); else modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? PR_stat : PR_stat64); #else if ((flags & AT_SYMLINK_NOFOLLOW) != 0) modif.new_sysarg_num = PR_lstat64; else modif.new_sysarg_num = PR_stat64; #endif modify_syscall(tracee, config, &modif); return 0; } case PR_futex: { word_t operation; static bool warned = false; if (!needs_kompat(config, KERNEL_VERSION(2,6,22)) || config->actual_release == 0) return 0; operation = peek_reg(tracee, CURRENT, SYSARG_2); if ((operation & FUTEX_PRIVATE_FLAG) == 0) return 0; if (!warned) { warned = true; note(tracee, WARNING, USER, "kompat: this kernel doesn't support private futexes " "and PRoot can't emulate them. Expect some troubles..."); } poke_reg(tracee, SYSARG_2, operation & ~FUTEX_PRIVATE_FLAG); return 0; } case PR_futimesat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_utimes, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modify_syscall(tracee, config, &modif); return 0; } case PR_inotify_init1: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_inotify_init, .shifts = NONE }; modify_syscall(tracee, config, &modif); return 0; } case PR_linkat: { word_t flags; Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_link, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 1, .offset = -1 }, [1] = { .sysarg = SYSARG_4, .nb_args = 1, .offset = -2 } } }; flags = peek_reg(tracee, CURRENT, SYSARG_5); if ((flags & ~AT_SYMLINK_FOLLOW) != 0) return -EINVAL; /* Exposed by LTP. */ modify_syscall(tracee, config, &modif); return 0; } case PR_mkdirat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_mkdir, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modify_syscall(tracee, config, &modif); return 0; }
/** * Force current @tracee's syscall to behave as if executed by "root". * This function returns -errno if an error occured, otherwise 0. */ static int handle_sysexit_end(Tracee *tracee) { word_t sysnum; sysnum = get_sysnum(tracee, ORIGINAL); switch (sysnum) { case PR_chroot: { char path[PATH_MAX]; word_t result; word_t input; int status; /* Override only permission errors. */ result = peek_reg(tracee, CURRENT, SYSARG_RESULT); if ((int) result != -EPERM) return 0; input = peek_reg(tracee, MODIFIED, SYSARG_1); status = read_path(tracee, path, input); if (status < 0) return status; /* Only "new rootfs == current rootfs" is supported yet. */ status = compare_paths(get_root(tracee), path); if (status != PATHS_ARE_EQUAL) return 0; /* Force success. */ poke_reg(tracee, SYSARG_RESULT, 0); return 0; } case PR_setresuid: case PR_setresgid: case PR_setresuid32: case PR_setresgid32: case PR_mknod: case PR_capset: case PR_setxattr: case PR_chmod: case PR_chown: case PR_fchmod: case PR_fchown: case PR_lchown: case PR_chown32: case PR_fchown32: case PR_lchown32: case PR_fchmodat: case PR_fchownat: { word_t result; /* Override only permission errors. */ result = peek_reg(tracee, CURRENT, SYSARG_RESULT); if ((int) result != -EPERM) return 0; /* Force success. */ poke_reg(tracee, SYSARG_RESULT, 0); return 0; } case PR_getresuid: case PR_getresuid32: case PR_getresgid: case PR_getresgid32: poke_mem(tracee, peek_reg(tracee, ORIGINAL, SYSARG_1), 0); if (errno != 0) return -EFAULT; poke_mem(tracee, peek_reg(tracee, ORIGINAL, SYSARG_2), 0); if (errno != 0) return -EFAULT; poke_mem(tracee, peek_reg(tracee, ORIGINAL, SYSARG_3), 0); if (errno != 0) return -EFAULT; /* Force success. */ poke_reg(tracee, SYSARG_RESULT, 0); return 0; case PR_fstatat64: case PR_newfstatat: case PR_stat64: case PR_lstat64: case PR_fstat64: case PR_stat: case PR_lstat: case PR_fstat: { word_t result; word_t address; word_t uid, gid; Reg sysarg; /* Override only if it succeed. */ result = peek_reg(tracee, CURRENT, SYSARG_RESULT); if (result != 0) return 0; /* Get the address of the 'stat' structure. */ if (sysnum == PR_fstatat64 || sysnum == PR_newfstatat) sysarg = SYSARG_3; else sysarg = SYSARG_2; address = peek_reg(tracee, ORIGINAL, sysarg); /* Get the uid & gid values from the 'stat' structure. */ uid = peek_mem(tracee, address + offsetof_stat_uid(tracee)); if (errno != 0) uid = 0; /* Not fatal. */ gid = peek_mem(tracee, address + offsetof_stat_gid(tracee)); if (errno != 0) gid = 0; /* Not fatal. */ /* These values are 32-bit width, even on 64-bit architecture. */ uid &= 0xFFFFFFFF; gid &= 0xFFFFFFFF; /* Override only if the file is owned by the current user. * Errors are not fatal here. */ if (uid == getuid()) poke_mem(tracee, address + offsetof_stat_uid(tracee), 0); if (gid == getgid()) poke_mem(tracee, address + offsetof_stat_gid(tracee), 0); return 0; } case PR_getuid: case PR_getgid: case PR_getegid: case PR_geteuid: case PR_getuid32: case PR_getgid32: case PR_geteuid32: case PR_getegid32: case PR_setuid: case PR_setgid: case PR_setfsuid: case PR_setfsgid: case PR_setuid32: case PR_setgid32: case PR_setfsuid32: case PR_setfsgid32: /* Force success. */ poke_reg(tracee, SYSARG_RESULT, 0); return 0; default: return 0; } }
void pt_debugger_arm::syscall(int id) { poke_reg(ARM_r7, id); }
/** * Overwrite the @tracee's current syscall number with @sysnum. Note: * this neutral value is automatically converted into the architecture * value. */ void set_sysnum(Tracee *tracee, Sysnum sysnum) { poke_reg(tracee, SYSARG_NUM, detranslate_sysnum(get_abi(tracee), sysnum)); }
/** * Translate the input arguments of the current @tracee's syscall in the * @tracee->pid process area. This function sets @tracee->status to * -errno if an error occured from the tracee's point-of-view (EFAULT * for instance), otherwise 0. */ int translate_syscall_enter(Tracee *tracee) { int flags; int dirfd; int olddirfd; int newdirfd; int status; int status2; char path[PATH_MAX]; char oldpath[PATH_MAX]; char newpath[PATH_MAX]; word_t syscall_number; bool special = false; status = notify_extensions(tracee, SYSCALL_ENTER_START, 0, 0); if (status < 0) goto end; if (status > 0) return 0; /* Translate input arguments. */ syscall_number = get_sysnum(tracee, ORIGINAL); switch (syscall_number) { default: /* Nothing to do. */ status = 0; break; case PR_execve: status = translate_execve_enter(tracee); break; case PR_ptrace: status = translate_ptrace_enter(tracee); break; case PR_wait4: case PR_waitpid: status = translate_wait_enter(tracee); break; case PR_brk: translate_brk_enter(tracee); status = 0; break; case PR_getcwd: set_sysnum(tracee, PR_void); status = 0; break; case PR_fchdir: case PR_chdir: { struct stat statl; char *tmp; /* The ending "." ensures an error will be reported if * path does not exist or if it is not a directory. */ if (syscall_number == PR_chdir) { status = get_sysarg_path(tracee, path, SYSARG_1); if (status < 0) break; status = join_paths(2, oldpath, path, "."); if (status < 0) break; dirfd = AT_FDCWD; } else { strcpy(oldpath, "."); dirfd = peek_reg(tracee, CURRENT, SYSARG_1); } status = translate_path(tracee, path, dirfd, oldpath, true); if (status < 0) break; status = lstat(path, &statl); if (status < 0) break; /* Check this directory is accessible. */ if ((statl.st_mode & S_IXUSR) == 0) return -EACCES; /* Sadly this method doesn't detranslate statefully, * this means that there's an ambiguity when several * bindings are from the same host path: * * $ proot -m /tmp:/a -m /tmp:/b fchdir_getcwd /a * /b * * $ proot -m /tmp:/b -m /tmp:/a fchdir_getcwd /a * /a * * A solution would be to follow each file descriptor * just like it is done for cwd. */ status = detranslate_path(tracee, path, NULL); if (status < 0) break; /* Remove the trailing "/" or "/.". */ chop_finality(path); tmp = talloc_strdup(tracee->fs, path); if (tmp == NULL) { status = -ENOMEM; break; } TALLOC_FREE(tracee->fs->cwd); tracee->fs->cwd = tmp; talloc_set_name_const(tracee->fs->cwd, "$cwd"); set_sysnum(tracee, PR_void); status = 0; break; } case PR_bind: case PR_connect: { word_t address; word_t size; address = peek_reg(tracee, CURRENT, SYSARG_2); size = peek_reg(tracee, CURRENT, SYSARG_3); status = translate_socketcall_enter(tracee, &address, size); if (status <= 0) break; poke_reg(tracee, SYSARG_2, address); poke_reg(tracee, SYSARG_3, sizeof(struct sockaddr_un)); status = 0; break; } #define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) #define PEEK_WORD(addr, forced_errno) \ peek_word(tracee, addr); \ if (errno != 0) { \ status = forced_errno ?: -errno; \ break; \ } #define POKE_WORD(addr, value) \ poke_word(tracee, addr, value); \ if (errno != 0) { \ status = -errno; \ break; \ } case PR_accept: case PR_accept4: /* Nothing special to do if no sockaddr was specified. */ if (peek_reg(tracee, ORIGINAL, SYSARG_2) == 0) { status = 0; break; } special = true; /* Fall through. */ case PR_getsockname: case PR_getpeername:{ int size; /* Remember: PEEK_WORD puts -errno in status and breaks if an * error occured. */ size = (int) PEEK_WORD(peek_reg(tracee, ORIGINAL, SYSARG_3), special ? -EINVAL : 0); /* The "size" argument is both used as an input parameter * (max. size) and as an output parameter (actual size). The * exit stage needs to know the max. size to not overwrite * anything, that's why it is copied in the 6th argument * (unused) before the kernel updates it. */ poke_reg(tracee, SYSARG_6, size); status = 0; break; } case PR_socketcall: { word_t args_addr; word_t sock_addr_saved; word_t sock_addr; word_t size_addr; word_t size; args_addr = peek_reg(tracee, CURRENT, SYSARG_2); switch (peek_reg(tracee, CURRENT, SYSARG_1)) { case SYS_BIND: case SYS_CONNECT: /* Handle these cases below. */ status = 1; break; case SYS_ACCEPT: case SYS_ACCEPT4: /* Nothing special to do if no sockaddr was specified. */ sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); if (sock_addr == 0) { status = 0; break; } special = true; /* Fall through. */ case SYS_GETSOCKNAME: case SYS_GETPEERNAME: /* Remember: PEEK_WORD puts -errno in status and breaks * if an error occured. */ size_addr = PEEK_WORD(SYSARG_ADDR(3), 0); size = (int) PEEK_WORD(size_addr, special ? -EINVAL : 0); /* See case PR_accept for explanation. */ poke_reg(tracee, SYSARG_6, size); status = 0; break; default: status = 0; break; } /* An error occured or there's nothing else to do. */ if (status <= 0) break; /* Remember: PEEK_WORD puts -errno in status and breaks if an * error occured. */ sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); size = PEEK_WORD(SYSARG_ADDR(3), 0); sock_addr_saved = sock_addr; status = translate_socketcall_enter(tracee, &sock_addr, size); if (status <= 0) break; /* These parameters are used/restored at the exit stage. */ poke_reg(tracee, SYSARG_5, sock_addr_saved); poke_reg(tracee, SYSARG_6, size); /* Remember: POKE_WORD puts -errno in status and breaks if an * error occured. */ POKE_WORD(SYSARG_ADDR(2), sock_addr); POKE_WORD(SYSARG_ADDR(3), sizeof(struct sockaddr_un)); status = 0; break; } #undef SYSARG_ADDR #undef PEEK_WORD #undef POKE_WORD case PR_access: case PR_acct: case PR_chmod: case PR_chown: case PR_chown32: case PR_chroot: case PR_getxattr: case PR_listxattr: case PR_mknod: case PR_oldstat: case PR_creat: case PR_removexattr: case PR_setxattr: case PR_stat: case PR_stat64: case PR_statfs: case PR_statfs64: case PR_swapoff: case PR_swapon: case PR_truncate: case PR_truncate64: case PR_umount: case PR_umount2: case PR_uselib: case PR_utime: case PR_utimes: status = translate_sysarg(tracee, SYSARG_1, REGULAR); break; case PR_open: flags = peek_reg(tracee, CURRENT, SYSARG_2); if ( ((flags & O_NOFOLLOW) != 0) || ((flags & O_EXCL) != 0 && (flags & O_CREAT) != 0)) status = translate_sysarg(tracee, SYSARG_1, SYMLINK); else status = translate_sysarg(tracee, SYSARG_1, REGULAR); break; case PR_fchownat: case PR_fstatat64: case PR_newfstatat: case PR_utimensat: case PR_name_to_handle_at: dirfd = peek_reg(tracee, CURRENT, SYSARG_1); status = get_sysarg_path(tracee, path, SYSARG_2); if (status < 0) break; flags = ( syscall_number == PR_fchownat || syscall_number == PR_name_to_handle_at) ? peek_reg(tracee, CURRENT, SYSARG_5) : peek_reg(tracee, CURRENT, SYSARG_4); if ((flags & AT_SYMLINK_NOFOLLOW) != 0) status = translate_path2(tracee, dirfd, path, SYSARG_2, SYMLINK); else status = translate_path2(tracee, dirfd, path, SYSARG_2, REGULAR); break; case PR_fchmodat: case PR_faccessat: case PR_futimesat: case PR_mknodat: dirfd = peek_reg(tracee, CURRENT, SYSARG_1); status = get_sysarg_path(tracee, path, SYSARG_2); if (status < 0) break; status = translate_path2(tracee, dirfd, path, SYSARG_2, REGULAR); break; case PR_inotify_add_watch: flags = peek_reg(tracee, CURRENT, SYSARG_3); if ((flags & IN_DONT_FOLLOW) != 0) status = translate_sysarg(tracee, SYSARG_2, SYMLINK); else status = translate_sysarg(tracee, SYSARG_2, REGULAR); break; case PR_readlink: case PR_lchown: case PR_lchown32: case PR_lgetxattr: case PR_llistxattr: case PR_lremovexattr: case PR_lsetxattr: case PR_lstat: case PR_lstat64: case PR_oldlstat: case PR_unlink: case PR_rmdir: case PR_mkdir: status = translate_sysarg(tracee, SYSARG_1, SYMLINK); break; case PR_pivot_root: status = translate_sysarg(tracee, SYSARG_1, REGULAR); if (status < 0) break; status = translate_sysarg(tracee, SYSARG_2, REGULAR); break; case PR_linkat: olddirfd = peek_reg(tracee, CURRENT, SYSARG_1); newdirfd = peek_reg(tracee, CURRENT, SYSARG_3); flags = peek_reg(tracee, CURRENT, SYSARG_5); status = get_sysarg_path(tracee, oldpath, SYSARG_2); if (status < 0) break; status = get_sysarg_path(tracee, newpath, SYSARG_4); if (status < 0) break; if ((flags & AT_SYMLINK_FOLLOW) != 0) status = translate_path2(tracee, olddirfd, oldpath, SYSARG_2, REGULAR); else status = translate_path2(tracee, olddirfd, oldpath, SYSARG_2, SYMLINK); if (status < 0) break; status = translate_path2(tracee, newdirfd, newpath, SYSARG_4, SYMLINK); break; case PR_mount: status = get_sysarg_path(tracee, path, SYSARG_1); if (status < 0) break; /* The following check covers only 90% of the cases. */ if (path[0] == '/' || path[0] == '.') { status = translate_path2(tracee, AT_FDCWD, path, SYSARG_1, REGULAR); if (status < 0) break; } status = translate_sysarg(tracee, SYSARG_2, REGULAR); break; case PR_openat: dirfd = peek_reg(tracee, CURRENT, SYSARG_1); flags = peek_reg(tracee, CURRENT, SYSARG_3); status = get_sysarg_path(tracee, path, SYSARG_2); if (status < 0) break; if ( ((flags & O_NOFOLLOW) != 0) || ((flags & O_EXCL) != 0 && (flags & O_CREAT) != 0)) status = translate_path2(tracee, dirfd, path, SYSARG_2, SYMLINK); else status = translate_path2(tracee, dirfd, path, SYSARG_2, REGULAR); break; case PR_readlinkat: case PR_unlinkat: case PR_mkdirat: dirfd = peek_reg(tracee, CURRENT, SYSARG_1); status = get_sysarg_path(tracee, path, SYSARG_2); if (status < 0) break; status = translate_path2(tracee, dirfd, path, SYSARG_2, SYMLINK); break; case PR_link: case PR_rename: status = translate_sysarg(tracee, SYSARG_1, SYMLINK); if (status < 0) break; status = translate_sysarg(tracee, SYSARG_2, SYMLINK); break; case PR_renameat: olddirfd = peek_reg(tracee, CURRENT, SYSARG_1); newdirfd = peek_reg(tracee, CURRENT, SYSARG_3); status = get_sysarg_path(tracee, oldpath, SYSARG_2); if (status < 0) break; status = get_sysarg_path(tracee, newpath, SYSARG_4); if (status < 0) break; status = translate_path2(tracee, olddirfd, oldpath, SYSARG_2, SYMLINK); if (status < 0) break; status = translate_path2(tracee, newdirfd, newpath, SYSARG_4, SYMLINK); break; case PR_symlink: status = translate_sysarg(tracee, SYSARG_2, SYMLINK); break; case PR_symlinkat: newdirfd = peek_reg(tracee, CURRENT, SYSARG_2); status = get_sysarg_path(tracee, newpath, SYSARG_3); if (status < 0) break; status = translate_path2(tracee, newdirfd, newpath, SYSARG_3, SYMLINK); break; } end: status2 = notify_extensions(tracee, SYSCALL_ENTER_END, status, 0); if (status2 < 0) status = status2; return status; }
void translate_syscall(Tracee *tracee) { const bool is_enter_stage = IS_IN_SYSENTER(tracee); int status; assert(tracee->exe != NULL); status = fetch_regs(tracee); if (status < 0) return; if (is_enter_stage) { /* Never restore original register values at the end * of this stage. */ tracee->restore_original_regs = false; print_current_regs(tracee, 3, "sysenter start"); /* Translate the syscall only if it was actually * requested by the tracee, it is not a syscall * chained by PRoot. */ if (tracee->chain.syscalls == NULL) { save_current_regs(tracee, ORIGINAL); status = translate_syscall_enter(tracee); save_current_regs(tracee, MODIFIED); } else { status = notify_extensions(tracee, SYSCALL_CHAINED_ENTER, 0, 0); tracee->restart_how = PTRACE_SYSCALL; } /* Remember the tracee status for the "exit" stage and * avoid the actual syscall if an error was reported * by the translation/extension. */ if (status < 0) { set_sysnum(tracee, PR_void); poke_reg(tracee, SYSARG_RESULT, (word_t) status); tracee->status = status; } else tracee->status = 1; /* Restore tracee's stack pointer now if it won't hit * the sysexit stage (i.e. when seccomp is enabled and * there's nothing else to do). */ if (tracee->restart_how == PTRACE_CONT) { tracee->status = 0; poke_reg(tracee, STACK_POINTER, peek_reg(tracee, ORIGINAL, STACK_POINTER)); } } else { /* By default, restore original register values at the * end of this stage. */ tracee->restore_original_regs = true; print_current_regs(tracee, 5, "sysexit start"); /* Translate the syscall only if it was actually * requested by the tracee, it is not a syscall * chained by PRoot. */ if (tracee->chain.syscalls == NULL) translate_syscall_exit(tracee); else (void) notify_extensions(tracee, SYSCALL_CHAINED_EXIT, 0, 0); /* Reset the tracee's status. */ tracee->status = 0; /* Insert the next chained syscall, if any. */ if (tracee->chain.syscalls != NULL) chain_next_syscall(tracee); } (void) push_regs(tracee); if (is_enter_stage) print_current_regs(tracee, 5, "sysenter end" ); else print_current_regs(tracee, 4, "sysexit end"); }
void pt_debugger_x64::syscall(int id) { poke_reg(ORIG_RAX, id); }
void pt_debugger_arm::result(long value) { poke_reg(ARM_r0, value); }
/** * Replace current @tracee's syscall with an older and compatible one * whenever it's required, i.e. when the syscall is supported by the * kernel as specified by @config->release but it isn't supported by * the actual kernel. */ static void handle_sysenter_end(Tracee *tracee, Config *config) { bool modified; /* Note: syscalls like "openat" can be replaced by "open" since PRoot * has canonicalized "fd + path" into "path". */ switch (get_sysnum(tracee, ORIGINAL)) { case PR_accept4: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,28), .new_sysarg_num = PR_accept, .shifts = {{0}} }; modify_syscall(tracee, config, &modif); return; } case PR_dup3: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_dup2, .shifts = {{0}} }; modify_syscall(tracee, config, &modif); return; } case PR_epoll_create1: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_epoll_create, .shifts = {{0}} }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_1, 0); /* Force "size" to 0. */ return; } case PR_epoll_pwait: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,19), .new_sysarg_num = PR_epoll_wait, .shifts = {{0}} }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_5, 0); /* Force "sigmask" to 0. */ return; } case PR_eventfd2: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_eventfd, .shifts = {{0}} }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_2, 0); /* Force "flags" to 0. */ return; } case PR_faccessat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_access, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_4, 0); /* Force "flags" to 0. */ return; } case PR_fchmodat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_chmod, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_4, 0); /* Force "flags" to 0. */ return; } case PR_fchownat: { word_t flags; Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 3, .offset = -1 } } }; flags = peek_reg(tracee, CURRENT, SYSARG_5); modif.new_sysarg_num = ((flags & AT_SYMLINK_NOFOLLOW) != 0 ? PR_lchown : PR_chown); modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_5, 0); /* Force "flags" to 0. */ return; } case PR_newfstatat: case PR_fstatat64: { word_t flags; Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; flags = peek_reg(tracee, CURRENT, SYSARG_4); #if defined(ARCH_X86_64) if ((flags & AT_SYMLINK_NOFOLLOW) != 0) modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? PR_lstat : PR_lstat64); else modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? PR_stat : PR_stat64); #else if ((flags & AT_SYMLINK_NOFOLLOW) != 0) modif.new_sysarg_num = PR_lstat64; else modif.new_sysarg_num = PR_stat64; #endif modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_4, 0); /* Force "flags" to 0. */ return; } case PR_futimesat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_utimes, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modify_syscall(tracee, config, &modif); return; } case PR_inotify_init1: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,27), .new_sysarg_num = PR_inotify_init, .shifts = {{0}} }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_1, 0); /* Force "flags" to 0. */ return; } case PR_linkat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_link, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 1, .offset = -1 }, [1] = { .sysarg = SYSARG_4, .nb_args = 1, .offset = -2 } } }; modified = modify_syscall(tracee, config, &modif); if (modified) poke_reg(tracee, SYSARG_5, 0); /* Force "flags" to 0. */ return; } case PR_mkdirat: { Modif modif = { .expected_release = KERNEL_VERSION(2,6,16), .new_sysarg_num = PR_mkdir, .shifts = { [0] = { .sysarg = SYSARG_2, .nb_args = 2, .offset = -1 } } }; modify_syscall(tracee, config, &modif); return; }