/**
 * Prepare the emulation of the ptrace mechanism within PRoot for the
 * wait syscall made by @ptracer: if the requested pid is -1 or
 * designates one of its ptracees, the syscall is turned into a "void"
 * syscall and flagged as being waited in PRoot; otherwise it is left
 * to the kernel.  By convention this function returns -errno if an
 * error occurred, otherwise 0; every path below returns 0.
 */
int translate_wait_enter(Tracee *ptracer)
{
	Tracee *ptracee;
	pid_t pid;

	/* Unless decided otherwise below, the kernel performs this
	 * wait.  */
	PTRACER.waits_in = WAITS_IN_KERNEL;

	/* Only a process that actually has ptracees is concerned by
	 * the emulation.  */
	if (PTRACER.nb_ptracees != 0) {
		pid = (pid_t) peek_reg(ptracer, ORIGINAL, SYSARG_1);

		/* A specific pid is looked up only to check that it
		 * is a ptracee of this very ptracer; -1 needs no
		 * lookup.  */
		ptracee = (pid == -1) ? NULL : get_tracee(ptracer, pid, false);

		if (pid == -1 || (ptracee != NULL && PTRACEE.ptracer == ptracer)) {
			/* Cancel the syscall at the enter stage, it
			 * is handled at the exit stage instead.  */
			set_sysnum(ptracer, PR_void);
			PTRACER.waits_in = WAITS_IN_PROOT;
		}
	}

	return 0;
}
/**
 * Make new @parent's child inherit from it.  Depending on
 * @clone_flags, some information is copied or shared: the heap
 * (CLONE_VM) and the file-system name-space (CLONE_FS).  Everything
 * else (executable path, command-line, qemu, glue, ldso paths) is
 * referenced from @parent.
 *
 * This function returns -errno if an error occurred (the failure
 * reported by ptrace(PTRACE_GETEVENTMSG), or -ENOMEM), otherwise 0.
 */
int new_child(Tracee *parent, word_t clone_flags)
{
	unsigned long pid;
	Tracee *child;
	int status;

	/* If the tracee calls clone(2) with the CLONE_VFORK flag,
	 * PTRACE_EVENT_VFORK will be delivered instead [...];
	 * otherwise if the tracee calls clone(2) with the exit signal
	 * set to SIGCHLD, PTRACE_EVENT_FORK will be delivered [...]
	 *
	 * -- ptrace(2) man-page
	 *
	 * That means we have to check if it's actually a clone(2) in
	 * order to get the right flags.  A failure to fetch the
	 * registers is not fatal: the flags passed by the caller are
	 * used as-is in this case.  */
	status = fetch_regs(parent);
	if (status >= 0 && get_sysnum(parent, CURRENT) == PR_clone)
		clone_flags = peek_reg(parent, CURRENT, SYSARG_1);

	/* Get the pid of the parent's new child.  */
	status = ptrace(PTRACE_GETEVENTMSG, parent->pid, NULL, &pid);
	if (status < 0) {
		/* ptrace(2) reports a failure as -1 with the cause in
		 * errno: capture it before any other call can alter
		 * it, so that -errno is returned as documented.  */
		const int error = errno;

		notice(parent, WARNING, SYSTEM, "ptrace(GETEVENTMSG)");
		return (error > 0) ? -error : status;
	}

	child = get_tracee(parent, (pid_t) pid, true);
	if (child == NULL) {
		notice(parent, WARNING, SYSTEM, "running out of memory");
		return -ENOMEM;
	}

	/* Sanity checks.  */
	assert(child != NULL
	    && child->exe == NULL
	    && child->cmdline == NULL
	    && child->fs->cwd == NULL
	    && child->fs->bindings.pending == NULL
	    && child->fs->bindings.guest == NULL
	    && child->fs->bindings.host == NULL
	    && child->qemu == NULL
	    && child->glue == NULL);

	child->verbose = parent->verbose;
	child->seccomp = parent->seccomp;
	child->sysexit_pending = parent->sysexit_pending;

	/* If CLONE_VM is set, the calling process and the child
	 * process run in the same memory space [...] any memory
	 * mapping or unmapping performed with mmap(2) or munmap(2) by
	 * the child or calling process also affects the other
	 * process.
	 *
	 * If CLONE_VM is not set, the child process runs in a
	 * separate copy of the memory space of the calling process at
	 * the time of clone().  Memory writes or file
	 * mappings/unmappings performed by one of the processes do
	 * not affect the other, as with fork(2).
	 *
	 * -- clone(2) man-page
	 */
	TALLOC_FREE(child->heap);
	child->heap = ((clone_flags & CLONE_VM) != 0)
		? talloc_reference(child, parent->heap)
		: talloc_memdup(child, parent->heap, sizeof(Heap));
	if (child->heap == NULL)
		return -ENOMEM;

	/* If CLONE_FS is set, the parent and the child process share
	 * the same file system information.  This includes the root
	 * of the file system, the current working directory, and the
	 * umask.  Any call to chroot(2), chdir(2), or umask(2)
	 * performed by the parent process or the child process also
	 * affects the other process.
	 *
	 * If CLONE_FS is not set, the child process works on a copy
	 * of the file system information of the parent process at the
	 * time of the clone() call.  Calls to chroot(2), chdir(2),
	 * umask(2) performed later by one of the processes do not
	 * affect the other process.
	 *
	 * -- clone(2) man-page
	 */
	TALLOC_FREE(child->fs);
	if ((clone_flags & CLONE_FS) != 0) {
		/* File-system name-space is shared.  */
		child->fs = talloc_reference(child, parent->fs);
		if (child->fs == NULL)
			return -ENOMEM;
	}
	else {
		/* File-system name-space is copied.  */
		child->fs = talloc_zero(child, FileSystemNameSpace);
		if (child->fs == NULL)
			return -ENOMEM;

		child->fs->cwd = talloc_strdup(child->fs, parent->fs->cwd);
		if (child->fs->cwd == NULL)
			return -ENOMEM;
		talloc_set_name_const(child->fs->cwd, "$cwd");

		/* Bindings are shared across file-system name-spaces since a
		 * "mount --bind" made by a process affects all other processes
		 * under Linux.  Actually they are copied when a sub
		 * reconfiguration occurred (nested proot or chroot(2)).  */
		child->fs->bindings.guest = talloc_reference(child->fs, parent->fs->bindings.guest);
		child->fs->bindings.host  = talloc_reference(child->fs, parent->fs->bindings.host);
	}

	/* The path to the executable and the command-line are unshared only
	 * once the child process does a call to execve(2).  */
	child->exe = talloc_reference(child, parent->exe);
	child->cmdline = talloc_reference(child, parent->cmdline);

	child->qemu_pie_workaround = parent->qemu_pie_workaround;
	child->qemu = talloc_reference(child, parent->qemu);
	child->glue = talloc_reference(child, parent->glue);

	child->host_ldso_paths  = talloc_reference(child, parent->host_ldso_paths);
	child->guest_ldso_paths = talloc_reference(child, parent->guest_ldso_paths);

	inherit_extensions(child, parent, false);

	/* Restart the child tracee if it was already alive but
	 * stopped until that moment.  */
	if (child->sigstop == SIGSTOP_PENDING) {
		child->sigstop = SIGSTOP_ALLOWED;

		/* A child that can't be restarted is discarded, this
		 * is not reported as an error to the caller.  */
		status = ptrace(PTRACE_SYSCALL, child->pid, NULL, 0);
		if (status < 0)
			TALLOC_FREE(child);
	}

	return 0;
}
/** * This function emulates the @result of readlink("@base/@component") * with respect to @tracee, where @base belongs to "/proc" (according * to @comparison). This function returns -errno on error, an enum * @action otherwise (c.f. above). * * Unlike readlink(), this function includes the nul terminating byte * to @result. */ Action readlink_proc(const Tracee *tracee, char result[PATH_MAX], const char base[PATH_MAX], const char component[NAME_MAX], Comparison comparison) { const Tracee *known_tracee; char proc_path[64]; /* 64 > sizeof("/proc//fd/") + 2 * sizeof(#ULONG_MAX) */ int status; pid_t pid; assert(comparison == compare_paths("/proc", base)); /* Remember: comparison = compare_paths("/proc", base) */ switch (comparison) { case PATHS_ARE_EQUAL: /* Substitute "/proc/self" with "/proc/<PID>". */ if (strcmp(component, "self") != 0) return DEFAULT; status = snprintf(result, PATH_MAX, "/proc/%d", tracee->pid); if (status < 0 || status >= PATH_MAX) return -EPERM; return CANONICALIZE; case PATH1_IS_PREFIX: /* Handle "/proc/<PID>" below, where <PID> is process * monitored by PRoot. */ break; default: return DEFAULT; } pid = atoi(base + strlen("/proc/")); if (pid == 0) return DEFAULT; known_tracee = get_tracee(pid, false); if (!known_tracee) return DEFAULT; /* Handle links in "/proc/<PID>/". */ status = snprintf(proc_path, sizeof(proc_path), "/proc/%d", pid); if (status < 0 || status >= sizeof(proc_path)) return -EPERM; comparison = compare_paths(proc_path, base); switch (comparison) { case PATHS_ARE_EQUAL: #define SUBSTITUTE(name, field) \ do { \ if (strcmp(component, #name) != 0) \ break; \ \ status = strlen(known_tracee->field); \ if (status >= PATH_MAX) \ return -EPERM; \ \ strncpy(result, known_tracee->field, status + 1); \ return CANONICALIZE; \ } while (0) /* Substitute link "/proc/<PID>/???" with the content * of tracee->???. 
*/ SUBSTITUTE(exe, exe); SUBSTITUTE(cwd, fs->cwd); //SUBSTITUTE(root); #undef SUBSTITUTE return DEFAULT; case PATH1_IS_PREFIX: /* Handle "/proc/<PID>/???" below. */ break; default: return DEFAULT; } /* Handle links in "/proc/<PID>/fd/". */ status = snprintf(proc_path, sizeof(proc_path), "/proc/%d/fd", pid); if (status < 0 || status >= sizeof(proc_path)) return -EPERM; comparison = compare_paths(proc_path, base); switch (comparison) { char *end_ptr; case PATHS_ARE_EQUAL: /* Sanity check: a number is expected. */ errno = 0; (void) strtol(component, &end_ptr, 10); if (errno != 0 || end_ptr == component) return -EPERM; /* Don't dereference "/proc/<PID>/fd/???" now: they * can point to anonymous pipe, socket, ... otherwise * they point to a path already canonicalized by the * kernel. * * Note they are still correctly detranslated in * syscall/exit.c if a monitored process uses * readlink() against any of them. */ status = snprintf(result, PATH_MAX, "%s/%s", base, component); if (status < 0 || status >= PATH_MAX) return -EPERM; return DONT_CANONICALIZE; default: return DEFAULT; } return DEFAULT; }