/*
 * Run the task @pid from the register set @regs (we run at @regs->ip)
 * and wait until it stops again.
 *
 * On every stop @regs is refreshed with the task's current registers.
 * If the stop is not the expected trap, the interrupting signal is
 * delivered on top of @ctl->regs_orig (which is then updated) and the
 * run is restarted.
 *
 * Returns 0 once the expected trap is hit, -1 on any error.
 */
int __parasite_execute(struct parasite_ctl *ctl, pid_t pid, user_regs_struct_t *regs)
{
	siginfo_t info;
	int wstatus;

	for (;;) {
		if (ptrace(PTRACE_SETREGS, pid, NULL, regs)) {
			pr_perror("Can't set registers (pid: %d)", pid);
			return -1;
		}

		/*
		 * Most ideas are taken from Tejun Heo's parasite thread
		 * https://code.google.com/p/ptrace-parasite/
		 */
		if (ptrace(PTRACE_CONT, pid, NULL, NULL)) {
			pr_perror("Can't continue (pid: %d)", pid);
			return -1;
		}

		if (wait4(pid, &wstatus, __WALL, NULL) != pid) {
			pr_perror("Waited pid mismatch (pid: %d)", pid);
			return -1;
		}

		if (!WIFSTOPPED(wstatus)) {
			pr_err("Task is still running (pid: %d)\n", pid);
			return -1;
		}

		if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &info)) {
			pr_perror("Can't get siginfo (pid: %d)", pid);
			return -1;
		}

		if (ptrace(PTRACE_GETREGS, pid, NULL, regs)) {
			pr_perror("Can't obtain registers (pid: %d)", pid);
			return -1;
		}

		/*
		 * A SIGTRAP stop carrying ARCH_SI_TRAP means int3 was
		 * triggered inside our parasite code. So we're done.
		 */
		if (WSTOPSIG(wstatus) == SIGTRAP && info.si_code == ARCH_SI_TRAP)
			return 0;

		/*
		 * Anything else is a signal that hit the task while the
		 * blob was running. Keep delivering until the task reports
		 * the PTRACE_EVENT_STOP caused by our interrupt request.
		 */
		do {
			pr_debug("** delivering signal %d si_code=%d\n", info.si_signo, info.si_code);

			if (ctl->signals_blocked) {
				pr_err("Unexpected %d task interruption, aborting\n", pid);
				return -1;
			}

			/* FIXME: jerr(siginfo.si_code > 0, err_restore); */

			/*
			 * This requires some explanation. If a signal from
			 * the original program is delivered while we're
			 * trying to execute our injected blob, we need to
			 * put the original registers back so the kernel
			 * makes the sigframe for us and updates the former
			 * registers.
			 *
			 * Then we swap the registers back to our modified
			 * copy and retry.
			 */
			if (ptrace(PTRACE_SETREGS, pid, NULL, &ctl->regs_orig)) {
				pr_perror("Can't set registers (pid: %d)", pid);
				return -1;
			}

			if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) {
				pr_perror("Can't interrupt (pid: %d)", pid);
				return -1;
			}

			if (ptrace(PTRACE_CONT, pid, NULL, (void *)(unsigned long)info.si_signo)) {
				pr_perror("Can't continue (pid: %d)", pid);
				return -1;
			}

			if (wait4(pid, &wstatus, __WALL, NULL) != pid) {
				pr_perror("Waited pid mismatch (pid: %d)", pid);
				return -1;
			}

			if (!WIFSTOPPED(wstatus)) {
				pr_err("Task is still running (pid: %d)\n", pid);
				return -1;
			}

			if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &info)) {
				pr_perror("Can't get siginfo (pid: %d)", pid);
				return -1;
			}
		} while (SI_EVENT(info.si_code) != PTRACE_EVENT_STOP);

		/*
		 * Signal is delivered, so we should update the original
		 * registers. They are stored only if the read succeeds.
		 */
		{
			user_regs_struct_t fresh;

			if (ptrace(PTRACE_GETREGS, pid, NULL, &fresh)) {
				pr_perror("Can't obtain registers (pid: %d)", pid);
				return -1;
			}

			ctl->regs_orig = fresh;
		}

		/* Loop around: reload @regs and run the blob again. */
	}
}
/* * This routine seizes task putting it into a special * state where we can manipulate the task via ptrace * interface, and finally we can detach ptrace out of * of it so the task would not know if it was saddled * up with someone else. */ int seize_wait_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds) { siginfo_t si; int status, nr_sigstop; int ret = 0, ret2, wait_errno = 0; struct proc_status_creds cr; /* * For the comparison below, let's zero out any padding. */ memzero(&cr, sizeof(struct proc_status_creds)); /* * It's ugly, but the ptrace API doesn't allow to distinguish * attaching to zombie from other errors. Thus we have to parse * the target's /proc/pid/stat. Sad, but parse whatever else * we might need at that early point. */ processes_to_wait--; try_again: ret = wait4(pid, &status, __WALL, NULL); if (ret < 0) { /* * wait4() can expectedly fail only in a first time * if a task is zombie. If we are here from try_again, * this means that we are tracing this task. * * processes_to_wait should be descrimented only once in this * function if a first wait was success. */ processes_to_wait++; wait_errno = errno; } ret2 = parse_pid_status(pid, &cr); if (ret2) goto err; if (ret < 0 || WIFEXITED(status) || WIFSIGNALED(status)) { if (cr.state != 'Z') { if (pid == getpid()) pr_err("The criu itself is within dumped tree.\n"); else pr_err("Unseizable non-zombie %d found, state %c, err %d/%d\n", pid, cr.state, ret, wait_errno); return -1; } return TASK_DEAD; } if ((ppid != -1) && (cr.ppid != ppid)) { pr_err("Task pid reused while suspending (%d: %d -> %d)\n", pid, ppid, cr.ppid); goto err; } if (!WIFSTOPPED(status)) { pr_err("SEIZE %d: task not stopped after seize\n", pid); goto err; } ret = ptrace(PTRACE_GETSIGINFO, pid, NULL, &si); if (ret < 0) { pr_perror("SEIZE %d: can't read signfo", pid); goto err; } if (SI_EVENT(si.si_code) != PTRACE_EVENT_STOP) { /* * Kernel notifies us about the task being seized received some * event other than the STOP, i.e. 
-- a signal. Let the task * handle one and repeat. */ if (ptrace(PTRACE_CONT, pid, NULL, (void *)(unsigned long)si.si_signo)) { pr_perror("Can't continue signal handling, aborting"); goto err; } ret = 0; goto try_again; } if (*creds == NULL) { *creds = xzalloc(sizeof(struct proc_status_creds)); if (!*creds) goto err; **creds = cr; } else if (!proc_status_creds_dumpable(*creds, &cr)) { pr_err("creds don't match %d %d\n", pid, ppid); goto err; } if (cr.seccomp_mode != SECCOMP_MODE_DISABLED && suspend_seccomp(pid) < 0) goto err; nr_sigstop = 0; if (cr.sigpnd & (1 << (SIGSTOP - 1))) nr_sigstop++; if (cr.shdpnd & (1 << (SIGSTOP - 1))) nr_sigstop++; if (si.si_signo == SIGSTOP) nr_sigstop++; if (nr_sigstop) { if (skip_sigstop(pid, nr_sigstop)) goto err_stop; return TASK_STOPPED; } if (si.si_signo == SIGTRAP) return TASK_ALIVE; else { pr_err("SEIZE %d: unsupported stop signal %d\n", pid, si.si_signo); goto err; } err_stop: kill(pid, SIGSTOP); err: if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) pr_perror("Unable to detach from %d", pid); return -1; }
int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds) { siginfo_t si; int status; int ret, ret2, ptrace_errno, wait_errno = 0; struct proc_status_creds cr; /* * For the comparison below, let's zero out any padding. */ memzero(&cr, sizeof(struct proc_status_creds)); ret = ptrace(PTRACE_SEIZE, pid, NULL, 0); ptrace_errno = errno; if (ret == 0) { /* * If we SEIZE-d the task stop it before going * and reading its stat from proc. Otherwise task * may die _while_ we're doing it and we'll have * inconsistent seize/state pair. * * If task dies after we seize it but before we * do this interrupt, we'll notice it via proc. */ ret = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL); if (ret < 0) { pr_perror("SEIZE %d: can't interrupt task", pid); ptrace(PTRACE_DETACH, pid, NULL, NULL); goto err; } } /* * It's ugly, but the ptrace API doesn't allow to distinguish * attaching to zombie from other errors. Thus we have to parse * the target's /proc/pid/stat. Sad, but parse whatever else * we might need at that early point. 
*/ try_again: if (!ret) { ret = wait4(pid, &status, __WALL, NULL); wait_errno = errno; } ret2 = parse_pid_status(pid, &cr); if (ret2) goto err; if (!may_dump(&cr)) { pr_err("Check uid (pid: %d) failed\n", pid); goto err; } if (ret < 0) { if (cr.state != 'Z') { if (pid == getpid()) pr_err("The criu itself is within dumped tree.\n"); else pr_err("Unseizable non-zombie %d found, state %c, err %d/%d/%d\n", pid, cr.state, ret, ptrace_errno, wait_errno); return -1; } return TASK_DEAD; } if ((ppid != -1) && (cr.ppid != ppid)) { pr_err("Task pid reused while suspending (%d: %d -> %d)\n", pid, ppid, cr.ppid); goto err; } if (!WIFSTOPPED(status)) { pr_err("SEIZE %d: task not stopped after seize\n", pid); goto err; } ret = ptrace(PTRACE_GETSIGINFO, pid, NULL, &si); if (ret < 0) { pr_perror("SEIZE %d: can't read signfo", pid); goto err; } if (SI_EVENT(si.si_code) != PTRACE_EVENT_STOP) { /* * Kernel notifies us about the task being seized received some * event other than the STOP, i.e. -- a signal. Let the task * handle one and repeat. */ if (ptrace(PTRACE_CONT, pid, NULL, (void *)(unsigned long)si.si_signo)) { pr_perror("Can't continue signal handling, aborting"); goto err; } ret = 0; goto try_again; } if (*creds == NULL) { *creds = xzalloc(sizeof(struct proc_status_creds)); if (!*creds) goto err_stop; **creds = cr; } else if (!proc_status_creds_eq(*creds, &cr)) { pr_err("creds don't match %d %d\n", pid, ppid); goto err_stop; } if (cr.seccomp_mode != SECCOMP_MODE_DISABLED && suspend_seccomp(pid) < 0) goto err_stop; if (si.si_signo == SIGTRAP) return TASK_ALIVE; else if (si.si_signo == SIGSTOP) { /* * PTRACE_SEIZE doesn't affect signal or group stop state. * Currently ptrace reported that task is in stopped state. * We need to start task again, and it will be trapped * immediately, because we sent PTRACE_INTERRUPT to it. 
*/ ret = ptrace(PTRACE_CONT, pid, 0, 0); if (ret) { pr_perror("Unable to start process"); goto err_stop; } ret = wait4(pid, &status, __WALL, NULL); if (ret < 0) { pr_perror("SEIZE %d: can't wait task", pid); goto err_stop; } if (ret != pid) { pr_err("SEIZE %d: wrong task attached (%d)\n", pid, ret); goto err_stop; } if (!WIFSTOPPED(status)) { pr_err("SEIZE %d: task not stopped after seize\n", pid); goto err_stop; } return TASK_STOPPED; } else { pr_err("SEIZE %d: unsupported stop signal %d\n", pid, si.si_signo); goto err; } err_stop: kill(pid, SIGSTOP); err: ptrace(PTRACE_DETACH, pid, NULL, NULL); return -1; }