static int init(struct parasite_init_args *args) { k_rtsigset_t to_block; int ret; ret = brk_init(); if (ret) return -ret; tsock = sys_socket(PF_UNIX, SOCK_DGRAM, 0); if (tsock < 0) return -tsock; ret = sys_bind(tsock, (struct sockaddr *) &args->p_addr, args->p_addr_len); if (ret < 0) return ret; ret = sys_connect(tsock, (struct sockaddr *)&args->h_addr, args->h_addr_len); if (ret < 0) return ret; ksigfillset(&to_block); ret = sys_sigprocmask(SIG_SETMASK, &to_block, &old_blocked, sizeof(k_rtsigset_t)); if (ret < 0) reset_blocked = ret; else reset_blocked = 1; return ret; }
/*
 * Re-queue saved signals for the calling task.
 *
 * @ptr:   array of siginfo records to re-send
 * @nr:    number of records in @ptr
 * @group: true  - queue via sys_rt_sigqueueinfo() against sys_getpid();
 *         false - queue via sys_rt_tgsigqueueinfo() against
 *                 sys_getpid()/sys_gettid().
 *
 * All signals are blocked with SIG_SETMASK before anything is queued.
 *
 * Return: 0 when every record was queued, -1 if blocking the signals or
 * queueing any record failed (processing stops at the first failure).
 */
static int restore_signals(siginfo_t *ptr, int nr, bool group)
{
	int ret, i;
	k_rtsigset_t to_block;

	ksigfillset(&to_block);
	ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
	if (ret) {
		/* Terminate the record with a newline like every other message. */
		pr_err("Unable to block signals %d\n", ret);
		return -1;
	}

	for (i = 0; i < nr; i++) {
		siginfo_t *info = ptr + i;

		pr_info("Restore signal %d group %d\n", info->si_signo, group);

		if (group)
			ret = sys_rt_sigqueueinfo(sys_getpid(), info->si_signo, info);
		else
			ret = sys_rt_tgsigqueueinfo(sys_getpid(), sys_gettid(),
						    info->si_signo, info);

		if (ret) {
			pr_err("Unable to send siginfo %d %x with code %d\n",
			       info->si_signo, info->si_code, ret);
			return -1;
		}
	}

	return 0;
}
/*
 * Tear down what init() set up: put the saved signal mask back (only when
 * reset_blocked says it was successfully replaced), close the log and
 * transport descriptors and finalise the brk helper.
 *
 * Return: always 0.
 */
static int fini(void)
{
	const int mask_was_replaced = (reset_blocked == 1);

	if (mask_was_replaced)
		sys_sigprocmask(SIG_SETMASK, &old_blocked, NULL, sizeof(k_rtsigset_t));

	sys_close(logfd);
	sys_close(tsock);

	brk_fini();

	return 0;
}
/* * Threads restoration via sigreturn. Note it's locked * routine and calls for unlock at the end. */ long __export_restore_thread(struct thread_restore_args *args) { struct rt_sigframe *rt_sigframe; k_rtsigset_t to_block; unsigned long new_sp; int my_pid = sys_gettid(); int ret; if (my_pid != args->pid) { pr_err("Thread pid mismatch %d/%d\n", my_pid, args->pid); goto core_restore_end; } /* All signals must be handled by thread leader */ ksigfillset(&to_block); ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t)); if (ret) { pr_err("Unable to block signals %d", ret); goto core_restore_end; } rt_sigframe = (void *)args->mem_zone.rt_sigframe; if (restore_thread_common(rt_sigframe, args)) goto core_restore_end; ret = restore_creds(&args->ta->creds); if (ret) goto core_restore_end; ret = restore_dumpable_flag(&args->ta->mm); if (ret) goto core_restore_end; pr_info("%ld: Restored\n", sys_gettid()); restore_finish_stage(CR_STATE_RESTORE); if (restore_signals(args->siginfo, args->siginfo_nr, false)) goto core_restore_end; restore_finish_stage(CR_STATE_RESTORE_SIGCHLD); restore_pdeath_sig(args); restore_finish_stage(CR_STATE_RESTORE_CREDS); futex_dec_and_wake(&thread_inprogress); new_sp = (long)rt_sigframe + SIGFRAME_OFFSET; rst_sigreturn(new_sp); core_restore_end: pr_err("Restorer abnormal termination for %ld\n", sys_getpid()); futex_abort_and_wake(&task_entries->nr_in_progress); sys_exit_group(1); return -1; }
/*
 * Undo init_thread() for the calling thread: look up its state record by
 * tid and, if a signal mask was saved there, install it again.
 *
 * Return: -ENOENT when no record exists for this tid, the
 * sys_sigprocmask() result when a mask is restored, 0 otherwise.
 */
static int fini_thread(void)
{
	struct tid_state_s *state = find_thread_state(sys_gettid());

	if (state == NULL)
		return -ENOENT;

	/* Nothing was saved for this thread, so there is nothing to restore. */
	if (!state->use_sig_blocked)
		return 0;

	return sys_sigprocmask(SIG_SETMASK, &state->sig_blocked, NULL,
			       sizeof(k_rtsigset_t));
}
/*
 * Compat wrapper around sys_sigprocmask().
 *
 * The user-supplied mask (if any) is copied into a kernel-side
 * old_sigset_t, the native syscall is invoked on that copy with the
 * address limit switched to KERNEL_DS, and on success the resulting old
 * mask (if requested) is copied back out to user space.
 *
 * Return: -EFAULT if the input mask cannot be read, otherwise the
 * sys_sigprocmask() result, or the put_user() result when the old mask is
 * written back.
 */
asmlinkage long compat_sys_sigprocmask(int how,
				       compat_old_sigset_t __user *set,
				       compat_old_sigset_t __user *oset)
{
	old_sigset_t kmask;
	mm_segment_t saved_fs;
	long rc;

	if (set && get_user(kmask, set))
		return -EFAULT;

	/* The same kernel buffer serves as both input and output mask. */
	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	rc = sys_sigprocmask(how,
			     set ? (old_sigset_t __user *) &kmask : NULL,
			     oset ? (old_sigset_t __user *) &kmask : NULL);
	set_fs(saved_fs);

	if (rc == 0 && oset)
		rc = put_user(kmask, oset);

	return rc;
}
/*
 * Claim the next free tid_state slot for the calling thread and block all
 * of its signals, saving the previous mask in that slot.
 *
 * The slot is consumed (tid recorded, next_tid_state advanced) even when
 * sys_sigprocmask() fails; use_sig_blocked is only set on success.
 *
 * Return: -ENOMEM when no slot is left, otherwise the sys_sigprocmask()
 * result.
 */
static int init_thread(void)
{
	k_rtsigset_t all_signals;
	int rc;

	if (next_tid_state >= nr_tid_state)
		return -ENOMEM;

	ksigfillset(&all_signals);
	rc = sys_sigprocmask(SIG_SETMASK, &all_signals,
			     &tid_state[next_tid_state].sig_blocked,
			     sizeof(k_rtsigset_t));

	/* Only mark the saved mask as usable when the call succeeded. */
	if (!(rc < 0))
		tid_state[next_tid_state].use_sig_blocked = true;

	tid_state[next_tid_state].tid = sys_gettid();
	next_tid_state++;

	return rc;
}
/* This function gets the list of all linux threads of the current process
 * and passes them to the 'callback' along with the 'parameter' pointer; at
 * the time the callback is called, all the threads are paused via
 * PTRACE_ATTACH.
 * The callback is executed from a separate thread which shares only the
 * address space, the filesystem, and the filehandles with the caller. Most
 * notably, it does not share the same pid and ppid; and if it terminates,
 * the rest of the application is still there. 'callback' is supposed to do
 * or arrange for ResumeAllProcessThreads. This happens automatically, if
 * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous
 * signals are blocked. If the 'callback' decides to unblock them, it must
 * ensure that they cannot terminate the application, or that
 * ResumeAllProcessThreads will get called.
 * It is an error for the 'callback' to make any library calls that could
 * acquire locks. Most notably, this means that most system calls have to
 * avoid going through libc. Also, this means that it is not legal to call
 * exit() or abort().
 * We return -1 on error and the return value of 'callback' on success.
 * On return, errno is set from args.err (0 unless a failure was recorded
 * here or, presumably, by the lister thread).
 */
int ListAllProcessThreads(void *parameter,
                          ListAllProcessThreadsCallBack callback, ...) {
  char altstack_mem[ALT_STACKSIZE];
  struct ListerParams args;
  pid_t clone_pid;
  int dumpable = 1, sig;
  struct kernel_sigset_t sig_blocked, sig_old;

  va_start(args.ap, callback);

  /* If we are short on virtual memory, initializing the alternate stack
   * might trigger a SIGSEGV. Let's do this early, before it could get us
   * into more trouble (i.e. before signal handlers try to use the alternate
   * stack, and before we attach to other threads).
   */
  memset(altstack_mem, 0, sizeof(altstack_mem));

  /* Some of our cleanup functions could conceivably use more stack space.
   * Try to touch the stack right now. This could be defeated by the compiler
   * being too smart for its own good, so try really hard.
   */
  DirtyStack(32768);

  /* Make this process "dumpable". This is necessary in order to ptrace()
   * after having called setuid().
   */
  dumpable = sys_prctl(PR_GET_DUMPABLE, 0);
  if (!dumpable)
    sys_prctl(PR_SET_DUMPABLE, 1);

  /* Fill in argument block for dumper thread */
  args.result = -1;
  args.err = 0;
  args.altstack_mem = altstack_mem;
  args.parameter = parameter;
  args.callback = callback;

  /* Before cloning the thread lister, block all asynchronous signals, as we */
  /* are not prepared to handle them. The signals listed in sync_signals     */
  /* are removed from the blocked set, i.e. they stay deliverable.           */
  sys_sigfillset(&sig_blocked);
  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
    sys_sigdelset(&sig_blocked, sync_signals[sig]);
  }
  if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) {
    args.err = errno;
    args.result = -1;
    goto failed;
  }

  /* scope */ {
    /* After cloning, both the parent and the child share the same instance
     * of errno. We must make sure that at least one of these processes
     * (in our case, the parent) uses modified syscall macros that update
     * a local copy of errno, instead.
     */
    #ifdef __cplusplus
      #define sys0_sigprocmask sys.sigprocmask
      #define sys0_waitpid sys.waitpid
      SysCalls sys;
    #else
      int my_errno;
      #define SYS_ERRNO my_errno
      #define SYS_INLINE inline
      #define SYS_PREFIX 0
      #undef SYS_LINUX_SYSCALL_SUPPORT_H
      #include "linux_syscall_support.h"
    #endif

    int clone_errno;
    clone_pid = local_clone((int (*)(void *))ListerThread, &args);
    /* Capture errno right away, before any later call can overwrite it. */
    clone_errno = errno;

    /* Put back the signal mask that was in effect before the clone. */
    sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);

    if (clone_pid >= 0) {
      int status, rc;
      /* Reap the lister, retrying when the wait is interrupted. */
      while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 &&
             ERRNO == EINTR) {
        /* Keep waiting */
      }
      if (rc < 0) {
        args.err = ERRNO;
        args.result = -1;
      } else if (WIFEXITED(status)) {
        /* Exit codes other than 0 are translated into an errno value. */
        switch (WEXITSTATUS(status)) {
          case 0: break; /* Normal process termination */
          case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */
                  args.result = -1;
                  break;
          case 3: args.err = EPERM; /* Process is already being traced */
                  args.result = -1;
                  break;
          default:args.err = ECHILD; /* Child died unexpectedly */
                  args.result = -1;
                  break;
        }
      } else if (!WIFEXITED(status)) {
        /* This condition is always true here: the previous branch already
         * handled the WIFEXITED case.
         */
        args.err = EFAULT; /* Terminated due to an unhandled signal*/
        args.result = -1;
      }
    } else {
      args.result = -1;
      args.err = clone_errno;
    }
  }

  /* Restore the "dumpable" state of the process */
failed:
  if (!dumpable)
    sys_prctl(PR_SET_DUMPABLE, dumpable);

  va_end(args.ap);

  errno = args.err;
  return args.result;
}
/* * The main routine to restore task via sigreturn. * This one is very special, we never return there * but use sigreturn facility to restore core registers * and jump execution to some predefined ip read from * core file. */ long __export_restore_task(struct task_restore_args *args) { long ret = -1; int i; VmaEntry *vma_entry; unsigned long va; struct rt_sigframe *rt_sigframe; unsigned long new_sp; k_rtsigset_t to_block; pid_t my_pid = sys_getpid(); rt_sigaction_t act; bootstrap_start = args->bootstrap_start; bootstrap_len = args->bootstrap_len; #ifdef CONFIG_VDSO vdso_rt_size = args->vdso_rt_size; #endif task_entries = args->task_entries; helpers = args->helpers; n_helpers = args->n_helpers; *args->breakpoint = rst_sigreturn; ksigfillset(&act.rt_sa_mask); act.rt_sa_handler = sigchld_handler; act.rt_sa_flags = SA_SIGINFO | SA_RESTORER | SA_RESTART; act.rt_sa_restorer = cr_restore_rt; sys_sigaction(SIGCHLD, &act, NULL, sizeof(k_rtsigset_t)); log_set_fd(args->logfd); log_set_loglevel(args->loglevel); cap_last_cap = args->cap_last_cap; pr_info("Switched to the restorer %d\n", my_pid); #ifdef CONFIG_VDSO if (vdso_do_park(&args->vdso_sym_rt, args->vdso_rt_parked_at, vdso_rt_size)) goto core_restore_end; #endif if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, bootstrap_start, bootstrap_len)) goto core_restore_end; /* Shift private vma-s to the left */ for (i = 0; i < args->nr_vmas; i++) { vma_entry = args->tgt_vmas + i; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (!vma_priv(vma_entry)) continue; if (vma_entry->end >= TASK_SIZE) continue; if (vma_entry->start > vma_entry->shmid) break; if (vma_remap(vma_premmaped_start(vma_entry), vma_entry->start, vma_entry_len(vma_entry))) goto core_restore_end; } /* Shift private vma-s to the right */ for (i = args->nr_vmas - 1; i >= 0; i--) { vma_entry = args->tgt_vmas + i; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (!vma_priv(vma_entry)) continue; if (vma_entry->start > 
TASK_SIZE) continue; if (vma_entry->start < vma_entry->shmid) break; if (vma_remap(vma_premmaped_start(vma_entry), vma_entry->start, vma_entry_len(vma_entry))) goto core_restore_end; } /* * OK, lets try to map new one. */ for (i = 0; i < args->nr_vmas; i++) { vma_entry = args->tgt_vmas + i; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (vma_priv(vma_entry)) continue; va = restore_mapping(vma_entry); if (va != vma_entry->start) { pr_err("Can't restore %"PRIx64" mapping with %lx\n", vma_entry->start, va); goto core_restore_end; } } #ifdef CONFIG_VDSO /* * Proxify vDSO. */ for (i = 0; i < args->nr_vmas; i++) { if (vma_entry_is(&args->tgt_vmas[i], VMA_AREA_VDSO) || vma_entry_is(&args->tgt_vmas[i], VMA_AREA_VVAR)) { if (vdso_proxify("dumpee", &args->vdso_sym_rt, args->vdso_rt_parked_at, i, args->tgt_vmas, args->nr_vmas)) goto core_restore_end; break; } } #endif /* * Walk though all VMAs again to drop PROT_WRITE * if it was not there. */ for (i = 0; i < args->nr_vmas; i++) { vma_entry = args->tgt_vmas + i; if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) continue; if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) { struct shmem_info *entry; entry = find_shmem(args->shmems, args->nr_shmems, vma_entry->shmid); if (entry && entry->pid == my_pid && entry->start == vma_entry->start) futex_set_and_wake(&entry->lock, 1); } if (vma_entry->prot & PROT_WRITE) continue; sys_mprotect(decode_pointer(vma_entry->start), vma_entry_len(vma_entry), vma_entry->prot); } /* * Finally restore madivse() bits */ for (i = 0; i < args->nr_vmas; i++) { unsigned long m; vma_entry = args->tgt_vmas + i; if (!vma_entry->has_madv || !vma_entry->madv) continue; for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { if (vma_entry->madv & (1ul << m)) { ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m); if (ret) { pr_err("madvise(%"PRIx64", %"PRIu64", %ld) " "failed with %ld\n", vma_entry->start, vma_entry_len(vma_entry), m, ret); goto core_restore_end; } } } } ret = 0; /* * Tune up 
the task fields. */ ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_DATA, (long)args->mm.mm_start_data, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_DATA, (long)args->mm.mm_end_data, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_STACK, (long)args->mm.mm_start_stack, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_BRK, (long)args->mm.mm_start_brk, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_BRK, (long)args->mm.mm_brk, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_START, (long)args->mm.mm_arg_start, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_END, (long)args->mm.mm_arg_end, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_START, (long)args->mm.mm_env_start, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_END, (long)args->mm.mm_env_end, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_AUXV, (long)args->mm_saved_auxv, args->mm_saved_auxv_size); if (ret) goto core_restore_end; /* * Because of requirements applied from kernel side * we need to restore /proc/pid/exe symlink late, * after old existing VMAs are superseded with * new ones from image file. */ ret = restore_self_exe_late(args); if (ret) goto core_restore_end; /* * We need to prepare a valid sigframe here, so * after sigreturn the kernel will pick up the * registers from the frame, set them up and * finally pass execution to the new IP. */ rt_sigframe = (void *)args->t->mem_zone.rt_sigframe; if (restore_thread_common(rt_sigframe, args->t)) goto core_restore_end; /* * Threads restoration. This requires some more comments. This * restorer routine and thread restorer routine has the following * memory map, prepared by a caller code. 
* * | <-- low addresses high addresses --> | * +-------------------------------------------------------+-----------------------+ * | this proc body | own stack | rt_sigframe space | thread restore zone | * +-------------------------------------------------------+-----------------------+ * * where each thread restore zone is the following * * | <-- low addresses high addresses --> | * +--------------------------------------------------------------------------+ * | thread restore proc | thread1 stack | thread1 rt_sigframe | * +--------------------------------------------------------------------------+ */ if (args->nr_threads > 1) { struct thread_restore_args *thread_args = args->thread_args; long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM; long last_pid_len; long parent_tid; int i, fd; fd = args->fd_last_pid; ret = sys_flock(fd, LOCK_EX); if (ret) { pr_err("Can't lock last_pid %d\n", fd); goto core_restore_end; } for (i = 0; i < args->nr_threads; i++) { char last_pid_buf[16], *s; /* skip self */ if (thread_args[i].pid == args->t->pid) continue; new_sp = restorer_stack(thread_args + i); last_pid_len = vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); sys_lseek(fd, 0, SEEK_SET); ret = sys_write(fd, s, last_pid_len); if (ret < 0) { pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); goto core_restore_end; } /* * To achieve functionality like libc's clone() * we need a pure assembly here, because clone()'ed * thread will run with own stack and we must not * have any additional instructions... oh, dear... 
*/ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); } ret = sys_flock(fd, LOCK_UN); if (ret) { pr_err("Can't unlock last_pid %ld\n", ret); goto core_restore_end; } } sys_close(args->fd_last_pid); restore_rlims(args); ret = create_posix_timers(args); if (ret < 0) { pr_err("Can't restore posix timers %ld\n", ret); goto core_restore_end; } ret = timerfd_arm(args); if (ret < 0) { pr_err("Can't restore timerfd %ld\n", ret); goto core_restore_end; } pr_info("%ld: Restored\n", sys_getpid()); futex_set(&zombies_inprogress, args->nr_zombies); restore_finish_stage(CR_STATE_RESTORE); futex_wait_while_gt(&zombies_inprogress, 0); if (wait_helpers(args) < 0) goto core_restore_end; ksigfillset(&to_block); ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t)); if (ret) { pr_err("Unable to block signals %ld", ret); goto core_restore_end; } sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t)); ret = restore_signals(args->siginfo, args->siginfo_nr, true); if (ret) goto core_restore_end; ret = restore_signals(args->t->siginfo, args->t->siginfo_nr, false); if (ret) goto core_restore_end; restore_finish_stage(CR_STATE_RESTORE_SIGCHLD); rst_tcp_socks_all(args); /* * Writing to last-pid is CAP_SYS_ADMIN protected, * turning off TCP repair is CAP_SYS_NED_ADMIN protected, * thus restore* creds _after_ all of the above. */ ret = restore_creds(&args->creds); ret = ret || restore_dumpable_flag(&args->mm); ret = ret || restore_pdeath_sig(args->t); futex_set_and_wake(&thread_inprogress, args->nr_threads); restore_finish_stage(CR_STATE_RESTORE_CREDS); if (ret) BUG(); /* Wait until children stop to use args->task_entries */ futex_wait_while_gt(&thread_inprogress, 1); log_set_fd(-1); /* * The code that prepared the itimers makes shure the * code below doesn't fail due to bad timing values. 
*/ #define itimer_armed(args, i) \ (args->itimers[i].it_interval.tv_sec || \ args->itimers[i].it_interval.tv_usec) if (itimer_armed(args, 0)) sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL); if (itimer_armed(args, 1)) sys_setitimer(ITIMER_VIRTUAL, &args->itimers[1], NULL); if (itimer_armed(args, 2)) sys_setitimer(ITIMER_PROF, &args->itimers[2], NULL); restore_posix_timers(args); sys_munmap(args->rst_mem, args->rst_mem_size); /* * Sigframe stack. */ new_sp = (long)rt_sigframe + SIGFRAME_OFFSET; /* * Prepare the stack and call for sigreturn, * pure assembly since we don't need any additional * code insns from gcc. */ rst_sigreturn(new_sp); core_restore_end: futex_abort_and_wake(&task_entries->nr_in_progress); pr_err("Restorer fail %ld\n", sys_getpid()); sys_exit_group(1); return -1; }
/*
 * sigprocmask() entry point implemented directly on top of
 * sys_sigprocmask(); all three arguments are forwarded unchanged and the
 * result is handed straight back to the caller.
 */
int sigprocmask(int how, const sigset_t *set, sigset_t *oldset)
{
	int rc = sys_sigprocmask(how, set, oldset);

	return rc;
}