int ptrace_attach(struct task_struct *task) { int retval; audit_ptrace(task); retval = -EPERM; if (unlikely(task->flags & PF_KTHREAD)) goto out; if (same_thread_group(task, current)) goto out; /* * Protect exec's credential calculations against our interference; * interference; SUID, SGID and LSM creds get determined differently * under ptrace. */ retval = -ERESTARTNOINTR; if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); task_unlock(task); if (retval) goto unlock_creds; write_lock_irq(&tasklist_lock); retval = -EPERM; if (unlikely(task->exit_state)) goto unlock_tasklist; if (task->ptrace) goto unlock_tasklist; task->ptrace = PT_PTRACED; if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: return retval; }
static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || !same_thread_group(rtn, current) || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) return NULL; return task_pid(rtn); }
/* * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ int proc_fd_permission(struct inode *inode, int mask) { struct task_struct *p; int rv; rv = generic_permission(inode, mask); if (rv == 0) return rv; rcu_read_lock(); p = pid_task(proc_pid(inode), PIDTYPE_PID); if (p && same_thread_group(p, current)) rv = 0; rcu_read_unlock(); return rv; }
/* * Checks whether the given task is dying or exiting and likely to * release its address space. This means that all threads and processes * sharing the same mm have to be killed or exiting. * Caller has to make sure that task->mm is stable (hold task_lock or * it operates on the current). */ static bool task_will_free_mem(struct task_struct *task) { struct mm_struct *mm = task->mm; struct task_struct *p; bool ret = true; /* * Skip tasks without mm because it might have passed its exit_mm and * exit_oom_victim. oom_reaper could have rescued that but do not rely * on that for now. We can consider find_lock_task_mm in future. */ if (!mm) return false; if (!__task_will_free_mem(task)) return false; /* * This task has already been drained by the oom reaper so there are * only small chances it will free some more */ if (test_bit(MMF_OOM_SKIP, &mm->flags)) return false; if (atomic_read(&mm->mm_users) <= 1) return true; /* * Make sure that all tasks which share the mm with the given tasks * are dying as well to make sure that a) nobody pins its mm and * b) the task is also reapable by the oom reaper. */ rcu_read_lock(); for_each_process(p) { if (!process_shares_mm(p, mm)) continue; if (same_thread_group(task, p)) continue; ret = __task_will_free_mem(p); if (!ret) break; } rcu_read_unlock(); return ret; }
static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; int sig = event->sigev_signo; if ((event->sigev_notify & SIGEV_THREAD_ID ) && (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || !same_thread_group(rtn, current) || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || sig_kernel_coredump(sig))) return NULL; return task_pid(rtn); }
static int posix_cpu_clock_get_task(struct task_struct *tsk, const clockid_t which_clock, struct timespec *tp) { int err = -EINVAL; unsigned long long rtn; if (CPUCLOCK_PERTHREAD(which_clock)) { if (same_thread_group(tsk, current)) err = cpu_clock_sample(which_clock, tsk, &rtn); } else { if (tsk == current || thread_group_leader(tsk)) err = cpu_clock_sample_group(which_clock, tsk, &rtn); } if (!err) sample_to_timespec(which_clock, rtn, tp); return err; }
int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. * * ptrace_attach denies several cases that /proc allows * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ int dumpable = 0; /* Don't let security modules deny introspection */ if (same_thread_group(task, current)) return 0; rcu_read_lock(); tcred = __task_cred(task); if (cred->user->user_ns == tcred->user->user_ns && (cred->uid == tcred->euid && cred->uid == tcred->suid && cred->uid == tcred->uid && cred->gid == tcred->egid && cred->gid == tcred->sgid && cred->gid == tcred->gid)) goto ok; if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) goto ok; rcu_read_unlock(); return -EPERM; ok: rcu_read_unlock(); smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE)) return -EPERM; return security_ptrace_access_check(task, mode); }
static int check_clock(const clockid_t which_clock) { int error = 0; struct task_struct *p; const pid_t pid = CPUCLOCK_PID(which_clock); if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX) return -EINVAL; if (pid == 0) return 0; read_lock(&tasklist_lock); p = find_task_by_vpid(pid); if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? same_thread_group(p, current) : thread_group_leader(p))) { error = -EINVAL; } read_unlock(&tasklist_lock); return error; }
/* * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. * This is called from sys_timer_create() and do_cpu_nanosleep() with the * new timer already all-zeros initialized. */ static int posix_cpu_timer_create(struct k_itimer *new_timer) { int ret = 0; const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); struct task_struct *p; if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX) return -EINVAL; INIT_LIST_HEAD(&new_timer->it.cpu.entry); rcu_read_lock(); if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { if (pid == 0) { p = current; } else { p = find_task_by_vpid(pid); if (p && !same_thread_group(p, current)) p = NULL; } } else { if (pid == 0) { p = current->group_leader; } else { p = find_task_by_vpid(pid); if (p && !has_group_leader_pid(p)) p = NULL; } } new_timer->it.cpu.task = p; if (p) { get_task_struct(p); } else { ret = -EINVAL; } rcu_read_unlock(); return ret; }
/* * Called with tasklist_lock held for writing. * Unlink a traced task, and clean it up if it was a traced zombie. * Return true if it needs to be reaped with release_task(). * (We can't call release_task() here because we already hold tasklist_lock.) * * If it's a zombie, our attachedness prevented normal parent notification * or self-reaping. Do notification now if it would have happened earlier. * If it should reap itself, return true. * * If it's our own child, there is no notification to do. But if our normal * children self-reap, then this child was prevented by ptrace and we must * reap it now, in that case we must also wake up sub-threads sleeping in * do_wait(). */ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) { __ptrace_unlink(p); if (p->exit_state == EXIT_ZOMBIE) { if (!task_detached(p) && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, tracer)) do_notify_parent(p, p->exit_signal); else if (ignoring_children(tracer->sighand)) { __wake_up_parent(p, tracer); p->exit_signal = -1; } } if (task_detached(p)) { /* Mark it as in the process of being reaped. */ p->exit_state = EXIT_DEAD; return true; } } return false; }
/* * Accumulate raw cputime values of dead tasks (sig->[us]time) and live * tasks (sum on group iteration) belonging to @tsk's group. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; u64 utime, stime; struct task_struct *t; unsigned int seq, nextseq; unsigned long flags; /* * Update current task runtime to account pending time since last * scheduler action or thread_group_cputime() call. This thread group * might have other running tasks on different CPUs, but updating * their runtime can affect syscall performance, so we skip account * those pending times and rely only on values updated on tick or * other scheduler action. */ if (same_thread_group(current, tsk)) (void) task_sched_runtime(current); rcu_read_lock(); /* Attempt a lockless read on the first round. */ nextseq = 0; do { seq = nextseq; flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); times->utime = sig->utime; times->stime = sig->stime; times->sum_exec_runtime = sig->sum_sched_runtime; for_each_thread(tsk, t) { task_cputime(t, &utime, &stime); times->utime += utime; times->stime += stime; times->sum_exec_runtime += read_sum_exec_runtime(t); } /* If lockless access failed, take the lock. */ nextseq = 1; } while (need_seqretry(&sig->stats_lock, seq));
/* * Locking issues: We need to protect the result of the id look up until * we get the timer locked down so it is not deleted under us. The * removal is done under the idr spinlock so we use that here to bridge * the find to the timer lock. To avoid a dead lock, the timer id MUST * be release with out holding the timer lock. */ static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; /* * Watch out here. We do a irqsave on the idr_lock and pass the * flags part over to the timer lock. Must not let interrupts in * while we are moving the lock. */ spin_lock_irqsave(&idr_lock, *flags); timr = idr_find(&posix_timers_id, (int)timer_id); if (timr) { spin_lock(&timr->it_lock); if (timr->it_process && same_thread_group(timr->it_process, current)) { spin_unlock(&idr_lock); return timr; } spin_unlock(&timr->it_lock); } spin_unlock_irqrestore(&idr_lock, *flags); return NULL; }
/* * This needs some heavy checking ... * I just haven't the stomach for it. I also don't fully * understand sessions/pgrp etc. Let somebody who does explain it. * * OK, I think I have the protection semantics right.... this is really * only important on a multi-user system anyway, to make sure one user * can't send a signal to a process owned by another. -TYT, 12/12/91 * * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. * LBT 04.03.94 */ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; struct pid *pgrp; int err; if (!pid) pid = task_pid_vnr(group_leader); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. -DaveM */ write_lock_irq(&tasklist_lock); err = -ESRCH; p = find_task_by_vpid(pid); if (!p) goto out; err = -EINVAL; if (!thread_group_leader(p)) goto out; if (same_thread_group(p->real_parent, group_leader)) { err = -EPERM; if (task_session(p) != task_session(group_leader)) goto out; err = -EACCES; if (p->did_exec) goto out; } else { err = -ESRCH; if (p != group_leader) goto out; } err = -EPERM; if (p->signal->leader) goto out; pgrp = task_pid(p); if (pgid != pid) { struct task_struct *g; pgrp = find_vpid(pgid); g = pid_task(pgrp, PIDTYPE_PGID); if (!g || task_session(g) != task_session(group_leader)) goto out; } err = security_task_setpgid(p, pgid); if (err) goto out; if (task_pgrp(p) != pgrp) { change_pid(p, PIDTYPE_PGID, pgrp); set_task_pgrp(p, pid_nr(pgrp)); } err = 0; out: /* All paths lead to here, thus we are safe. -DaveM */ write_unlock_irq(&tasklist_lock); return err; }
static int ptrace_attach(struct task_struct *task) { bool wait_trap = false; int retval; audit_ptrace(task); retval = -EPERM; if (unlikely(task->flags & PF_KTHREAD)) goto out; if (same_thread_group(task, current)) goto out; /* * Protect exec's credential calculations against our interference; * interference; SUID, SGID and LSM creds get determined differently * under ptrace. */ retval = -ERESTARTNOINTR; if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); task_unlock(task); if (retval) goto unlock_creds; write_lock_irq(&tasklist_lock); retval = -EPERM; if (unlikely(task->exit_state)) goto unlock_tasklist; if (task->ptrace) goto unlock_tasklist; task->ptrace = PT_PTRACED; if (task_ns_capable(task, CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); spin_lock(&task->sighand->siglock); /* * If the task is already STOPPED, set GROUP_STOP_PENDING and * TRAPPING, and kick it so that it transits to TRACED. TRAPPING * will be cleared if the child completes the transition or any * event which clears the group stop states happens. We'll wait * for the transition to complete before returning from this * function. * * This hides STOPPED -> RUNNING -> TRACED transition from the * attaching thread but a different thread in the same group can * still observe the transient RUNNING state. IOW, if another * thread's WNOHANG wait(2) on the stopped tracee races against * ATTACH, the wait(2) may fail due to the transient RUNNING. * * The following task_is_stopped() test is safe as both transitions * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task)) { task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; signal_wake_up(task, 1); wait_trap = true; } spin_unlock(&task->sighand->siglock); retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: if (wait_trap) wait_event(current->signal->wait_chldexit, !(task->group_stop & GROUP_STOP_TRAPPING)); return retval; }
int ptrace_attach(struct task_struct *task) { int retval; unsigned long flags; audit_ptrace(task); retval = -EPERM; if (same_thread_group(task, current)) goto out; /* Protect exec's credential calculations against our interference; * SUID, SGID and LSM creds get determined differently under ptrace. */ retval = mutex_lock_interruptible(&task->cred_exec_mutex); if (retval < 0) goto out; retval = -EPERM; repeat: /* * Nasty, nasty. * * We want to hold both the task-lock and the * tasklist_lock for writing at the same time. * But that's against the rules (tasklist_lock * is taken for reading by interrupts on other * cpu's that may have task_lock). */ task_lock(task); if (!write_trylock_irqsave(&tasklist_lock, flags)) { task_unlock(task); do { cpu_relax(); } while (!write_can_lock(&tasklist_lock)); goto repeat; } if (!task->mm) goto bad; /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) goto bad; retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); if (retval) goto bad; /* Go */ task->ptrace |= PT_PTRACED; if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); mutex_unlock(&task->cred_exec_mutex); out: return retval; }
static void __oom_kill_process(struct task_struct *victim, const char *message) { struct task_struct *p; struct mm_struct *mm; bool can_oom_reap = true; p = find_lock_task_mm(victim); if (!p) { put_task_struct(victim); return; } else if (victim != p) { get_task_struct(p); put_task_struct(victim); victim = p; } /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; mmgrab(mm); /* Raise event before sending signal: task reaper must see this */ count_vm_event(OOM_KILL); memcg_memory_event_mm(mm, MEMCG_OOM_KILL); /* * We should send SIGKILL before granting access to memory reserves * in order to prevent the OOM victim from depleting the memory * reserves from the user space under its control. */ do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID); mark_oom_victim(victim); pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", message, task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), K(get_mm_counter(victim->mm, MM_ANONPAGES)), K(get_mm_counter(victim->mm, MM_FILEPAGES)), K(get_mm_counter(victim->mm, MM_SHMEMPAGES))); task_unlock(victim); /* * Kill all user processes sharing victim->mm in other thread groups, if * any. They don't get access to memory reserves, though, to avoid * depletion of all memory. This prevents mm->mmap_sem livelock when an * oom killed thread cannot exit because it requires the semaphore and * its contended by another thread trying to allocate memory itself. * That thread will now get access to memory reserves since it has a * pending fatal signal. */ rcu_read_lock(); for_each_process(p) { if (!process_shares_mm(p, mm)) continue; if (same_thread_group(p, victim)) continue; if (is_global_init(p)) { can_oom_reap = false; set_bit(MMF_OOM_SKIP, &mm->flags); pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n", task_pid_nr(victim), victim->comm, task_pid_nr(p), p->comm); continue; } /* * No use_mm() user needs to read from the userspace so we are * ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID); } rcu_read_unlock(); if (can_oom_reap) wake_oom_reaper(victim); mmdrop(mm); put_task_struct(victim); }