static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; int selected_tasksize = 0; int selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } selected_oom_score_adj = min_score_adj; rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); rcu_read_unlock(); return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); return rem; }
/* * If this is a system OOM (not a memcg OOM) and the task selected to be * killed is not already running at high (RT) priorities, speed up the * recovery by boosting the dying task to the lowest FIFO priority. * That helps with the recovery and avoids interfering with RT tasks. */ static void boost_dying_task_prio(struct task_struct *p, struct mem_cgroup *mem) { struct sched_param param = { .sched_priority = 1 }; if (mem) return; if (!rt_task(p)) sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); } /* * The process p may have detached its own ->mm while exiting or through * use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ struct task_struct *find_lock_task_mm(struct task_struct *p) { struct task_struct *t = p; do { task_lock(t); if (likely(t->mm)) return t; task_unlock(t); } while_each_thread(p, t); return NULL; } /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, const struct mem_cgroup *mem, const nodemask_t *nodemask) { if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ if (mem && !task_in_mem_cgroup(p, mem)) return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; } /** * oom_badness - heuristic function to determine which candidate task to kill * @p: task struct of which task we should calculate * @totalpages: total present RAM allowed for page allocation * * The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages) { int points; if (oom_unkillable_task(p, mem, nodemask)) return 0; p = find_lock_task_mm(p); if (!p) return 0; /* * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN * so the entire heuristic doesn't need to be executed for something * that cannot be killed. */ if (atomic_read(&p->mm->oom_disable_count)) { task_unlock(p); return 0; } /* * When the PF_OOM_ORIGIN bit is set, it indicates the task should have * priority for oom killing. */ if (p->flags & PF_OOM_ORIGIN) { task_unlock(p); return 1000; } /* * The memory controller may have a limit of 0 bytes, so avoid a divide * by zero, if necessary. */ if (!totalpages) totalpages = 1; /* * The baseline for the badness score is the proportion of RAM that each * task's rss and swap space use. */ points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 / totalpages; task_unlock(p); /* * Root processes get 3% bonus, just like the __vm_enough_memory() * implementation used by LSMs. */ if (has_capability_noaudit(p, CAP_SYS_ADMIN)) points -= 30; /* * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may * either completely disable oom killing or always prefer a certain * task. */ points += p->signal->oom_score_adj; /* * Never return 0 for an eligible task that may be killed since it's * possible that no single user task uses more than 0.1% of memory and * no single admin tasks uses more than 3.0%. */ if (points <= 0) return 1; return (points < 1000) ? points : 1000; }
struct ion_client *ion_client_create(struct ion_device *dev, unsigned int heap_mask, const char *name) { struct ion_client *client; struct task_struct *task; struct rb_node **p; struct rb_node *parent = NULL; struct ion_client *entry; pid_t pid; unsigned int name_len; if (!name) { pr_err("%s: Name cannot be null\n", __func__); return ERR_PTR(-EINVAL); } name_len = strnlen(name, 64); get_task_struct(current->group_leader); task_lock(current->group_leader); pid = task_pid_nr(current->group_leader); /* don't bother to store task struct for kernel threads, they can't be killed anyway */ if (current->group_leader->flags & PF_KTHREAD) { put_task_struct(current->group_leader); task = NULL; } else { task = current->group_leader; } task_unlock(current->group_leader); client = kzalloc(sizeof(struct ion_client), GFP_KERNEL); if (!client) { if (task) put_task_struct(current->group_leader); return ERR_PTR(-ENOMEM); } client->dev = dev; client->handles = RB_ROOT; mutex_init(&client->lock); client->name = kzalloc(name_len+1, GFP_KERNEL); if (!client->name) { put_task_struct(current->group_leader); kfree(client); return ERR_PTR(-ENOMEM); } else { strlcpy(client->name, name, name_len+1); } client->heap_mask = heap_mask; client->task = task; client->pid = pid; mutex_lock(&dev->lock); p = &dev->clients.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct ion_client, node); if (client < entry) p = &(*p)->rb_left; else if (client > entry) p = &(*p)->rb_right; } rb_link_node(&client->node, parent, p); rb_insert_color(&client->node, &dev->clients); client->debug_root = debugfs_create_file(name, 0664, dev->debug_root, client, &debug_client_fops); mutex_unlock(&dev->lock); return client; }
static int android_oom_handler(struct notifier_block *nb, unsigned long val, void *data) { struct task_struct *tsk; #ifdef MULTIPLE_OOM_KILLER struct task_struct *selected[OOM_DEPTH] = {NULL,}; #else struct task_struct *selected = NULL; #endif int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; #ifdef MULTIPLE_OOM_KILLER int selected_tasksize[OOM_DEPTH] = {0,}; int selected_oom_score_adj[OOM_DEPTH] = {OOM_ADJUST_MAX,}; int all_selected_oom = 0; int max_selected_oom_idx = 0; #else int selected_tasksize = 0; int selected_oom_score_adj; #endif static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL/5, 1); unsigned long *freed = data; /* show status */ pr_warning("%s invoked Android-oom-killer: " "oom_adj=%d, oom_score_adj=%d\n", current->comm, current->signal->oom_adj, current->signal->oom_score_adj); dump_stack(); show_mem(SHOW_MEM_FILTER_NODES); if (__ratelimit(&oom_rs)) dump_tasks_info(); min_score_adj = 0; #ifdef MULTIPLE_OOM_KILLER for (i = 0; i < OOM_DEPTH; i++) selected_oom_score_adj[i] = min_score_adj; #else selected_oom_score_adj = min_score_adj; #endif read_lock(&tasklist_lock); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; #ifdef MULTIPLE_OOM_KILLER int is_exist_oom_task = 0; #endif if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; lowmem_print(2, "oom: ------ %d (%s), adj %d, size %d\n", p->pid, p->comm, oom_score_adj, tasksize); #ifdef MULTIPLE_OOM_KILLER if (all_selected_oom < OOM_DEPTH) { for (i = 0; i < OOM_DEPTH; i++) { if (!selected[i]) { is_exist_oom_task = 1; max_selected_oom_idx = i; break; } } } else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj || (selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj && selected_tasksize[max_selected_oom_idx] < tasksize)) { is_exist_oom_task = 1; } if (is_exist_oom_task) { selected[max_selected_oom_idx] = p; selected_tasksize[max_selected_oom_idx] = tasksize; selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj; if (all_selected_oom < OOM_DEPTH) all_selected_oom++; if (all_selected_oom == OOM_DEPTH) { for (i = 0; i < OOM_DEPTH; i++) { if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx]) max_selected_oom_idx = i; else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] && selected_tasksize[i] < selected_tasksize[max_selected_oom_idx]) max_selected_oom_idx = i; } } lowmem_print(2, "oom: max_selected_oom_idx(%d) select %d (%s), adj %d, \ size %d, to kill\n", max_selected_oom_idx, p->pid, p->comm, oom_score_adj, tasksize); } #else if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "oom: select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); #endif } #ifdef MULTIPLE_OOM_KILLER for (i = 0; i < OOM_DEPTH; i++) { if (selected[i]) { lowmem_print(1, "oom: send sigkill to %d (%s), adj %d,\ size %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i]); send_sig(SIGKILL, selected[i], 0); rem -= selected_tasksize[i]; *freed += (unsigned long)selected_tasksize[i]; #ifdef OOM_COUNT_READ oom_count++; #endif } } #else if (selected) { lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; *freed += (unsigned long)selected_tasksize; #ifdef OOM_COUNT_READ oom_count++; #endif } #endif read_unlock(&tasklist_lock); lowmem_print(2, "oom: get memory %lu", *freed); return rem; }
static int lowmem_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask) { struct task_struct *p; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_adj = OOM_ADJUST_MAX + 1; int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } } if (min_adj == OOM_ADJUST_MAX + 1) return 0; if (nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n", nr_to_scan, gfp_mask, other_free, other_file, min_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (nr_to_scan <= 0) { lowmem_print(5, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); return rem; } selected_oom_adj = min_adj; read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { task_unlock(p); continue; } oom_adj = sig->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_adj < selected_oom_adj) continue; if (oom_adj == selected_oom_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_adj = oom_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_adj, selected_tasksize); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; force_sig(SIGKILL, selected); rem -= selected_tasksize; } else rem = -1; lowmem_print(4, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
static void __oom_kill_process(struct task_struct *victim, const char *message) { struct task_struct *p; struct mm_struct *mm; bool can_oom_reap = true; p = find_lock_task_mm(victim); if (!p) { put_task_struct(victim); return; } else if (victim != p) { get_task_struct(p); put_task_struct(victim); victim = p; } /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; mmgrab(mm); /* Raise event before sending signal: task reaper must see this */ count_vm_event(OOM_KILL); memcg_memory_event_mm(mm, MEMCG_OOM_KILL); /* * We should send SIGKILL before granting access to memory reserves * in order to prevent the OOM victim from depleting the memory * reserves from the user space under its control. */ do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID); mark_oom_victim(victim); pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", message, task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), K(get_mm_counter(victim->mm, MM_ANONPAGES)), K(get_mm_counter(victim->mm, MM_FILEPAGES)), K(get_mm_counter(victim->mm, MM_SHMEMPAGES))); task_unlock(victim); /* * Kill all user processes sharing victim->mm in other thread groups, if * any. They don't get access to memory reserves, though, to avoid * depletion of all memory. This prevents mm->mmap_sem livelock when an * oom killed thread cannot exit because it requires the semaphore and * its contended by another thread trying to allocate memory itself. * That thread will now get access to memory reserves since it has a * pending fatal signal. */ rcu_read_lock(); for_each_process(p) { if (!process_shares_mm(p, mm)) continue; if (same_thread_group(p, victim)) continue; if (is_global_init(p)) { can_oom_reap = false; set_bit(MMF_OOM_SKIP, &mm->flags); pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n", task_pid_nr(victim), victim->comm, task_pid_nr(p), p->comm); continue; } /* * No use_mm() user needs to read from the userspace so we are * ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID); } rcu_read_unlock(); if (can_oom_reap) wake_oom_reaper(victim); mmdrop(mm); put_task_struct(victim); }
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { struct fs_struct *fs, *new_fs = NULL; struct files_struct *fd, *new_fd = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; int err; err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; err = unshare_fs(unshare_flags, &new_fs); if (err) goto bad_unshare_out; err = unshare_fd(unshare_flags, &new_fd); if (err) goto bad_unshare_cleanup_fs; err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); if (err) goto bad_unshare_cleanup_fd; if (new_fs || new_fd || do_sysvsem || new_nsproxy) { if (do_sysvsem) { exit_sem(current); } if (new_nsproxy) { switch_task_namespaces(current, new_nsproxy); new_nsproxy = NULL; } task_lock(current); if (new_fs) { fs = current->fs; spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; spin_unlock(&fs->lock); } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); } if (new_nsproxy) put_nsproxy(new_nsproxy); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); bad_unshare_out: return err; }
/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; struct sighand_struct *new_sigh = NULL; struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; check_unshare_flags(&unshare_flags); /* Return -EINVAL for all unsupported flags */ err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) goto bad_unshare_out; /* * CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; if ((err = unshare_thread(unshare_flags))) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) goto bad_unshare_cleanup_thread; if ((err = unshare_sighand(unshare_flags, &new_sigh))) goto bad_unshare_cleanup_fs; if ((err = unshare_vm(unshare_flags, &new_mm))) goto bad_unshare_cleanup_sigh; if ((err = unshare_fd(unshare_flags, &new_fd))) goto bad_unshare_cleanup_vm; if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs))) goto bad_unshare_cleanup_fd; if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). */ exit_sem(current); } if (new_nsproxy) { switch_task_namespaces(current, new_nsproxy); new_nsproxy = NULL; } task_lock(current); if (new_fs) { fs = current->fs; write_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; write_unlock(&fs->lock); } if (new_mm) { mm = current->mm; active_mm = current->active_mm; current->mm = new_mm; current->active_mm = new_mm; if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { atomic_dec(&mm->oom_disable_count); atomic_inc(&new_mm->oom_disable_count); } activate_mm(active_mm, new_mm); new_mm = mm; } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); } if (new_nsproxy) put_nsproxy(new_nsproxy); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_vm: if (new_mm) mmput(new_mm); bad_unshare_cleanup_sigh: if (new_sigh) if (atomic_dec_and_test(&new_sigh->count)) kmem_cache_free(sighand_cachep, new_sigh); bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: return err; }
static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g, umask; struct task_struct *tracer; const struct cred *cred; pid_t ppid, tpid = 0, tgid, ngid; unsigned int max_fds = 0; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; tracer = ptrace_parent(p); if (tracer) tpid = task_pid_nr_ns(tracer, ns); tgid = task_tgid_nr_ns(p, ns); ngid = task_numa_group_id(p); cred = get_task_cred(p); umask = get_task_umask(p); if (umask >= 0) seq_printf(m, "Umask:\t%#04o\n", umask); task_lock(p); if (p->files) max_fds = files_fdtable(p->files)->max_fds; task_unlock(p); rcu_read_unlock(); seq_printf(m, "State:\t%s", get_task_state(p)); seq_put_decimal_ull(m, "\nTgid:\t", tgid); seq_put_decimal_ull(m, "\nNgid:\t", ngid); seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns)); seq_put_decimal_ull(m, "\nPPid:\t", ppid); seq_put_decimal_ull(m, "\nTracerPid:\t", tpid); seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid)); seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid)); seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid)); seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid)); seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid)); seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid)); seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid)); seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid)); seq_put_decimal_ull(m, "\nFDSize:\t", max_fds); seq_puts(m, "\nGroups:\t"); group_info = cred->group_info; for (g = 0; g < group_info->ngroups; g++) seq_put_decimal_ull(m, g ? " " : "", from_kgid_munged(user_ns, group_info->gid[g])); put_cred(cred); /* Trailing space shouldn't have been added in the first place. */ seq_putc(m, ' '); #ifdef CONFIG_PID_NS seq_puts(m, "\nNStgid:"); for (g = ns->level; g <= pid->level; g++) seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSpid:"); for (g = ns->level; g <= pid->level; g++) seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSpgid:"); for (g = ns->level; g <= pid->level; g++) seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSsid:"); for (g = ns->level; g <= pid->level; g++) seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns)); #endif seq_putc(m, '\n'); }
static inline int ltt_enumerate_process_states(struct ltt_probe_private_data *call_data) { struct task_struct *t = &init_task; struct task_struct *p = t; enum lttng_process_status status; enum lttng_thread_type type; enum lttng_execution_mode mode; enum lttng_execution_submode submode; do { mode = LTTNG_MODE_UNKNOWN; submode = LTTNG_UNKNOWN; read_lock(&tasklist_lock); if (t != &init_task) { atomic_dec(&t->usage); t = next_thread(t); } if (t == p) { p = next_task(t); t = p; } atomic_inc(&t->usage); read_unlock(&tasklist_lock); task_lock(t); if (t->exit_state == EXIT_ZOMBIE) status = LTTNG_ZOMBIE; else if (t->exit_state == EXIT_DEAD) status = LTTNG_DEAD; else if (t->state == TASK_RUNNING) { /* Is this a forked child that has not run yet? */ if (list_empty(&t->run_list)) status = LTTNG_WAIT_FORK; else /* * All tasks are considered as wait_cpu; * the viewer will sort out if the task was * really running at this time. */ status = LTTNG_WAIT_CPU; } else if (t->state & (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { /* Task is waiting for something to complete */ status = LTTNG_WAIT; } else status = LTTNG_UNNAMED; submode = LTTNG_NONE; /* * Verification of t->mm is to filter out kernel threads; * Viewer will further filter out if a user-space thread was * in syscall mode or not. */ if (t->mm) type = LTTNG_USER_THREAD; else type = LTTNG_KERNEL_THREAD; __trace_mark(0, list_process_state, call_data, "pid %d parent_pid %d name %s type %d mode %d " "submode %d status %d tgid %d", t->pid, t->parent->pid, t->comm, type, mode, submode, status, t->tgid); task_unlock(t); } while (t != &init_task); return 0; }
/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { struct fs_struct *fs, *new_fs = NULL; struct files_struct *fd, *new_fd = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; int err; err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; /* * If unsharing namespace, must also unshare filesystem information. */ if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; /* * CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; err = unshare_fs(unshare_flags, &new_fs); if (err) goto bad_unshare_out; err = unshare_fd(unshare_flags, &new_fd); if (err) goto bad_unshare_cleanup_fs; err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); if (err) goto bad_unshare_cleanup_fd; if (new_fs || new_fd || do_sysvsem || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). */ exit_sem(current); } if (new_nsproxy) { switch_task_namespaces(current, new_nsproxy); new_nsproxy = NULL; } task_lock(current); if (new_fs) { fs = current->fs; spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; spin_unlock(&fs->lock); } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); } if (new_nsproxy) put_nsproxy(new_nsproxy); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); bad_unshare_out: return err; }
static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g; struct fdtable *fdt = NULL; const struct cred *cred; pid_t ppid, tpid; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; tpid = 0; if (pid_alive(p)) { struct task_struct *tracer = ptrace_parent(p); if (tracer) tpid = task_pid_nr_ns(tracer, ns); } cred = get_task_cred(p); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" "Ngid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), task_tgid_nr_ns(p, ns), task_numa_group_id(p), pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), from_kuid_munged(user_ns, cred->euid), from_kuid_munged(user_ns, cred->suid), from_kuid_munged(user_ns, cred->fsuid), from_kgid_munged(user_ns, cred->gid), from_kgid_munged(user_ns, cred->egid), from_kgid_munged(user_ns, cred->sgid), from_kgid_munged(user_ns, cred->fsgid)); task_lock(p); if (p->files) fdt = files_fdtable(p->files); seq_printf(m, "FDSize:\t%d\n" "Groups:\t", fdt ? fdt->max_fds : 0); rcu_read_unlock(); group_info = cred->group_info; task_unlock(p); for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); seq_putc(m, '\n'); }
// an exact copy of task_threads() except no mig conversion at the end! static kern_return_t chudxnu_private_task_threads( task_t task, thread_act_array_t *threads_out, mach_msg_type_number_t *count) { mach_msg_type_number_t actual; thread_t *thread_list; thread_t thread; vm_size_t size, size_needed; void *addr; unsigned int i, j; if (task == TASK_NULL) return (KERN_INVALID_ARGUMENT); size = 0; addr = NULL; for (;;) { task_lock(task); if (!task->active) { task_unlock(task); if (size != 0) kfree(addr, size); return (KERN_FAILURE); } actual = task->thread_count; /* do we have the memory we need? */ size_needed = actual * sizeof (mach_port_t); if (size_needed <= size) break; /* unlock the task and allocate more memory */ task_unlock(task); if (size != 0) kfree(addr, size); assert(size_needed > 0); size = size_needed; addr = kalloc(size); if (addr == 0) return (KERN_RESOURCE_SHORTAGE); } /* OK, have memory and the task is locked & active */ thread_list = (thread_t *)addr; i = j = 0; for (thread = (thread_t)queue_first(&task->threads); i < actual; ++i, thread = (thread_t)queue_next(&thread->task_threads)) { thread_reference_internal(thread); thread_list[j++] = thread; } assert(queue_end(&task->threads, (queue_entry_t)thread)); actual = j; size_needed = actual * sizeof (mach_port_t); /* can unlock task now that we've got the thread refs */ task_unlock(task); if (actual == 0) { /* no threads, so return null pointer and deallocate memory */ *threads_out = NULL; *count = 0; if (size != 0) kfree(addr, size); } else { /* if we allocated too much, must copy */ if (size_needed < size) { void *newaddr; newaddr = kalloc(size_needed); if (newaddr == 0) { for (i = 0; i < actual; ++i) thread_deallocate(thread_list[i]); kfree(addr, size); return (KERN_RESOURCE_SHORTAGE); } bcopy(addr, newaddr, size_needed); kfree(addr, size); thread_list = (thread_t *)newaddr; } *threads_out = thread_list; *count = actual; } return (KERN_SUCCESS); }
/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { struct fs_struct *fs, *new_fs = NULL; struct files_struct *fd, *new_fd = NULL; struct cred *new_cred = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; int err; /* * If unsharing a user namespace must also unshare the thread. */ if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; /* * If unsharing a pid namespace must also unshare the thread. */ if (unshare_flags & CLONE_NEWPID) unshare_flags |= CLONE_THREAD; /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; /* * If unsharing a signal handlers, must also unshare the signal queues. */ if (unshare_flags & CLONE_SIGHAND) unshare_flags |= CLONE_THREAD; /* * If unsharing namespace, must also unshare filesystem information. */ if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; /* * CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; err = unshare_fs(unshare_flags, &new_fs); if (err) goto bad_unshare_out; err = unshare_fd(unshare_flags, &new_fd); if (err) goto bad_unshare_cleanup_fs; err = unshare_userns(unshare_flags, &new_cred); if (err) goto bad_unshare_cleanup_fd; err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_cred, new_fs); if (err) goto bad_unshare_cleanup_cred; if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). */ exit_sem(current); } if (new_nsproxy) switch_task_namespaces(current, new_nsproxy); task_lock(current); if (new_fs) { fs = current->fs; spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; spin_unlock(&fs->lock); } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); if (new_cred) { /* Install the new user namespace */ commit_creds(new_cred); new_cred = NULL; } } bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); bad_unshare_out: return err; }
/** * oom_badness - heuristic function to determine which candidate task to kill * @p: task struct of which task we should calculate * @totalpages: total present RAM allowed for page allocation * * The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages) { int points; if (oom_unkillable_task(p, mem, nodemask)) return 0; p = find_lock_task_mm(p); if (!p) return 0; /* * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN * so the entire heuristic doesn't need to be executed for something * that cannot be killed. */ if (atomic_read(&p->mm->oom_disable_count)) { task_unlock(p); return 0; } /* * The memory controller may have a limit of 0 bytes, so avoid a divide * by zero, if necessary. */ if (!totalpages) totalpages = 1; /* * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. */ points = get_mm_rss(p->mm) + p->mm->nr_ptes; points += get_mm_counter(p->mm, MM_SWAPENTS); points *= 1000; points /= totalpages; task_unlock(p); /* * Root processes get 3% bonus, just like the __vm_enough_memory() * implementation used by LSMs. */ if (has_capability_noaudit(p, CAP_SYS_ADMIN)) points -= 30; /* * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may * either completely disable oom killing or always prefer a certain * task. */ points += p->signal->oom_score_adj; /* * Never return 0 for an eligible task that may be killed since it's * possible that no single user task uses more than 0.1% of memory and * no single admin tasks uses more than 3.0%. */ if (points <= 0) return 1; return (points < 1000) ? points : 1000; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; tracehook_init_task(p); ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_fork_read_lock(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) goto bad_fork_free_pid; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { list_add_tail(&p->sibling, &p->real_parent->children); tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_fork_read_unlock(current); perf_event_fork(p); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) { task_lock(p); if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) atomic_dec(&p->mm->oom_disable_count); task_unlock(p); mmput(p->mm); } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) __cleanup_signal(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif if (clone_flags & CLONE_THREAD) threadgroup_fork_read_unlock(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; int ret; if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; ret = copy_thread_group_keys(tsk); if (ret < 0) { kmem_cache_free(signal_cachep, sig); return ret; } atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; sig->it_prof_expires = cputime_zero; sig->it_prof_incr = cputime_zero; sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = NULL; sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; task_io_accounting_init(&sig->ioac); sig->sum_sched_runtime = 0; INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[2]); taskstats_tgid_init(sig); task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { /* * New sole thread in the process gets an expiry time * of the whole CPU time limit. */ tsk->it_prof_expires = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); } acct_init_pacct(&sig->pacct); tty_audit_fork(sig); return 0; }
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; if (clone_flags & CLONE_THREAD) return 0; sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; if (clone_flags & CLONE_NEWPID) sig->flags |= SIGNAL_UNKILLABLE; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = NULL; sig->tty = NULL; sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->prev_utime = sig->prev_stime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; sig->maxrss = sig->cmaxrss = 0; task_io_accounting_init(&sig->ioac); sig->sum_sched_runtime = 0; taskstats_tgid_init(sig); task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); posix_cpu_timers_init_group(sig); acct_init_pacct(&sig->pacct); tty_audit_fork(sig); sched_autogroup_fork(sig); #ifdef CONFIG_CGROUPS init_rwsem(&sig->threadgroup_fork_lock); #endif sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; return 0; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; #ifdef ENHANCED_LMK_ROUTINE struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; #else struct task_struct *selected = NULL; #endif int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; #ifdef ENHANCED_LMK_ROUTINE int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,}; int selected_oom_score_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,}; int all_selected_oom = 0; int max_selected_oom_idx = 0; #else int selected_tasksize = 0; int selected_oom_score_adj; #endif int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) selected_oom_score_adj[i] = min_score_adj; #else selected_oom_score_adj = min_score_adj; #endif read_lock(&tasklist_lock); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; #ifdef ENHANCED_LMK_ROUTINE int is_exist_oom_task = 0; #endif if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); read_unlock(&tasklist_lock); return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; #ifdef ENHANCED_LMK_ROUTINE if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (!selected[i]) { is_exist_oom_task = 1; max_selected_oom_idx = i; break; } } } else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj || (selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj && selected_tasksize[max_selected_oom_idx] < tasksize)) { is_exist_oom_task = 1; } if (is_exist_oom_task) { selected[max_selected_oom_idx] = p; selected_tasksize[max_selected_oom_idx] = tasksize; selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj; if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) all_selected_oom++; if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx]) max_selected_oom_idx = i; else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] && selected_tasksize[i] < selected_tasksize[max_selected_oom_idx]) max_selected_oom_idx = i; } } lowmem_print(2, "select %d (%s), adj %d, \ size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } #else if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); #endif } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected[i]) { lowmem_print(1, "send sigkill to %d (%s), adj %d,\ size %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i]); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected[i], 0); set_tsk_thread_flag(selected[i], TIF_MEMDIE); rem -= selected_tasksize[i]; #ifdef LMK_COUNT_READ lmk_count++; #endif } } #else if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; #ifdef LMK_COUNT_READ lmk_count++; #endif } #endif lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) { struct mount *r = real_mount(mnt); int err = 0; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct super_block *sb = mnt_path.dentry->d_sb; if (sb->s_op->show_devname) { err = sb->s_op->show_devname(m, mnt_path.dentry); if (err) goto out; } else { mangle(m, r->mnt_devname ? r->mnt_devname : "none"); } seq_putc(m, ' '); seq_path(m, &mnt_path, " \t\n\\"); seq_putc(m, ' '); show_type(m, sb); seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); err = show_sb_opts(m, sb); if (err) goto out; show_mnt_opts(m, mnt); if (sb->s_op->show_options2) err = sb->s_op->show_options2(mnt, m, mnt_path.dentry); else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); out: return err; } static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) { struct proc_mounts *p = proc_mounts(m); struct mount *r = real_mount(mnt); struct super_block *sb = mnt->mnt_sb; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct path root = p->root; int err = 0; seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id, MAJOR(sb->s_dev), MINOR(sb->s_dev)); if (sb->s_op->show_path) err = sb->s_op->show_path(m, mnt->mnt_root); else seq_dentry(m, mnt->mnt_root, " \t\n\\"); if (err) goto out; seq_putc(m, ' '); /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ err = seq_path_root(m, &mnt_path, &root, " \t\n\\"); if (err) goto out; seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); show_mnt_opts(m, mnt); /* Tagged fields ("foo:X" or "bar") */ if (IS_MNT_SHARED(r)) seq_printf(m, " shared:%i", r->mnt_group_id); if (IS_MNT_SLAVE(r)) { int master = r->mnt_master->mnt_group_id; int dom = get_dominating_id(r, &p->root); seq_printf(m, " master:%i", master); if (dom && dom != master) seq_printf(m, " propagate_from:%i", dom); } if (IS_MNT_UNBINDABLE(r)) seq_puts(m, " unbindable"); /* Filesystem specific data */ seq_puts(m, " - "); show_type(m, sb); seq_putc(m, ' '); if (sb->s_op->show_devname) err = sb->s_op->show_devname(m, mnt->mnt_root); else mangle(m, r->mnt_devname ? r->mnt_devname : "none"); if (err) goto out; seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); err = show_sb_opts(m, sb); if (err) goto out; if (sb->s_op->show_options2) { err = sb->s_op->show_options2(mnt, m, mnt->mnt_root); } else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt->mnt_root); seq_putc(m, '\n'); out: return err; } static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) { struct mount *r = real_mount(mnt); struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct super_block *sb = mnt_path.dentry->d_sb; int err = 0; /* device */ if (sb->s_op->show_devname) { seq_puts(m, "device "); err = sb->s_op->show_devname(m, mnt_path.dentry); } else { if (r->mnt_devname) { seq_puts(m, "device "); mangle(m, r->mnt_devname); } else seq_puts(m, "no device"); } /* mount point */ seq_puts(m, " mounted on "); seq_path(m, &mnt_path, " \t\n\\"); seq_putc(m, ' '); /* file system type */ seq_puts(m, "with fstype "); show_type(m, sb); /* optional statistics */ if (sb->s_op->show_stats) { seq_putc(m, ' '); if (!err) err = sb->s_op->show_stats(m, mnt_path.dentry); } seq_putc(m, '\n'); return err; } static int mounts_open_common(struct inode *inode, struct file *file, int (*show)(struct seq_file *, struct vfsmount *)) { struct task_struct *task = get_proc_task(inode); struct nsproxy *nsp; struct mnt_namespace *ns = NULL; struct path root; struct proc_mounts *p; int ret = -EINVAL; if (!task) goto err; rcu_read_lock(); nsp = task_nsproxy(task); if (!nsp) { rcu_read_unlock(); put_task_struct(task); goto err; } ns = nsp->mnt_ns; if (!ns) { rcu_read_unlock(); put_task_struct(task); goto err; } get_mnt_ns(ns); rcu_read_unlock(); task_lock(task); if (!task->fs) { task_unlock(task); put_task_struct(task); ret = -ENOENT; goto err_put_ns; } get_fs_root(task->fs, &root); task_unlock(task); put_task_struct(task); ret = -ENOMEM; p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); if (!p) goto err_put_path; file->private_data = &p->m; ret = seq_open(file, &mounts_op); if (ret) goto err_free; p->ns = ns; p->root = root; p->m.poll_event = ns->event; p->show = show; return 0; err_free: kfree(p); err_put_path: path_put(&root); err_put_ns: put_mnt_ns(ns); err: return ret; } static int mounts_release(struct inode *inode, struct file *file) { struct proc_mounts *p = proc_mounts(file->private_data); path_put(&p->root); put_mnt_ns(p->ns); return seq_release(inode, file); } static int mounts_open(struct inode *inode, struct file *file) { return mounts_open_common(inode, file, show_vfsmnt); } static int mountinfo_open(struct inode *inode, struct file *file) { return mounts_open_common(inode, file, show_mountinfo); } static int mountstats_open(struct inode *inode, struct file *file) { return mounts_open_common(inode, file, show_vfsstat); } const struct file_operations proc_mounts_operations = { .open = mounts_open, .read = seq_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, }; const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, .read = seq_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, }; const struct file_operations proc_mountstats_operations = { .open = mountstats_open, .read = seq_read, .llseek = seq_lseek, .release = mounts_release, };
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; #ifdef ENHANCED_LMK_ROUTINE struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; #else struct task_struct *selected = NULL; #endif #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE static DEFINE_RATELIMIT_STATE(lmk_rs, DEFAULT_RATELIMIT_INTERVAL, 0); #else static DEFINE_RATELIMIT_STATE(lmk_rs, 6*DEFAULT_RATELIMIT_INTERVAL, 0); #endif #endif int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; #ifdef ENHANCED_LMK_ROUTINE int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,}; int selected_oom_score_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,}; int all_selected_oom = 0; int max_selected_oom_idx = 0; #else int selected_tasksize = 0; int selected_oom_score_adj; #endif #ifdef CONFIG_SAMP_HOTNESS int selected_hotness_adj = 0; #endif int array_size = ARRAY_SIZE(lowmem_adj); #if (!defined(CONFIG_MACH_JF) \ && !defined(CONFIG_SEC_PRODUCT_8960)\ ) unsigned long nr_to_scan = sc->nr_to_scan; #endif struct reclaim_state *reclaim_state = current->reclaim_state; #ifndef CONFIG_CMA int other_free = global_page_state(NR_FREE_PAGES); #else int other_free = global_page_state(NR_FREE_PAGES) - global_page_state(NR_FREE_CMA_PAGES); #endif int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); #if defined(CONFIG_RUNTIME_COMPCACHE) || defined(CONFIG_ZSWAP) other_file -= total_swapcache_pages; #endif /* CONFIG_RUNTIME_COMPCACHE || CONFIG_ZSWAP */ if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) selected_oom_score_adj[i] = min_score_adj; #else selected_oom_score_adj = min_score_adj; #endif read_lock(&tasklist_lock); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; #ifdef ENHANCED_LMK_ROUTINE int is_exist_oom_task = 0; #endif #ifdef CONFIG_SAMP_HOTNESS int hotness_adj = 0; #endif if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); read_unlock(&tasklist_lock); return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); #if defined(CONFIG_ZSWAP) if (atomic_read(&zswap_stored_pages)) { lowmem_print(3, "shown tasksize : %d\n", tasksize); tasksize += atomic_read(&zswap_pool_pages) * get_mm_counter(p->mm, MM_SWAPENTS) / atomic_read(&zswap_stored_pages); lowmem_print(3, "real tasksize : %d\n", tasksize); } #endif #ifdef CONFIG_SAMP_HOTNESS hotness_adj = p->signal->hotness_adj; #endif task_unlock(p); if (tasksize <= 0) continue; #ifdef ENHANCED_LMK_ROUTINE if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (!selected[i]) { is_exist_oom_task = 1; max_selected_oom_idx = i; break; } } } else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj || (selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj && selected_tasksize[max_selected_oom_idx] < tasksize)) { is_exist_oom_task = 1; } if (is_exist_oom_task) { selected[max_selected_oom_idx] = p; selected_tasksize[max_selected_oom_idx] = tasksize; selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj; if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) all_selected_oom++; if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx]) max_selected_oom_idx = i; else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] && selected_tasksize[i] < selected_tasksize[max_selected_oom_idx]) max_selected_oom_idx = i; } } lowmem_print(2, "select %d (%s), adj %d, \ size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } #else if (selected) { #ifdef CONFIG_SAMP_HOTNESS if (min_score_adj <= lowmem_adj[4]) { #endif if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; #ifdef CONFIG_SAMP_HOTNESS } else { if (hotness_adj > selected_hotness_adj) continue; if (hotness_adj == selected_hotness_adj && tasksize <= selected_tasksize) continue; } #endif } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; #ifdef CONFIG_SAMP_HOTNESS selected_hotness_adj = hotness_adj; #endif lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); #endif } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected[i]) { #ifdef CONFIG_SAMP_HOTNESS lowmem_print(1, "send sigkill to %d (%s), adj %d,\ size %d, free memory = %d, reclaimable memory = %d ,hotness %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i], other_free, other_file, selected_hotness_adj); #else lowmem_print(1, "send sigkill to %d (%s), adj %d,\ size %d, free memory = %d, reclaimable memory = %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i], other_free, other_file); #endif lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected[i], 0); set_tsk_thread_flag(selected[i], TIF_MEMDIE); rem -= selected_tasksize[i]; if(reclaim_state) reclaim_state->reclaimed_slab += selected_tasksize[i]; #ifdef LMK_COUNT_READ lmk_count++; #endif } } #else if (selected) { #ifdef CONFIG_SAMP_HOTNESS lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d ,hotness %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize,selected_hotness_adj); #else lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); #endif lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; if(reclaim_state) reclaim_state->reclaimed_slab = selected_tasksize; #ifdef LMK_COUNT_READ lmk_count++; #endif } #endif #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO if (__ratelimit(&lmk_rs)) { lowmem_print(1, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE show_mem(SHOW_MEM_FILTER_NODES); dump_tasks_info(); #endif } #endif lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *p; #ifdef ENHANCED_LMK_ROUTINE struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; #else struct task_struct *selected = NULL; #endif int rem = 0; int tasksize; int i; int min_adj = OOM_ADJUST_MAX + 1; #ifdef ENHANCED_LMK_ROUTINE int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,}; int selected_oom_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,}; int all_selected_oom = 0; #else int selected_tasksize = 0; int selected_oom_adj; #endif int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (lowmem_deathpending[i] && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; } #else if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; #endif if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) selected_oom_adj[i] = min_adj; #else selected_oom_adj = min_adj; #endif read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { task_unlock(p); continue; } oom_adj = sig->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(mm); task_unlock(p); if (tasksize <= 0) continue; #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (all_selected_oom >= LOWMEM_DEATHPENDING_DEPTH) { if (oom_adj < selected_oom_adj[i]) continue; if (oom_adj == selected_oom_adj[i] && tasksize <= selected_tasksize[i]) continue; } else if (selected[i]) continue; selected[i] = p; selected_tasksize[i] = tasksize; selected_oom_adj[i] = oom_adj; if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) all_selected_oom++; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); break; } #else if (selected) { if (oom_adj < selected_oom_adj) continue; if (oom_adj == selected_oom_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_adj = oom_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); #endif } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected[i]) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected[i]->pid, selected[i]->comm, selected_oom_adj[i], selected_tasksize[i]); lowmem_deathpending[i] = selected[i]; lowmem_deathpending_timeout = jiffies + HZ; force_sig(SIGKILL, selected[i]); rem -= selected_tasksize[i]; } } #else if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_adj, selected_tasksize); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; force_sig(SIGKILL, selected); rem -= selected_tasksize; } #endif lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
struct ion_client *ion_client_create(struct ion_device *dev, unsigned int heap_mask, const char *name) { struct ion_client *client; struct task_struct *task; struct rb_node **p; struct rb_node *parent = NULL; struct ion_client *entry; pid_t pid; unsigned int name_len = strnlen(name, 64); get_task_struct(current->group_leader); task_lock(current->group_leader); pid = task_pid_nr(current->group_leader); /* don't bother to store task struct for kernel threads, they can't be killed anyway */ if (current->group_leader->flags & PF_KTHREAD) { put_task_struct(current->group_leader); task = NULL; } else { task = current->group_leader; } task_unlock(current->group_leader); /* if this isn't a kernel thread, see if a client already exists */ if (task) { client = ion_client_lookup(dev, task); if (!IS_ERR_OR_NULL(client)) { put_task_struct(current->group_leader); return client; } } client = kzalloc(sizeof(struct ion_client), GFP_KERNEL); if (!client) { put_task_struct(current->group_leader); return ERR_PTR(-ENOMEM); } client->dev = dev; client->handles = RB_ROOT; mutex_init(&client->lock); client->name = kzalloc(name_len+1, GFP_KERNEL); if (!client->name) { put_task_struct(current->group_leader); kfree(client); return ERR_PTR(-ENOMEM); } else { strlcpy(client->name, name, name_len+1); } client->heap_mask = heap_mask; client->task = task; client->pid = pid; kref_init(&client->ref); mutex_lock(&dev->lock); if (task) { p = &dev->user_clients.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct ion_client, node); if (task < entry->task) p = &(*p)->rb_left; else if (task > entry->task) p = &(*p)->rb_right; } rb_link_node(&client->node, parent, p); rb_insert_color(&client->node, &dev->user_clients); } else {