/* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, const struct mem_cgroup *memcg, const nodemask_t *nodemask) { if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ if (memcg && !task_in_mem_cgroup(p, memcg)) return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; }
/* * If this is a system OOM (not a memcg OOM) and the task selected to be * killed is not already running at high (RT) priorities, speed up the * recovery by boosting the dying task to the lowest FIFO priority. * That helps with the recovery and avoids interfering with RT tasks. */ static void boost_dying_task_prio(struct task_struct *p, struct mem_cgroup *mem) { struct sched_param param = { .sched_priority = 1 }; if (mem) return; if (!rt_task(p)) sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); } /* * The process p may have detached its own ->mm while exiting or through * use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ struct task_struct *find_lock_task_mm(struct task_struct *p) { struct task_struct *t = p; do { task_lock(t); if (likely(t->mm)) return t; task_unlock(t); } while_each_thread(p, t); return NULL; } /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, const struct mem_cgroup *mem, const nodemask_t *nodemask) { if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ if (mem && !task_in_mem_cgroup(p, mem)) return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; } /** * oom_badness - heuristic function to determine which candidate task to kill * @p: task struct of which task we should calculate * @totalpages: total present RAM allowed for page allocation * * The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages) { long points; if (oom_unkillable_task(p, mem, nodemask)) return 0; p = find_lock_task_mm(p); if (!p) return 0; /* * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN * so the entire heuristic doesn't need to be executed for something * that cannot be killed. */ if (atomic_read(&p->mm->oom_disable_count)) { task_unlock(p); return 0; } /* * The memory controller may have a limit of 0 bytes, so avoid a divide * by zero, if necessary. */ if (!totalpages) totalpages = 1; /* * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. */ points = get_mm_rss(p->mm) + p->mm->nr_ptes; points += get_mm_counter(p->mm, swap_usage); points *= 1000; points /= totalpages; task_unlock(p); /* * Root processes get 3% bonus, just like the __vm_enough_memory() * implementation used by LSMs. */ if (has_capability_noaudit(p, CAP_SYS_ADMIN)) points -= 30; /* * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may * either completely disable oom killing or always prefer a certain * task. */ points += p->signal->oom_score_adj; /* * Never return 0 for an eligible task that may be killed since it's * possible that no single user task uses more than 0.1% of memory and * no single admin tasks uses more than 3.0%. */ if (points <= 0) return 1; return (points < 1000) ? points : 1000; }