/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task we should calculate the badness score for
 * @memcg: memcg of the OOM context; NULL for a global OOM
 * @nodemask: nodemask of the allocation constraint; NULL if unconstrained
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                         const nodemask_t *nodemask, unsigned long totalpages)
{
        long points;
        long adj;

        if (oom_unkillable_task(p, memcg, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        adj = (long)p->signal->oom_score_adj;
        if (adj == OOM_SCORE_ADJ_MIN) {
                task_unlock(p);
                return 0;
        }

        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
         */
        if (!totalpages)
                totalpages = 1;

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + p->mm->nr_ptes;
        points += get_mm_counter(p->mm, MM_SWAPENTS);

        points *= 1000;
        points /= totalpages;
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                points -= 30;

        /*
         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
         * either completely disable oom killing or always prefer a certain
         * task.
         */
        points += p->signal->oom_score_adj;

        /*
         * Never return 0 for an eligible task that may be killed since it's
         * possible that no single user task uses more than 0.1% of memory and
         * no single admin task uses more than 3.0%.
         */
        if (points <= 0)
                return 1;
        return (points < 1000) ? points : 1000;
}
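/*
 * Standalone sketch (not kernel code): a minimal userspace model of the
 * 0..1000-scale heuristic above, so the permille scaling, the flat root
 * bonus and the clamping are easy to follow. All page counts below are
 * hypothetical values chosen only for illustration.
 */
#include <stdio.h>

static long badness_1000(long rss, long nr_ptes, long swapents,
                         long totalpages, int root, long oom_score_adj)
{
        long points = rss + nr_ptes + swapents;

        if (!totalpages)
                totalpages = 1;         /* mirror the divide-by-zero guard */

        points = points * 1000 / totalpages;    /* permille of RAM */
        if (root)
                points -= 30;                   /* flat 3% root bonus */
        points += oom_score_adj;                /* -1000..+1000 */

        if (points <= 0)
                return 1;               /* eligible tasks never score 0 */
        return points < 1000 ? points : 1000;
}

int main(void)
{
        long totalpages = 1 << 20;      /* e.g. 4 GiB of 4 KiB pages */

        /* A task using ~25% of RAM, unprivileged, no adjustment: 249. */
        printf("%ld\n", badness_1000(260000, 1024, 1000, totalpages, 0, 0));
        /* The same task run as root scores 30 points (3%) lower: 219. */
        printf("%ld\n", badness_1000(260000, 1024, 1000, totalpages, 1, 0));
        /* A tiny task with oom_score_adj = +500 still becomes preferred. */
        printf("%ld\n", badness_1000(100, 16, 0, totalpages, 0, 500));
        return 0;
}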
/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task we should calculate the badness score for
 * @memcg: memcg of the OOM context; NULL for a global OOM
 * @nodemask: nodemask of the allocation constraint; NULL if unconstrained
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 *
 * Note: we prefer not to pick a process that is still in the uninterruptible
 * state, since it cannot act on SIGKILL until it wakes up.
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                          const nodemask_t *nodemask, unsigned long totalpages)
{
        long points;
        long adj;

        if (oom_unkillable_task(p, memcg, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        adj = (long)p->signal->oom_score_adj;
        if (adj == OOM_SCORE_ADJ_MIN) {
                task_unlock(p);
                return 0;
        }

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + p->mm->nr_ptes +
                 get_mm_counter(p->mm, MM_SWAPENTS);
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                adj -= 30;

        /* Normalize to oom_score_adj units */
        adj *= totalpages / 1000;
        points += adj;

        /*
         * A task in the uninterruptible state cannot act on any signal,
         * including SIGKILL, until it wakes up, so deprioritize it rather
         * than pick a victim that may not exit promptly.
         */
        if (p->state == TASK_UNINTERRUPTIBLE)
                points /= 16;

        /*
         * Never return 0 for an eligible task regardless of the root bonus and
         * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
         */
        return points > 0 ? points : 1;
}
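/*
 * Standalone sketch (not kernel code): this revision keeps the score in
 * pages and scales oom_score_adj instead of the score, so one adj unit is
 * worth roughly 0.1% of totalpages. The numbers below are hypothetical
 * and only demonstrate the "normalize to oom_score_adj units" step.
 */
#include <stdio.h>

int main(void)
{
        long totalpages = 1 << 20;      /* e.g. 4 GiB of 4 KiB pages */
        long points = 50000;            /* rss + ptes + swapents, made up */
        long adj = 100;                 /* /proc/pid/oom_score_adj */

        adj *= totalpages / 1000;       /* normalize to page units */
        points += adj;

        /* 100 adj units add roughly 10% of RAM worth of badness. */
        printf("adj contribution: %ld pages (%.1f%% of RAM)\n",
               totalpages / 1000 * 100,
               100.0 * (totalpages / 1000 * 100) / totalpages);
        printf("final score: %ld\n", points > 0 ? points : 1);
        return 0;
}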
/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task we should calculate the badness score for
 * @memcg: memcg of the OOM context; NULL for a global OOM
 * @nodemask: nodemask of the allocation constraint; NULL if unconstrained
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                          const nodemask_t *nodemask, unsigned long totalpages)
{
        long points;
        long adj;

        if (oom_unkillable_task(p, memcg, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        /*
         * Do not even consider tasks which are explicitly marked oom
         * unkillable or have already been oom reaped.
         */
        adj = (long)p->signal->oom_score_adj;
        if (adj == OOM_SCORE_ADJ_MIN ||
                        test_bit(MMF_OOM_REAPED, &p->mm->flags)) {
                task_unlock(p);
                return 0;
        }

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
                 atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm);
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                points -= (points * 3) / 100;

        /* Normalize to oom_score_adj units */
        adj *= totalpages / 1000;
        points += adj;

        /*
         * Never return 0 for an eligible task regardless of the root bonus and
         * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
         */
        return points > 0 ? points : 1;
}
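/*
 * Standalone sketch (not kernel code): contrast of the two root bonuses
 * seen across these revisions. The flat bonus discounts 3% of *total* RAM
 * no matter how small the task is, so it can wipe out a small root task's
 * entire score (the caller then clamps to 1); the proportional bonus above
 * discounts 3% of the task's *own* usage. Hypothetical numbers throughout.
 */
#include <stdio.h>

int main(void)
{
        long totalpages = 1 << 20;      /* e.g. 4 GiB of 4 KiB pages */
        long small = 2000;              /* root task using ~0.2% of RAM */
        long big = 500000;              /* root task using ~48% of RAM */

        /* Flat bonus: 30 * totalpages / 1000 pages, i.e. 3% of all RAM. */
        printf("flat: small %ld, big %ld\n",
               small - 30 * totalpages / 1000,
               big - 30 * totalpages / 1000);

        /* Proportional bonus: 3% of the task's own score. */
        printf("prop: small %ld, big %ld\n",
               small - small * 3 / 100,
               big - big * 3 / 100);
        return 0;
}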
/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task we should calculate the badness score for
 * @memcg: memcg of the OOM context; NULL for a global OOM
 * @nodemask: nodemask of the allocation constraint; NULL if unconstrained
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                          const nodemask_t *nodemask, unsigned long totalpages)
{
        long points;

        if (oom_unkillable_task(p, memcg, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
                task_unlock(p);
                return 0;
        }

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + p->mm->nr_ptes +
                 get_mm_counter(p->mm, MM_SWAPENTS);
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                points -= 30 * totalpages / 1000;

        /*
         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
         * either completely disable oom killing or always prefer a certain
         * task.
         */
        points += p->signal->oom_score_adj * totalpages / 1000;

        /*
         * Never return 0 for an eligible task regardless of the root bonus and
         * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
         */
        return points > 0 ? points : 1;
}
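/*
 * Standalone sketch (not kernel code): how the heuristic is observed and
 * tuned from userspace. /proc/<pid>/oom_score exposes the computed badness
 * and /proc/<pid>/oom_score_adj accepts -1000..+1000; writing -1000
 * (OOM_SCORE_ADJ_MIN) makes the task ineligible for oom killing. Note that
 * lowering oom_score_adj typically requires CAP_SYS_RESOURCE, so the write
 * below may fail for an unprivileged process.
 */
#include <stdio.h>

int main(void)
{
        FILE *f;
        int score;

        /* Protect the current process as far as the heuristic allows. */
        f = fopen("/proc/self/oom_score_adj", "w");
        if (f) {
                fprintf(f, "-1000\n");
                fclose(f);
        }

        /* Read back the resulting badness score (0 when disabled). */
        f = fopen("/proc/self/oom_score", "r");
        if (f) {
                if (fscanf(f, "%d", &score) == 1)
                        printf("oom_score: %d\n", score);
                fclose(f);
        }
        return 0;
}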
/*
 * Variant of oom_badness() that bases the score on the task's low-memory
 * usage (the MM_LOW_* counters) rather than its total rss and swap
 * footprint.
 */
unsigned long oom_badness_low(struct task_struct *p, struct mem_cgroup *memcg,
                              const nodemask_t *nodemask,
                              unsigned long totalpages)
{
        long points;
        long adj;

        if (oom_unkillable_task(p, memcg, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        adj = (long)p->signal->oom_score_adj;
        if (adj == OOM_SCORE_ADJ_MIN) {
                task_unlock(p);
                return 0;
        }

        /*
         * The baseline for the badness score is the proportion of low memory
         * that the task's file pages, anonymous pages and page tables use.
         */
        points = CM(get_mm_counter(p->mm, MM_LOW_FILEPAGES)) +
                 CM(get_mm_counter(p->mm, MM_LOW_ANONPAGES)) +
                 p->mm->nr_ptes;
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                adj -= 30;

        /* Normalize to oom_score_adj units */
        adj *= totalpages / 1000;
        points += adj;

        /*
         * Never return 0 for an eligible task regardless of the root bonus and
         * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
         */
        return points > 0 ? points : 1;
}
/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task we should calculate the badness score for
 * @mem: memcg of the OOM context; NULL for a global OOM
 * @nodemask: nodemask of the allocation constraint; NULL if unconstrained
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
                         const nodemask_t *nodemask, unsigned long totalpages)
{
        int points;

        if (oom_unkillable_task(p, mem, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        /*
         * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
         * so the entire heuristic doesn't need to be executed for something
         * that cannot be killed.
         */
        if (atomic_read(&p->mm->oom_disable_count)) {
                task_unlock(p);
                return 0;
        }

        /*
         * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
         * priority for oom killing.
         */
        if (p->flags & PF_OOM_ORIGIN) {
                task_unlock(p);
                return 1000;
        }

        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
         */
        if (!totalpages)
                totalpages = 1;

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + p->mm->nr_ptes;
        points += get_mm_counter(p->mm, MM_SWAPENTS);

        points *= 1000;
        points /= totalpages;
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                points -= 30;

        /*
         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
         * either completely disable oom killing or always prefer a certain
         * task.
         */
        points += p->signal->oom_score_adj;

        /*
         * Never return 0 for an eligible task that may be killed since it's
         * possible that no single user task uses more than 0.1% of memory and
         * no single admin task uses more than 3.0%.
         */
        if (points <= 0)
                return 1;
        return (points < 1000) ? points : 1000;
}
/*
 * If this is a system OOM (not a memcg OOM) and the task selected to be
 * killed is not already running at high (RT) priorities, speed up the
 * recovery by boosting the dying task to the lowest FIFO priority.
 * That helps with the recovery and avoids interfering with RT tasks.
 */
static void boost_dying_task_prio(struct task_struct *p,
                                  struct mem_cgroup *mem)
{
        struct sched_param param = { .sched_priority = 1 };

        if (mem)
                return;

        if (!rt_task(p))
                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
}

/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer. Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
        struct task_struct *t = p;

        do {
                task_lock(t);
                if (likely(t->mm))
                        return t;
                task_unlock(t);
        } while_each_thread(p, t);

        return NULL;
}

/* return true if the task is not adequate as a candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p,
                                const struct mem_cgroup *mem,
                                const nodemask_t *nodemask)
{
        if (is_global_init(p))
                return true;
        if (p->flags & PF_KTHREAD)
                return true;

        /* When mem_cgroup_out_of_memory() and p is not member of the group */
        if (mem && !task_in_mem_cgroup(p, mem))
                return true;

        /* p may not have freeable memory in nodemask */
        if (!has_intersects_mems_allowed(p, nodemask))
                return true;

        return false;
}

/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task we should calculate the badness score for
 * @mem: memcg of the OOM context; NULL for a global OOM
 * @nodemask: nodemask of the allocation constraint; NULL if unconstrained
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
                         const nodemask_t *nodemask, unsigned long totalpages)
{
        long points;

        if (oom_unkillable_task(p, mem, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        /*
         * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
         * so the entire heuristic doesn't need to be executed for something
         * that cannot be killed.
         */
        if (atomic_read(&p->mm->oom_disable_count)) {
                task_unlock(p);
                return 0;
        }

        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
         */
        if (!totalpages)
                totalpages = 1;

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + p->mm->nr_ptes;
        points += get_mm_counter(p->mm, MM_SWAPENTS);

        points *= 1000;
        points /= totalpages;
        task_unlock(p);

        /*
         * Root processes get 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                points -= 30;

        /*
         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
         * either completely disable oom killing or always prefer a certain
         * task.
         */
        points += p->signal->oom_score_adj;

        /*
         * Never return 0 for an eligible task that may be killed since it's
         * possible that no single user task uses more than 0.1% of memory and
         * no single admin task uses more than 3.0%.
         */
        if (points <= 0)
                return 1;
        return (points < 1000) ? points : 1000;
}
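/*
 * Standalone sketch (not kernel code): a userspace analogue of the thread
 * walk in find_lock_task_mm() above. The kernel iterates a thread group
 * looking for any thread that still has a valid ->mm; from userspace the
 * same thread group is visible as the entries of /proc/<pid>/task.
 */
#include <stdio.h>
#include <dirent.h>

int main(void)
{
        DIR *dir = opendir("/proc/self/task");
        struct dirent *de;

        if (!dir)
                return 1;
        /* One directory entry per thread (tid) in the group. */
        while ((de = readdir(dir)) != NULL) {
                if (de->d_name[0] == '.')
                        continue;
                printf("thread %s\n", de->d_name);
        }
        closedir(dir);
        return 0;
}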