bool __oom_reap_task_mm(struct mm_struct *mm) { struct vm_area_struct *vma; bool ret = true; /* * Tell all users of get_user/copy_from_user etc... that the content * is no longer stable. No barriers really needed because unmapping * should imply barriers already and the reader would hit a page fault * if it stumbled over a reaped memory. */ set_bit(MMF_UNSTABLE, &mm->flags); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (!can_madv_dontneed_vma(vma)) continue; /* * Only anonymous pages have a good chance to be dropped * without additional steps which we cannot afford as we * are OOM already. * * We do not even care about fs backed pages because all * which are reclaimable have already been reclaimed and * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { struct mmu_notifier_range range; struct mmu_gather tlb; mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, vma->vm_start, vma->vm_end); tlb_gather_mmu(&tlb, mm, range.start, range.end); if (mmu_notifier_invalidate_range_start_nonblock(&range)) { tlb_finish_mmu(&tlb, range.start, range.end); ret = false; continue; } unmap_page_range(&tlb, vma, range.start, range.end, NULL); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, range.start, range.end); } } return ret; }
/* * remove user pages in a given range. */ void do_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) { mmu_gather_t *tlb; pgd_t * dir; unsigned long start = address, end = address + size; int freed = 0; dir = pgd_offset(mm, address); /* * This is a long-lived spinlock. That's fine. * There's no contention, because the page table * lock only protects against kswapd anyway, and * even if kswapd happened to be looking at this * process we _want_ it to get stuck. */ if (address >= end) BUG(); spin_lock(&mm->page_table_lock); flush_cache_range(mm, address, end); tlb = tlb_gather_mmu(mm); do { freed += zap_pmd_range(tlb, dir, address, end - address); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); /* this will flush any remaining tlb entries */ tlb_finish_mmu(tlb, start, end); /* * Update rss for the mm_struct (not necessarily current->mm) * Notice that rss is an unsigned long. */ if (mm->rss > freed) mm->rss -= freed; else mm->rss = 0; spin_unlock(&mm->page_table_lock); }
static bool __oom_reap_task(struct task_struct *tsk) { struct mmu_gather tlb; struct vm_area_struct *vma; struct mm_struct *mm = NULL; struct task_struct *p; struct zap_details details = {.check_swap_entries = true, .ignore_dirty = true}; bool ret = true; /* * We have to make sure to not race with the victim exit path * and cause premature new oom victim selection: * __oom_reap_task exit_mm * atomic_inc_not_zero * mmput * atomic_dec_and_test * exit_oom_victim * [...] * out_of_memory * select_bad_process * # no TIF_MEMDIE task selects new victim * unmap_page_range # frees some memory */ mutex_lock(&oom_lock); /* * Make sure we find the associated mm_struct even when the particular * thread has already terminated and cleared its mm. * We might have race with exit path so consider our work done if there * is no mm. */ p = find_lock_task_mm(tsk); if (!p) goto unlock_oom; mm = p->mm; atomic_inc(&mm->mm_users); task_unlock(p); if (!down_read_trylock(&mm->mmap_sem)) { ret = false; goto unlock_oom; } tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (is_vm_hugetlb_page(vma)) continue; /* * mlocked VMAs require explicit munlocking before unmap. * Let's keep it simple here and skip such VMAs. */ if (vma->vm_flags & VM_LOCKED) continue; /* * Only anonymous pages have a good chance to be dropped * without additional steps which we cannot afford as we * are OOM already. * * We do not even care about fs backed pages because all * which are reclaimable have already been reclaimed and * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, &details); } tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES))); up_read(&mm->mmap_sem); /* * This task can be safely ignored because we cannot do much more * to release its memory. */ set_bit(MMF_OOM_REAPED, &mm->flags); unlock_oom: mutex_unlock(&oom_lock); /* * Drop our reference but make sure the mmput slow path is called from a * different context because we shouldn't risk we get stuck there and * put the oom_reaper out of the way. */ if (mm) mmput_async(mm); return ret; } #define MAX_OOM_REAP_RETRIES 10 static void oom_reap_task(struct task_struct *tsk) { int attempts = 0; /* Retry the down_read_trylock(mmap_sem) a few times */ while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk)) schedule_timeout_idle(HZ/10); if (attempts > MAX_OOM_REAP_RETRIES) { pr_info("oom_reaper: unable to reap pid:%d (%s)\n", task_pid_nr(tsk), tsk->comm); debug_show_all_locks(); } /* * Clear TIF_MEMDIE because the task shouldn't be sitting on a * reasonably reclaimable memory anymore or it is not a good candidate * for the oom victim right now because it cannot release its memory * itself nor by the oom reaper. */ tsk->oom_reaper_list = NULL; exit_oom_victim(tsk); /* Drop a reference taken by wake_oom_reaper */ put_task_struct(tsk); }