/*
 * Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardine <*****@*****.**>
 */
int do_munmap(unsigned long addr, unsigned long len)
{
        struct vm_area_struct *vma, *free, **p;

        if ((addr & ~PAGE_MASK) || addr > KERNEL_BASE || len > KERNEL_BASE-addr)
                return -EINVAL;

        if ((len = PAGE_ALIGN(len)) == 0)
                return 0;

        /*
         * Check if this memory area is ok - put it on the temporary
         * list if so..  The checks here are pretty simple --
         * every area affected in some way (by any overlap) is put
         * on the list.  If nothing is put on, nothing is affected.
         */
        free = NULL;
        for (vma = current->mm->mmap, p = &current->mm->mmap; vma; ) {
                if (vma->vm_start >= addr+len)
                        break;
                if (vma->vm_end <= addr) {
                        vma = vma->vm_next;
                        continue;
                }
                *p = vma->vm_next;
                vma->vm_next = free;
                free = vma;
                vma = *p;
        }
        if (!free)
                return 0;

        /*
         * Ok - we have the memory areas we should free on the 'free' list,
         * so release them, and unmap the page range..
         * If the one of the segments is only being partially unmapped,
         * it will put new vm_area_struct(s) into the address space.
         */
        while (free) {
                unsigned long st, end;

                vma = free;
                free = free->vm_next;
                remove_shared_vm_struct(vma);

                st = addr < vma->vm_start ? vma->vm_start : addr;
                end = addr+len;
                end = end > vma->vm_end ? vma->vm_end : end;

                if (vma->vm_ops && vma->vm_ops->unmap)
                        vma->vm_ops->unmap(vma, st, end-st);

                unmap_fixup(vma, st, end-st);
                kfree(vma);
        }
        unmap_page_range(addr, len);
        return 0;
}
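The per-area splitting itself is delegated to unmap_fixup(), which is not shown here; since do_munmap() above kfree()s the original vm_area_struct unconditionally, unmap_fixup() has to put fresh vm_area_struct(s) back for whatever part of the area survives. Below is a minimal, self-contained sketch of that case analysis, using a toy struct area and in-place trimming rather than the kernel's real structures -- an illustration of the four cases only, not the kernel's unmap_fixup():

#include <stdlib.h>

/* Simplified stand-in for vm_area_struct: just an address range in a list. */
struct area {
        unsigned long start, end;       /* [start, end) */
        struct area *next;
};

/*
 * Sketch of what a partial unmap does to one area: either the whole area
 * goes away, the front or back is trimmed, or a middle hole splits it in two.
 * 'st' and 'st + size' are assumed already clamped to lie within 'a'.
 * Returns the number of surviving pieces, or -1 on allocation failure.
 */
static int area_unmap_fixup(struct area *a, unsigned long st, unsigned long size)
{
        unsigned long end = st + size;
        struct area *tail;

        if (st == a->start && end == a->end)
                return 0;                       /* whole area gone; caller frees it */

        if (st == a->start) {                   /* hole at the front: trim the start */
                a->start = end;
                return 1;
        }
        if (end == a->end) {                    /* hole at the back: trim the end */
                a->end = st;
                return 1;
        }

        /* hole in the middle: keep the front piece, link in a new tail piece */
        tail = malloc(sizeof(*tail));
        if (!tail)
                return -1;
        tail->start = end;
        tail->end = a->end;
        tail->next = a->next;
        a->end = st;
        a->next = tail;
        return 2;
}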
int sys_munmap(unsigned long addr, size_t len)
{
        unsigned long base, limit;

        base = get_base(current->ldt[2]);       /* map into ds */
        limit = get_limit(0x17);                /* ds limit */

        if ((addr & 0xfff) || addr > 0x7fffffff || addr == 0 || addr + len > limit)
                return -EINVAL;
        if (unmap_page_range(base + addr, len))
                return -EAGAIN; /* should never happen */
        return 0;
}
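The user-visible side of all this is just the munmap() system call. Here is a small user-space example with ordinary POSIX mmap()/munmap(); the unmap of the middle pages is exactly the partial-unmap case that the do_munmap() versions here deal with by trimming or splitting an area:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);

        /* Map four pages of anonymous memory. */
        char *p = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* Unmap the two middle pages: a partial unmap that splits the mapping. */
        if (munmap(p + page, 2 * page) != 0) {
                perror("munmap");
                return 1;
        }

        p[0] = 'x';                     /* first page is still mapped */
        p[3 * page] = 'y';              /* last page is still mapped */
        printf("partial unmap ok\n");
        return 0;
}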
unsigned long do_munmap(unsigned long addr, int len)
{
        struct vm_area_struct *mpnt, *prev, *next, **npp, *free;

        if ((addr & ~PAGE_MASK) || (addr > PAGE_OFFSET) || (addr + len) > PAGE_OFFSET)
                return -EINVAL;

        if ((len = PAGE_ALIGN(len)) == 0)
                return 0;

        mpnt = find_vma(current, addr);
        if (!mpnt)
                return 0;
        avl_neighbours(mpnt, current->mm->mmap_avl, &prev, &next);

        npp = (prev ? &prev->vm_next : &current->mm->mmap);
        free = NULL;
        for ( ; mpnt && mpnt->vm_start < addr + len; mpnt = *npp) {
                *npp = mpnt->vm_next;
                mpnt->vm_next = free;
                free = mpnt;
                avl_remove(mpnt, &current->mm->mmap_avl);
        }

        if (free == NULL)
                return 0;

        while (free) {
                unsigned long st, end;

                mpnt = free;
                free = free->vm_next;
                remove_shared_vm_struct(mpnt);

                st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
                end = addr + len;
                end = end > mpnt->vm_end ? mpnt->vm_end : end;

                if (mpnt->vm_ops && mpnt->vm_ops->unmap)
                        mpnt->vm_ops->unmap(mpnt, st, end-st);

                unmap_fixup(mpnt, st, end-st);
                kfree(mpnt);
        }

        unmap_page_range(addr, len);
        return 0;
}
bool __oom_reap_task_mm(struct mm_struct *mm)
{
        struct vm_area_struct *vma;
        bool ret = true;

        /*
         * Tell all users of get_user/copy_from_user etc... that the content
         * is no longer stable. No barriers really needed because unmapping
         * should imply barriers already and the reader would hit a page fault
         * if it stumbled over a reaped memory.
         */
        set_bit(MMF_UNSTABLE, &mm->flags);

        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
                if (!can_madv_dontneed_vma(vma))
                        continue;

                /*
                 * Only anonymous pages have a good chance to be dropped
                 * without additional steps which we cannot afford as we
                 * are OOM already.
                 *
                 * We do not even care about fs backed pages because all
                 * which are reclaimable have already been reclaimed and
                 * we do not want to block exit_mmap by keeping mm ref
                 * count elevated without a good reason.
                 */
                if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
                        struct mmu_notifier_range range;
                        struct mmu_gather tlb;

                        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
                                                vma, mm, vma->vm_start,
                                                vma->vm_end);
                        tlb_gather_mmu(&tlb, mm, range.start, range.end);
                        if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
                                tlb_finish_mmu(&tlb, range.start, range.end);
                                ret = false;
                                continue;
                        }
                        unmap_page_range(&tlb, vma, range.start, range.end, NULL);
                        mmu_notifier_invalidate_range_end(&range);
                        tlb_finish_mmu(&tlb, range.start, range.end);
                }
        }

        return ret;
}
static bool __oom_reap_task(struct task_struct *tsk)
{
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
        struct mm_struct *mm = NULL;
        struct task_struct *p;
        struct zap_details details = {.check_swap_entries = true,
                                      .ignore_dirty = true};
        bool ret = true;

        /*
         * We have to make sure to not race with the victim exit path
         * and cause premature new oom victim selection:
         * __oom_reap_task              exit_mm
         *   atomic_inc_not_zero
         *                                mmput
         *                                  atomic_dec_and_test
         *                                exit_oom_victim
         *                              [...]
         *                              out_of_memory
         *                                select_bad_process
         *                                  # no TIF_MEMDIE task selects new victim
         *  unmap_page_range # frees some memory
         */
        mutex_lock(&oom_lock);

        /*
         * Make sure we find the associated mm_struct even when the particular
         * thread has already terminated and cleared its mm.
         * We might have race with exit path so consider our work done if there
         * is no mm.
         */
        p = find_lock_task_mm(tsk);
        if (!p)
                goto unlock_oom;
        mm = p->mm;
        atomic_inc(&mm->mm_users);
        task_unlock(p);

        if (!down_read_trylock(&mm->mmap_sem)) {
                ret = false;
                goto unlock_oom;
        }

        tlb_gather_mmu(&tlb, mm, 0, -1);
        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
                if (is_vm_hugetlb_page(vma))
                        continue;

                /*
                 * mlocked VMAs require explicit munlocking before unmap.
                 * Let's keep it simple here and skip such VMAs.
                 */
                if (vma->vm_flags & VM_LOCKED)
                        continue;

                /*
                 * Only anonymous pages have a good chance to be dropped
                 * without additional steps which we cannot afford as we
                 * are OOM already.
                 *
                 * We do not even care about fs backed pages because all
                 * which are reclaimable have already been reclaimed and
                 * we do not want to block exit_mmap by keeping mm ref
                 * count elevated without a good reason.
                 */
                if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
                        unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
                                         &details);
        }
        tlb_finish_mmu(&tlb, 0, -1);
        pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                        task_pid_nr(tsk), tsk->comm,
                        K(get_mm_counter(mm, MM_ANONPAGES)),
                        K(get_mm_counter(mm, MM_FILEPAGES)),
                        K(get_mm_counter(mm, MM_SHMEMPAGES)));
        up_read(&mm->mmap_sem);

        /*
         * This task can be safely ignored because we cannot do much more
         * to release its memory.
         */
        set_bit(MMF_OOM_REAPED, &mm->flags);
unlock_oom:
        mutex_unlock(&oom_lock);
        /*
         * Drop our reference but make sure the mmput slow path is called from a
         * different context because we shouldn't risk we get stuck there and
         * put the oom_reaper out of the way.
         */
        if (mm)
                mmput_async(mm);
        return ret;
}

#define MAX_OOM_REAP_RETRIES 10
static void oom_reap_task(struct task_struct *tsk)
{
        int attempts = 0;

        /* Retry the down_read_trylock(mmap_sem) a few times */
        while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk))
                schedule_timeout_idle(HZ/10);

        if (attempts > MAX_OOM_REAP_RETRIES) {
                pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
                                task_pid_nr(tsk), tsk->comm);
                debug_show_all_locks();
        }

        /*
         * Clear TIF_MEMDIE because the task shouldn't be sitting on a
         * reasonably reclaimable memory anymore or it is not a good candidate
         * for the oom victim right now because it cannot release its memory
         * itself nor by the oom reaper.
         */
        tsk->oom_reaper_list = NULL;
        exit_oom_victim(tsk);

        /* Drop a reference taken by wake_oom_reaper */
        put_task_struct(tsk);
}