/* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). * * Note the "page_table_lock". It is to protect against kswapd removing * pages from under us. Note that kswapd only ever _removes_ pages, never * adds them. As such, once we have noticed that the page is not present, * we can drop the lock early. * * The adding of pages is protected by the MM semaphore (which we hold), * so we don't need to worry about a page being suddenly been added into * our VM. */ static inline int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, int write_access, pte_t * pte) { pte_t entry; /* * We need the page table lock to synchronize with kswapd * and the SMP-safe atomic PTE updates. */ spin_lock(&mm->page_table_lock); entry = *pte; if (!pte_present(entry)) { /* * If it truly wasn't present, we know that kswapd * and the PTE updates will not touch it later. So * drop the lock. */ spin_unlock(&mm->page_table_lock); if (pte_none(entry)) return do_no_page(mm, vma, address, write_access, pte); return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access); } if (write_access) { if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, entry); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); spin_unlock(&mm->page_table_lock); return 1; }
/* * do_no_page() tries to create a new page mapping. It aggressively * tries to share with existing pages, but makes a separate copy if * the "write_access" parameter is true in order to avoid the next * page fault. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. */ void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access) { pgd_t * pgd; pmd_t * pmd; pte_t * page_table; pte_t entry; unsigned long page; pgd = pgd_offset(tsk->mm, address); pmd = pmd_alloc(pgd, address); if (!pmd) goto no_memory; page_table = pte_alloc(pmd, address); if (!page_table) goto no_memory; entry = *page_table; if (pte_present(entry)) goto is_present; if (!pte_none(entry)) goto swap_page; address &= PAGE_MASK; if (!vma->vm_ops || !vma->vm_ops->nopage) goto anonymous_page; /* * The third argument is "no_share", which tells the low-level code * to copy, not share the page even if sharing is possible. It's * essentially an early COW detection */ page = vma->vm_ops->nopage(vma, address, (vma->vm_flags & VM_SHARED)?0:write_access); if (!page) goto sigbus; ++tsk->maj_flt; ++vma->vm_mm->rss; /* * This silly early PAGE_DIRTY setting removes a race * due to the bad i386 page protection. But it's valid * for other architectures too. * * Note that if write_access is true, we either now have * a exclusive copy of the page, or this is a shared mapping, * so we can make it writable and dirty to avoid having to * handle that later. */ flush_page_to_ram(page); entry = mk_pte(page, vma->vm_page_prot); if (write_access) { entry = pte_mkwrite(pte_mkdirty(entry)); } else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED)) entry = pte_wrprotect(entry); put_page(page_table, entry); /* no need to invalidate: a not-present page shouldn't be cached */ return; anonymous_page: entry = pte_wrprotect(mk_pte(ZERO_PAGE, vma->vm_page_prot)); if (write_access) { unsigned long page = __get_free_page(GFP_KERNEL); if (!page) goto sigbus; memset((void *) page, 0, PAGE_SIZE); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; flush_page_to_ram(page); } put_page(page_table, entry); return; sigbus: force_sig(SIGBUS, current); put_page(page_table, BAD_PAGE); /* no need to invalidate, wasn't present */ return; swap_page: do_swap_page(tsk, vma, address, page_table, entry, write_access); return; no_memory: oom(tsk); is_present: return; }