/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * Note the "page_table_lock".  It is to protect against kswapd removing
 * pages from under us.  Note that kswapd only ever _removes_ pages, never
 * adds them.  As such, once we have noticed that the page is not present,
 * we can drop the lock early.
 *
 * The adding of pages is protected by the MM semaphore (which we hold),
 * so we don't need to worry about a page suddenly being added into
 * our VM.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
	struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	pte_t entry;

	/*
	 * We need the page table lock to synchronize with kswapd
	 * and the SMP-safe atomic PTE updates.
	 */
	spin_lock(&mm->page_table_lock);
	entry = *pte;
	if (!pte_present(entry)) {
		/*
		 * If it truly wasn't present, we know that kswapd
		 * and the PTE updates will not touch it later.  So
		 * drop the lock.
		 */
		spin_unlock(&mm->page_table_lock);
		if (pte_none(entry))
			return do_no_page(mm, vma, address, write_access, pte);
		return do_swap_page(mm, vma, address, pte,
			pte_to_swp_entry(entry), write_access);
	}

	if (write_access) {
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address, pte, entry);
		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	establish_pte(vma, address, pte, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;
}
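/*
 * For context, a sketch (not verbatim source) of how handle_pte_fault()
 * is reached in this same 2.4-era layout: handle_mm_fault() walks the
 * pgd/pmd levels, allocating them as needed, and hands the pte pointer
 * down.  Names follow the snippet above; details varied between releases.
 */
static int handle_mm_fault_sketch(struct mm_struct *mm,
	struct vm_area_struct * vma, unsigned long address, int write_access)
{
	pgd_t *pgd;
	pmd_t *pmd;

	pgd = pgd_offset(mm, address);	/* top-level entry for this address */
	pmd = pmd_alloc(pgd, address);	/* allocate middle level if absent */
	if (pmd) {
		pte_t * pte = pte_alloc(pmd, address);	/* allocate pte level if absent */
		if (pte)
			return handle_pte_fault(mm, vma, address, write_access, pte);
	}
	return -1;	/* out of memory */
}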
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it. -M.U-
 */
static void put_long(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long addr, unsigned long data)
{
	pgd_t *pgdir;
	pmd_t *pgmiddle;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = pgd_offset(vma->vm_mm, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgmiddle = pmd_offset(pgdir, addr);
	if (pmd_none(*pgmiddle)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pmd_bad(*pgmiddle)) {
		printk("ptrace: bad page middle %08lx\n", pmd_val(*pgmiddle));
		pmd_clear(pgmiddle);
		return;
	}
	pgtable = pte_offset(pgmiddle, addr);
	if (!pte_present(*pgtable)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(tsk, vma, addr, 2);
		goto repeat;
	}
	/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		*(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
		flush_page_to_ram(page);
	}
	/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
	/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	flush_tlb_all();
}
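/*
 * A hedged usage sketch (write_long_sketch is a hypothetical name): the
 * real 2.2-era caller, write_long(), finds the vma with find_extend_vma()
 * and splits a long that straddles a page boundary into two put_long()
 * calls.  This simplified version just enforces the precondition that
 * put_long() documents above.
 */
static int write_long_sketch(struct task_struct * tsk, unsigned long addr,
	unsigned long data)
{
	struct vm_area_struct * vma = find_extend_vma(tsk, addr);

	if (!vma)
		return -EIO;
	/* put_long() does no checking: refuse a long on a page boundary
	   instead of splitting it the way the real caller does */
	if ((addr & ~PAGE_MASK) > PAGE_SIZE - sizeof(long))
		return -EIO;
	put_long(tsk, vma, addr, data);
	return 0;
}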
/*
 * the function for the no-page and wp-page cases
 */
void handle_pte_fault(struct vm_area_struct *vma, unsigned long address,
	pte_t *pte, int write_access)
{
	/* no page present: fault it in */
	if (!pte_present(*pte)) {
		do_no_page(vma, address, write_access);
		return;
	}
	/* mark the page accessed */
	*pte = pte_mkyoung(*pte);
	if (!write_access)
		return;
	/* a write to a writable page only needs the dirty bit set */
	if (pte_write(*pte)) {
		*pte = pte_mkdirty(*pte);
		return;
	}
	/* write to a write-protected (shared/COW) page */
	do_wp_page(vma, address, write_access);
}
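/*
 * Hypothetical caller for the simplified handle_pte_fault() above
 * (handle_fault_sketch is an illustrative name): walk the page tables
 * down to the pte for the faulting address and dispatch.  The walk
 * helpers are assumed to match the other snippets in this section, and
 * the intermediate levels are assumed to be already present.
 */
void handle_fault_sketch(struct vm_area_struct *vma, unsigned long address,
	int write_access)
{
	pgd_t *pgd = pgd_offset(vma->vm_mm, address);	/* page directory entry */
	pmd_t *pmd = pmd_offset(pgd, address);		/* page middle entry */
	pte_t *pte = pte_offset(pmd, address);		/* the pte itself */

	handle_pte_fault(vma, address, pte, write_access);
}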
/*
 * The above separate functions for the no-page and wp-page
 * cases will go away (they mostly do the same thing anyway),
 * and we'll instead use only a general "handle_mm_fault()".
 *
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
static inline void handle_pte_fault(struct vm_area_struct * vma,
	unsigned long address, int write_access, pte_t * pte)
{
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	set_pte(pte, pte_mkyoung(*pte));
	flush_tlb_page(vma, address);
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		set_pte(pte, pte_mkdirty(*pte));
		flush_tlb_page(vma, address);
		return;
	}
	do_wp_page(current, vma, address, write_access);
}
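/*
 * For context, a sketch of the 2.0-era caller: handle_mm_fault() walks
 * and allocates the page-table levels, calls handle_pte_fault(), and
 * then gives external-MMU architectures the update_mmu_cache() hook
 * described in the comment above.  Close to, but not verbatim, the
 * original source.
 */
void handle_mm_fault_sketch(struct vm_area_struct * vma, unsigned long address,
	int write_access)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(vma->vm_mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd)
		goto no_memory;
	pte = pte_alloc(pmd, address);
	if (!pte)
		goto no_memory;
	handle_pte_fault(vma, address, write_access, pte);
	update_mmu_cache(vma, address, *pte);	/* e.g. Sparc/PowerPC hash tables */
	return;
no_memory:
	oom(current);	/* out of page-table memory: kill the process */
}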
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it. -M.U-
 */
static void put_long(struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = PAGE_DIR_OFFSET(vma->vm_task, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgtable = (pte_t *) (PAGE_PTR(addr) + pgd_page(*pgdir));
	if (!pte_present(*pgtable)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(vma, addr, 1);
		goto repeat;
	}
	/* this is a hack for non-kernel-mapped video buffers and similar;
	   note we must not modify "page" itself, since it is reused to
	   rebuild the pte below */
	if (page < high_memory)
		*(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
	/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
	/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	invalidate();
}
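/*
 * The read-side counterpart, sketched in the same 1.x idiom (get_long()
 * in the original source; simplified and not verbatim here): fault the
 * page in with write_access == 0 and read through the physical mapping.
 * No dirtying is needed on a read.
 */
static unsigned long get_long_sketch(struct vm_area_struct * vma,
	unsigned long addr)
{
	pgd_t *pgdir;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = PAGE_DIR_OFFSET(vma->vm_task, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(vma, addr, 0);	/* a read fault is enough */
		goto repeat;
	}
	pgtable = (pte_t *) (PAGE_PTR(addr) + pgd_page(*pgdir));
	if (!pte_present(*pgtable)) {
		do_no_page(vma, addr, 0);
		goto repeat;
	}
	page = pte_page(*pgtable);
	/* same non-kernel-mapped buffer hack as put_long() */
	if (page >= high_memory)
		return 0;
	return *(unsigned long *) (page + (addr & ~PAGE_MASK));
}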
/*
 * This isn't really reliable by any means..
 */
int mem_mmap(struct inode * inode, struct file * file,
	     struct vm_area_struct * vma)
{
	struct task_struct *tsk;
	pgd_t *src_dir, *dest_dir;
	pmd_t *src_middle, *dest_middle;
	pte_t *src_table, *dest_table;
	unsigned long stmp, dtmp, mapnr;
	struct vm_area_struct *src_vma = NULL;

	/* Get the source's task information */
	tsk = get_task(inode->i_ino >> 16);
	if (!tsk)
		return -ESRCH;

	/* Ensure that we have a valid source area.  (Has to be mmap'ed and
	   have valid page information.)  We can't map shared memory at the
	   moment because working out the vm_area_struct & nattach stuff isn't
	   worth it. */
	src_vma = tsk->mm->mmap;
	stmp = vma->vm_offset;
	while (stmp < vma->vm_offset + (vma->vm_end - vma->vm_start)) {
		while (src_vma && stmp > src_vma->vm_end)
			src_vma = src_vma->vm_next;
		if (!src_vma || (src_vma->vm_flags & VM_SHM))
			return -EINVAL;

		src_dir = pgd_offset(tsk->mm, stmp);
		if (pgd_none(*src_dir))
			return -EINVAL;
		if (pgd_bad(*src_dir)) {
			printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir));
			return -EINVAL;
		}
		src_middle = pmd_offset(src_dir, stmp);
		if (pmd_none(*src_middle))
			return -EINVAL;
		if (pmd_bad(*src_middle)) {
			printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle));
			return -EINVAL;
		}
		src_table = pte_offset(src_middle, stmp);
		if (pte_none(*src_table))
			return -EINVAL;

		if (stmp < src_vma->vm_start) {
			if (!(src_vma->vm_flags & VM_GROWSDOWN))
				return -EINVAL;
			if (src_vma->vm_end - stmp > current->rlim[RLIMIT_STACK].rlim_cur)
				return -EINVAL;
		}
		stmp += PAGE_SIZE;
	}

	src_vma = tsk->mm->mmap;
	stmp = vma->vm_offset;
	dtmp = vma->vm_start;

	flush_cache_range(vma->vm_mm, vma->vm_start, vma->vm_end);
	flush_cache_range(src_vma->vm_mm, src_vma->vm_start, src_vma->vm_end);
	while (dtmp < vma->vm_end) {
		while (src_vma && stmp > src_vma->vm_end)
			src_vma = src_vma->vm_next;

		src_dir = pgd_offset(tsk->mm, stmp);
		src_middle = pmd_offset(src_dir, stmp);
		src_table = pte_offset(src_middle, stmp);

		dest_dir = pgd_offset(current->mm, dtmp);
		dest_middle = pmd_alloc(dest_dir, dtmp);
		if (!dest_middle)
			return -ENOMEM;
		dest_table = pte_alloc(dest_middle, dtmp);
		if (!dest_table)
			return -ENOMEM;

		if (!pte_present(*src_table))
			do_no_page(tsk, src_vma, stmp, 1);

		if ((vma->vm_flags & VM_WRITE) && !pte_write(*src_table))
			do_wp_page(tsk, src_vma, stmp, 1);

		set_pte(src_table, pte_mkdirty(*src_table));
		set_pte(dest_table, *src_table);
		mapnr = MAP_NR(pte_page(*src_table));
		if (mapnr < MAP_NR(high_memory))
			mem_map[mapnr].count++;

		stmp += PAGE_SIZE;
		dtmp += PAGE_SIZE;
	}
	flush_tlb_range(vma->vm_mm, vma->vm_start, vma->vm_end);
	flush_tlb_range(src_vma->vm_mm, src_vma->vm_start, src_vma->vm_end);
	return 0;
}
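/*
 * An illustrative helper (share_pte_sketch is a made-up name) isolating
 * the per-page sharing step mem_mmap() performs above: copy the pte into
 * the destination table and bump the mem_map reference count, skipping
 * reserved/IO pages that sit beyond high_memory.
 */
static inline void share_pte_sketch(pte_t *dest, pte_t src)
{
	unsigned long mapnr = MAP_NR(pte_page(src));

	set_pte(dest, src);
	if (mapnr < MAP_NR(high_memory))	/* only real RAM pages are counted */
		mem_map[mapnr].count++;
}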
/*
 * Ugly, ugly, but the goto's result in better assembly..
 */
int verify_area(int type, const void * addr, unsigned long size)
{
	struct vm_area_struct * vma;
	unsigned long start = (unsigned long) addr;

	/* If the current user space is mapped to kernel space (for the
	 * case where we use a fake user buffer with get_fs/set_fs()) we
	 * don't expect to find the address in the user vm map.
	 */
	if (!size || get_fs() == KERNEL_DS)
		return 0;

	vma = find_vma(current->mm, start);
	if (!vma)
		goto bad_area;
	if (vma->vm_start > start)
		goto check_stack;
good_area:
	if (type == VERIFY_WRITE)
		goto check_write;
	for (;;) {
		struct vm_area_struct * next;
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		if (vma->vm_end - start >= size)
			return 0;
		next = vma->vm_next;
		if (!next || vma->vm_end != next->vm_start)
			goto bad_area;
		vma = next;
	}

check_write:
	if (!(vma->vm_flags & VM_WRITE))
		goto bad_area;
	if (!wp_works_ok)
		goto check_wp_fault_by_hand;
	for (;;) {
		if (vma->vm_end - start >= size)
			break;
		if (!vma->vm_next || vma->vm_end != vma->vm_next->vm_start)
			goto bad_area;
		vma = vma->vm_next;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}
	return 0;

check_wp_fault_by_hand:
	size--;
	size += start & ~PAGE_MASK;
	size >>= PAGE_SHIFT;
	start &= PAGE_MASK;

	for (;;) {
		do_wp_page(current, vma, start, 1);
		if (!size)
			break;
		size--;
		start += PAGE_SIZE;
		if (start < vma->vm_end)
			continue;
		vma = vma->vm_next;
		if (!vma || vma->vm_start != start)
			goto bad_area;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}
	return 0;

check_stack:
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, start) == 0)
		goto good_area;

bad_area:
	return -EFAULT;
}
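/*
 * A hedged usage sketch from the same pre-copy_to_user era: a driver
 * validates the whole user buffer once with verify_area() and then uses
 * the unchecked memcpy_tofs().  example_read() and kbuf are hypothetical
 * names for illustration only.
 */
static char kbuf[] = "hello, world\n";

static int example_read(char * buf, unsigned long count)
{
	int error;

	if (count > sizeof(kbuf))
		count = sizeof(kbuf);
	error = verify_area(VERIFY_WRITE, buf, count);	/* may fault pages in by hand */
	if (error)
		return error;
	memcpy_tofs(buf, kbuf, count);	/* unchecked copy to user space */
	return count;
}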