unsigned long l4x_set_pte(struct mm_struct *mm, unsigned long addr,
			  pte_t old, pte_t pteval)
{
	/*
	 * Check if any invalidation is necessary
	 *
	 * Invalidation (flush) necessary if:
	 *   old page was present
	 *       new page is not present OR
	 *       new page has another physical address OR
	 *       new page has another protection OR
	 *       new page has other access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;

#if 0
	if ((pte_val(old) & PAGE_MASK) != (pte_val(pteval) & PAGE_MASK))
		printk("spte %x->%x\n", pte_val(old), pte_val(pteval));
#endif

	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
			//pteval.pte_low &= ~_PAGE_MAPPED;
			pteval = __pte(pte_val(pteval) & ~_PAGE_MAPPED);
		} else if ((pte_write(old) && !pte_write(pteval))
			   || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
			check_pte_mapped(old, pteval, "RW->RO");
		} else {
			/* nothing changed, simply return */
			check_pte_mapped(old, pteval, "NoChg");
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	L4XV_FN_v(l4x_flush_page(mm, pte_val(old), addr, PAGE_SHIFT,
				 flush_rights));
	return pte_val(pteval);
}
static int pin_page_for_write(const void __user *_addr, pte_t **ptep,
			      spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * Note the "page_table_lock".  It is to protect against kswapd removing
 * pages from under us.  Note that kswapd only ever _removes_ pages, never
 * adds them.  As such, once we have noticed that the page is not present,
 * we can drop the lock early.
 *
 * The adding of pages is protected by the MM semaphore (which we hold),
 * so we don't need to worry about a page suddenly being added into
 * our VM.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
	struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	pte_t entry;

	/*
	 * We need the page table lock to synchronize with kswapd
	 * and the SMP-safe atomic PTE updates.
	 */
	spin_lock(&mm->page_table_lock);
	entry = *pte;
	if (!pte_present(entry)) {
		/*
		 * If it truly wasn't present, we know that kswapd
		 * and the PTE updates will not touch it later. So
		 * drop the lock.
		 */
		spin_unlock(&mm->page_table_lock);
		if (pte_none(entry))
			return do_no_page(mm, vma, address, write_access, pte);
		return do_swap_page(mm, vma, address, pte,
				    pte_to_swp_entry(entry), write_access);
	}

	if (write_access) {
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address, pte, entry);

		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	establish_pte(vma, address, pte, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;
}
void l4x_vmalloc_map_vm_area(unsigned long address, unsigned long end)
{
	if (address & ~PAGE_MASK)
		enter_kdebug("map_vm_area: Unaligned address!");

	for (; address < end; address += PAGE_SIZE) {
		pte_t *ptep;
#ifdef ARCH_arm
		unsigned long o;
		if ((o = l4x_arm_is_selfmapped_addr(address))) {
			address += o - PAGE_SIZE;
			continue;
		}
#endif
		ptep = lookup_pte(swapper_pg_dir, address);

		if (!ptep || !pte_present(*ptep)) {
			if (0)
				printk("%s: No (valid) PTE for %08lx?!"
				       " (ptep: %p, pte: %08"
#ifndef ARCH_arm
				       "l"
#endif
				       "x\n",
				       __func__, address,
				       ptep, pte_val(*ptep));
			continue;
		}
		l4x_virtual_mem_register(address, pte_val(*ptep));
		l4lx_memory_map_virtual_page(address, pte_val(*ptep),
					     pte_write(*ptep));
	}
}
/*
 * Do a quick page-table lookup for a single page.
 */
static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep, pte;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;

	pmd = pmd_offset(pgd, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;

	ptep = pte_offset(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	if (pte_present(pte)) {
		if (!write ||
		    (pte_write(pte) && pte_dirty(pte)))
			return pte_page(pte);
	}

out:
	return 0;
}
/* this routine handles present pages, when users try to write
   to a shared page. */
void do_wp_page(struct vm_area_struct *vma, unsigned long address,
	int write_access)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	new_page = get_free_page(GFP_KERNEL);
	pgd = pgd_offset(vma->vm_task, address);
	if (pgd_none(*pgd))
		goto end_wp_page;
	if (pgd_bad(*pgd))
		goto bad_wp_page;
	pmd = pmd_offset(pgd, address);
	if (pmd_none(*pmd))
		goto end_wp_page;
	if (pmd_bad(*pmd))
		goto bad_wp_page;
	page_table = pte_offset(pmd, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= main_memory_end)
		goto bad_wp_page;
	(vma->vm_task->mm->min_flt)++;
	if (mem_map[MAP_NR(old_page)].flags & PAGE_PRESENT) {
		if (new_page) {
			if (mem_map[MAP_NR(old_page)].flags & MAP_PAGE_RESERVED)
				++(vma->vm_task->mm->rss);
			copy_page(old_page, new_page);
			/* map the new copy writable and dirty; mk_pte takes
			 * the page address itself, not the address of the
			 * local variable */
			*page_table = pte_mkwrite(pte_mkdirty(
				mk_pte(new_page, vma->vm_page_prot)));
			free_page(old_page);
			return;
		}
		pte_val(*page_table) &= PAGE_BAD;
		free_page(old_page);
		oom();
		return;
	}
	*page_table = pte_mkdirty(pte_mkwrite(pte));
	if (new_page)
		free_page(new_page);
	return;

bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",
	       address, old_page);
	goto end_wp_page;
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}
/* Remap IO memory, the same way as remap_pfn_range(), but use
 * the obio memory space.
 *
 * They use a pgprot that sets PAGE_IO and does not check the
 * mem_map table as this is independent of normal memory.
 */
static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
				      unsigned long address,
				      unsigned long size,
				      unsigned long offset, pgprot_t prot,
				      int space)
{
	unsigned long end;

	/* clear hack bit that was used as a write_combine side-effect flag */
	offset &= ~0x1UL;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t entry;
		unsigned long curend = address + PAGE_SIZE;

		entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
		if (!(address & 0xffff)) {
			if (PAGE_SIZE < (4 * 1024 * 1024) &&
			    !(address & 0x3fffff) &&
			    !(offset & 0x3ffffe) &&
			    end >= address + 0x400000) {
				entry = mk_pte_io(offset, prot, space,
						  4 * 1024 * 1024);
				curend = address + 0x400000;
				offset += 0x400000;
			} else if (PAGE_SIZE < (512 * 1024) &&
				   !(address & 0x7ffff) &&
				   !(offset & 0x7fffe) &&
				   end >= address + 0x80000) {
				entry = mk_pte_io(offset, prot, space,
						  512 * 1024 * 1024);
				curend = address + 0x80000;
				offset += 0x80000;
			} else if (PAGE_SIZE < (64 * 1024) &&
				   !(offset & 0xfffe) &&
				   end >= address + 0x10000) {
				entry = mk_pte_io(offset, prot, space,
						  64 * 1024);
				curend = address + 0x10000;
				offset += 0x10000;
			} else
				offset += PAGE_SIZE;
		} else
			offset += PAGE_SIZE;
		if (pte_write(entry))
			entry = pte_mkdirty(entry);
		do {
			BUG_ON(!pte_none(*pte));
			set_pte_at(mm, address, pte, entry);
			address += PAGE_SIZE;
			pte_val(entry) += PAGE_SIZE;
			pte++;
		} while (address < curend);
	} while (address < end);
}
static int mem_write(struct inode * inode, struct file * file, char * buf, int count)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t pte;
	char * page;
	struct task_struct * tsk;
	unsigned long addr;
	char *tmp;
	int i;

	if (count < 0)
		return -EINVAL;
	addr = file->f_pos;
	tsk = get_task(inode->i_ino >> 16);
	if (!tsk)
		return -ESRCH;
	tmp = buf;
	while (count > 0) {
		if (current->signal & ~current->blocked)
			break;
		page_dir = pgd_offset(tsk, addr);
		if (pgd_none(*page_dir))
			break;
		if (pgd_bad(*page_dir)) {
			printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
			pgd_clear(page_dir);
			break;
		}
		page_middle = pmd_offset(page_dir, addr);
		if (pmd_none(*page_middle))
			break;
		if (pmd_bad(*page_middle)) {
			printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
			pmd_clear(page_middle);
			break;
		}
		pte = *pte_offset(page_middle, addr);
		if (!pte_present(pte))
			break;
		if (!pte_write(pte))
			break;
		page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
		i = PAGE_SIZE - (addr & ~PAGE_MASK);
		if (i > count)
			i = count;
		memcpy_fromfs(page, tmp, i);
		addr += i;
		tmp += i;
		count -= i;
	}
	file->f_pos = addr;
	if (tmp != buf)
		return tmp - buf;
	if (current->signal & ~current->blocked)
		return -ERESTARTSYS;
	return 0;
}
/*
 * fault_is_resolved()
 *	Return true if the fault appears to be resolved.
 */
STATIC int fault_is_resolved(struct pt_regs *regs, unsigned long missqw0,
			     unsigned long missqw1)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep;
	unsigned long address;
	unsigned long src = MMU_MISSQW1_SRC_GET(missqw1);
	unsigned long op = MMU_MISSQW0_OP_GET(missqw0);

	/*
	 * Potential hardware bug: check if this is an ifetch with a write op.
	 * If so, we will be in an infinite loop.  Check here because this
	 * is under debug.
	 */
	if ((src == 0) && (op == 1)) {
		printk(KERN_CRIT "ifetch/write: missqw0=%lx, missqw1=%lx\n",
		       missqw0, missqw1);
		return 0;
	}

	/*
	 * See if we now have a valid pte?
	 */
	pgd = (pgd_t *)(MMU_MISSQW0_PGD_GET(missqw0) << MMU_MISSQW0_PGD_SHIFT);
	address = (unsigned long)(MMU_MISSQW1_VPN_GET(missqw1) << MMU_VPN_SHIFT);

	pmd = (pmd_t *)__pgd_offset(pgd, address);
	if (unlikely(pmd_none(*pmd)) || (unlikely(pmd_bad(*pmd)))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] is empty\n",
		       address, pgd, pmd);
		return 0;
	}

	ptep = pte_offset_map(pmd, address);
	if (unlikely(pte_none(*ptep)) || (unlikely(pte_bad(*ptep)))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] is empty\n",
		       address, pgd, pmd, ptep);
		return 0;
	}

	if (unlikely(!pte_present(*ptep))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] is invalid: 0x%lx\n",
		       address, pgd, pmd, ptep, pte_val(*ptep));
		return 0;
	}

	if (MMU_MISSQW0_OP_GET(missqw0) && !pte_write(*ptep)) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] write requested but not given: 0x%lx\n",
		       address, pgd, pmd, ptep, pte_val(*ptep));
		/* Fall through, not as critical */
	}

	fault_printk(FAULT_DBG_TRACE,
		     "FAULT[%d]: ti[%p], missqw0=%08lx, missqw1=%08lx, resolved!\n",
		     raw_smp_processor_id(), (void *)current_thread_info(),
		     missqw0, missqw1);
	return 1;
}
/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range we need to check whether or not write
 * permission has to change only on the first pte in the set. Then for
 * all the contiguous ptes we need to check whether or not there is a
 * discrepancy between dirty or young.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = huge_ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}
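A minimal caller sketch, not from the original source, showing how __cont_access_flags_changed() is typically consulted: the access-flags update for a contiguous huge PTE range can be skipped entirely when neither write permission nor the accumulated dirty/young state differs. The function name, the ncontig value, and the elided range rewrite are assumptions standing in for the surrounding hugetlb code.

/* Hedged sketch of the caller shape; the actual clear/flush/rewrite of
 * the contiguous range is left out on purpose. */
static int cont_huge_set_access_flags_sketch(pte_t *ptep, pte_t pte, int ncontig)
{
	/* Nothing relevant changed: avoid touching the page table and the
	 * TLB, and report that no update was needed. */
	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	/* ...otherwise the caller would clear the contiguous set, fold the
	 * old dirty/young bits into "pte", and write the entries back... */
	return 1;
}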
int get_user_page(struct proc *p, unsigned long uvastart, int write,
		  int force, struct page **plist)
{
	pte_t pte;
	int ret = -1;
	struct page *pp;

	spin_lock(&p->pte_lock);

	pte = pgdir_walk(p->env_pgdir, (void*)uvastart, TRUE);
	if (!pte_walk_okay(pte))
		goto err1;

	if (!pte_is_present(pte)) {
		unsigned long prot = PTE_P | PTE_U | PTE_A | PTE_W | PTE_D;
#if 0
		printk("[akaros]: get_user_page() uva=0x%llx pte absent\n",
		       uvastart);
#endif
		/*
		 * TODO: ok to allocate with pte_lock? "prot" needs to be
		 * based on VMR writability, refer to pgprot_noncached().
		 */
		if (upage_alloc(p, &pp, 0))
			goto err1;
		pte_write(pte, page2pa(pp), prot);
	} else {
		pp = pa2page(pte_get_paddr(pte));

		/* __vmr_free_pgs() refcnt's pagemap pages differently */
		if (atomic_read(&pp->pg_flags) & PG_PAGEMAP) {
			printk("[akaros]: get_user_page(): uva=0x%llx\n",
			       uvastart);
			goto err1;
		}
	}

	if (write && (!pte_has_perm_urw(pte))) {
		/* TODO: How is Linux using the "force" parameter */
		printk("[akaros]: get_user_page() uva=0x%llx pte ro\n",
		       uvastart);
		goto err1;
	}

	/* TODO (GUP): change the interface such that devices provide the
	 * memory and the user mmaps it, instead of trying to pin arbitrary
	 * user memory. */
	warn_once("Extremely unsafe, unpinned memory mapped!  If your process dies, you might scribble on RAM!");

	plist[0] = pp;
	ret = 1;

err1:
	spin_unlock(&p->pte_lock);
	return ret;
}
unsigned long l4x_set_pte(struct mm_struct *mm, unsigned long addr,
			  pte_t old, pte_t pteval)
{
	/*
	 * Check if any invalidation is necessary
	 *
	 * Invalidation (flush) necessary if:
	 *   old page was present
	 *       new page is not present OR
	 *       new page has another physical address OR
	 *       new page has another protection OR
	 *       new page has other access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;

	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & L4X_PHYSICAL_PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
		} else if ((pte_write(old) && !pte_write(pteval))
			   || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
		} else {
			/* nothing changed, simply return */
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	l4x_flush_page(mm, pte_val(old) & L4X_PHYSICAL_PAGE_MASK,
		       addr, PAGE_SHIFT, flush_rights, _RET_IP_);

	return pte_val(pteval);
}
void pte_free(struct page *pte)
{
	unsigned long va = (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT);

	if (!pte_write(*virt_to_ptep(va)))
		BUG_ON(HYPERVISOR_update_va_mapping(
			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));

	ClearPageForeign(pte);
	init_page_count(pte);

	__free_page(pte);
}
static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
{
	unsigned long addr = (unsigned long)uptr;
	pgd_t *pgd = pgd_offset(mm, addr);

	if (pgd_present(*pgd)) {
		pmd_t *pmd = pmd_offset(pgd, addr);
		if (pmd_present(*pmd)) {
			pte_t *pte = pte_offset_map(pmd, addr);
			return (pte_present(*pte) && (!wr || pte_write(*pte)));
		}
	}
	return 0;
}
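As a hedged illustration, not part of the original source, a caller could use page_present() to pre-check a whole user buffer page by page before entering a region that must not fault. The helper name and loop below are hypothetical.

/* Hypothetical helper: walk [uptr, uptr + len) a page at a time and report
 * whether every page is present (and writable when wr is set). */
static int user_range_present(struct mm_struct *mm, void __user *uptr,
			      size_t len, int wr)
{
	unsigned long addr = (unsigned long)uptr;
	unsigned long end = addr + len;

	while (addr < end) {
		if (!page_present(mm, (void __user *)addr, wr))
			return 0;
		addr = (addr & PAGE_MASK) + PAGE_SIZE;	/* next page */
	}
	return 1;
}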
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long addr, unsigned long data)
{
	pgd_t *pgdir;
	pmd_t *pgmiddle;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = pgd_offset(vma->vm_mm, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgmiddle = pmd_offset(pgdir, addr);
	if (pmd_none(*pgmiddle)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pmd_bad(*pgmiddle)) {
		printk("ptrace: bad page directory %08lx\n", pmd_val(*pgmiddle));
		pmd_clear(pgmiddle);
		return;
	}
	pgtable = pte_offset(pgmiddle, addr);
	if (!pte_present(*pgtable)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(tsk, vma, addr, 2);
		goto repeat;
	}
	/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		*(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
		flush_page_to_ram(page);
	}
	/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
	/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	flush_tlb_all();
}
void __iomem *
ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
{
	pte_t pte = __pte(flags);

	/* writeable implies dirty for kernel addresses */
	if (pte_write(pte))
		pte = pte_mkdirty(pte);

	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
	pte = pte_exprotect(pte);
	pte = pte_mkprivileged(pte);

	return __ioremap_caller(addr, size, pte_pgprot(pte),
				__builtin_return_address(0));
}
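A hedged usage sketch, not from the original source: mapping a device register window uncached through ioremap_prot(). DEV_PHYS_BASE and DEV_WIN_SIZE are hypothetical placeholders, and the flags argument is assumed to simply be the pte bit pattern of a non-cached kernel mapping.

/* Hypothetical device window; only the call shape is the point here. */
static void __iomem *map_device_window(void)
{
	return ioremap_prot(DEV_PHYS_BASE, DEV_WIN_SIZE,
			    pgprot_val(pgprot_noncached(PAGE_KERNEL)));
}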
static unsigned long maybe_map(unsigned long virt, int is_write)
{
	pte_t pte;
	int err;
	void *phys = um_virt_to_phys(current, virt, &pte);
	int dummy_code;

	if(IS_ERR(phys) || (is_write && !pte_write(pte))) {
		err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
		if(err)
			return(0);
		phys = um_virt_to_phys(current, virt, NULL);
	}
	return((unsigned long) phys);
}
static pte_t *maybe_map(unsigned long virt, int is_write)
{
	pte_t *pte = virt_to_pte(current->mm, virt);
	int err, dummy_code;

	if ((pte == NULL) || !pte_present(*pte) ||
	    (is_write && !pte_write(*pte))) {
		err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
		if (err)
			return NULL;
		pte = virt_to_pte(current->mm, virt);
	}
	if (!pte_present(*pte))
		pte = NULL;

	return pte;
}
static void write_addr(u128 *target, u128 *inject)
{
	pte_t *ppt, pt;
	unsigned int level;

	if (pte_write(*lookup_address((unsigned long)target, &level)) == 0) {
		ppt = lookup_address((unsigned long)target, &level);
		pt = pte_mkwrite(*ppt);
		set_pte(ppt, pt);

		*target = *inject;

		ppt = lookup_address((unsigned long)target, &level);
		pt = pte_wrprotect(*ppt);
		set_pte(ppt, pt);
	} else {
		*target = *inject;
	}
}
static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
					     size_t n, int write_user)
{
	struct mm_struct *mm = current->mm;
	unsigned long offset, pfn, done, size;
	pte_t *pte;
	void *from, *to;

	done = 0;
retry:
	spin_lock(&mm->page_table_lock);
	do {
		pte = follow_table(mm, uaddr);
		if ((unsigned long) pte < 0x1000)
			goto fault;
		if (!pte_present(*pte)) {
			pte = (pte_t *) 0x11;
			goto fault;
		} else if (write_user && !pte_write(*pte)) {
			pte = (pte_t *) 0x04;
			goto fault;
		}

		pfn = pte_pfn(*pte);
		offset = uaddr & (PAGE_SIZE - 1);
		size = min(n - done, PAGE_SIZE - offset);
		if (write_user) {
			to = (void *)((pfn << PAGE_SHIFT) + offset);
			from = kptr + done;
		} else {
			from = (void *)((pfn << PAGE_SHIFT) + offset);
			to = kptr + done;
		}
		memcpy(to, from, size);
		done += size;
		uaddr += size;
	} while (done < n);
	spin_unlock(&mm->page_table_lock);
	return n - done;
fault:
	spin_unlock(&mm->page_table_lock);
	if (__handle_fault(uaddr, (unsigned long) pte, write_user))
		return n - done;
	goto retry;
}
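For context, a hedged sketch of the thin wrappers that sit on top of __user_copy_pt(): the write_user argument selects the copy direction, and a short read is zero-filled for the caller. The wrapper names below carry a _sketch suffix to mark them as illustrative, not the original wrappers.

/* Simplified sketches of the direction-selecting wrappers. The return
 * value of __user_copy_pt() is the number of bytes left uncopied. */
static size_t copy_from_user_pt_sketch(size_t n, const void __user *from, void *to)
{
	size_t rc = __user_copy_pt((unsigned long) from, to, n, 0);

	if (unlikely(rc))		/* zero the tail we failed to read */
		memset(to + n - rc, 0, rc);
	return rc;
}

static size_t copy_to_user_pt_sketch(size_t n, void __user *to, const void *from)
{
	return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
}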
/*
 * The above separate functions for the no-page and wp-page
 * cases will go away (they mostly do the same thing anyway),
 * and we'll instead use only a general "handle_mm_fault()".
 *
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	set_pte(pte, pte_mkyoung(*pte));
	flush_tlb_page(vma, address);
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		set_pte(pte, pte_mkdirty(*pte));
		flush_tlb_page(vma, address);
		return;
	}
	do_wp_page(current, vma, address, write_access);
}
/*
 * We can receive a page fault from a migrating PTE at any time.
 * Handle it by just waiting until the fault resolves.
 *
 * It's also possible to get a migrating kernel PTE that resolves
 * itself during the downcall from hypervisor to Linux.  We just check
 * here to see if the PTE seems valid, and if so we retry it.
 *
 * NOTE! We MUST NOT take any locks for this case.  We may be in an
 * interrupt or a critical region, and must do as little as possible.
 * Similarly, we can't use atomic ops here, since we may be handling a
 * fault caused by an atomic op access.
 *
 * If we find a migrating PTE while we're in an NMI context, and we're
 * at a PC that has a registered exception handler, we don't wait,
 * since this thread may (e.g.) have been interrupted while migrating
 * its own stack, which would then cause us to self-deadlock.
 */
static int handle_migrating_pte(pgd_t *pgd, int fault_num,
				unsigned long address, unsigned long pc,
				int is_kernel_mode, int write)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;

	if (pgd_addr_invalid(address))
		return 0;

	pgd += pgd_index(address);
	pud = pud_offset(pgd, address);
	if (!pud || !pud_present(*pud))
		return 0;
	pmd = pmd_offset(pud, address);
	if (!pmd || !pmd_present(*pmd))
		return 0;
	pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
		pte_offset_kernel(pmd, address);
	pteval = *pte;
	if (pte_migrating(pteval)) {
		if (in_nmi() && search_exception_tables(pc))
			return 0;
		wait_for_migration(pte);
		return 1;
	}

	if (!is_kernel_mode || !pte_present(pteval))
		return 0;
	if (fault_num == INT_ITLB_MISS) {
		if (pte_exec(pteval))
			return 1;
	} else if (write) {
		if (pte_write(pteval))
			return 1;
	} else {
		if (pte_read(pteval))
			return 1;
	}

	return 0;
}
struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
{
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long kaddr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep, pte;
	struct page *page = NULL;

	/* address is user VA; convert to kernel VA of desired page */
	kaddr = (address - vma->vm_start) + offset;
	kaddr = VMALLOC_VMADDR(kaddr);

	spin_lock(&init_mm.page_table_lock);

	/* Lookup page structure for kernel VA */
	pgd = pgd_offset(&init_mm, kaddr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;
	pmd = pmd_offset(pgd, kaddr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;
	ptep = pte_offset(pmd, kaddr);
	if (!ptep)
		goto out;
	pte = *ptep;
	if (!pte_present(pte))
		goto out;
	if (write && !pte_write(pte))
		goto out;
	page = pte_page(pte);
	if (!VALID_PAGE(page)) {
		page = NULL;
		goto out;
	}

	/* Increment reference count on page */
	get_page(page);

out:
	spin_unlock(&init_mm.page_table_lock);
	return page;
}
/* Add the output line for the given page to the buffer, returning
 * number of chars added.  Returns 0 if it can't fit into the
 * buffer. */
static int show_one_page(pte_t pte, char *buffer, int buflen)
{
	int len;
	unsigned long pfn = 0UL;
	swp_entry_t swap_entry;
	int present;
	int writable;
	unsigned long cookie;

	swap_entry.val = 0UL;	/* All zeros not a valid entry */

	present = pte_present(pte);
	writable = pte_write(pte) ? 1 : 0;
	if (present) {
		pfn = pte_pfn(pte);
		if (!pfn_valid(pfn)) {
			pfn = 0;
		}
		cookie = pfn;
	} else {
		swap_entry = pte_to_swp_entry(pte);
		cookie = swap_entry.val;
	}

	len = snprintf(buffer, buflen, "%d %d %lx\n",
		       present, writable, cookie);
	if (len >= buflen)
		goto ETOOLONG;
	return len;

ETOOLONG:
	buffer[0] = '\0';
	return 0;
}
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = PAGE_DIR_OFFSET(vma->vm_task, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgtable = (pte_t *) (PAGE_PTR(addr) + pgd_page(*pgdir));
	if (!pte_present(*pgtable)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(vma, addr, 1);
		goto repeat;
	}
	/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		page += addr & ~PAGE_MASK;
		*(unsigned long *) page = data;
	}
	/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
	/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	invalidate();
}
static int pin_page_for_write(const void __user *_addr, pte_t **ptep,
			      spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd)))
		return 0;

	/*
	 * A pmd can be bad if it refers to a HugeTLB or THP page.
	 *
	 * Both THP and HugeTLB pages have the same pmd layout
	 * and should not be manipulated by the pte functions.
	 *
	 * Lock the page table for the destination and check
	 * to see that it's still huge and whether or not we will
	 * need to fault on write, or if we have a splitting THP.
	 */
	if (unlikely(pmd_thp_or_huge(*pmd))) {
		ptl = &current->mm->page_table_lock;
		spin_lock(ptl);
		if (unlikely(!pmd_thp_or_huge(*pmd) ||
		    pmd_hugewillfault(*pmd) ||
		    pmd_trans_splitting(*pmd))) {
			spin_unlock(ptl);
			return 0;
		}

		*ptep = NULL;
		*ptlp = ptl;
		return 1;
	}

	if (unlikely(pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
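A hedged sketch, not taken from the original source, of how a follow-page style caller consults can_follow_write_pte(): a write-intent lookup is only honoured when the pte is writable or the FOLL_FORCE/FOLL_COW dirty-COW exception applies. The helper name is hypothetical.

/* Hypothetical helper: decide whether a lookup with write intent may use
 * this pte, or whether the caller must fault in a writable copy first. */
static inline bool follow_write_allowed(pte_t pte, unsigned int flags)
{
	if (!(flags & FOLL_WRITE))
		return true;		/* read-only lookup, nothing to check */
	return can_follow_write_pte(pte, flags);
}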
/* check the permissions of an address and return its type */
static int memory_check_addr_perm_task(const void *addr, word *size, int write,
				       byte *read_only, byte *executable,
				       struct task_struct *task)
{
	struct vm_area_struct *vma;
	word start = ROUNDDOWN((word)addr, PAGE_SIZE);
	word end = ROUNDUP((word)addr + *size, PAGE_SIZE);
	word total_size = 0;
	byte local_read_only = 0;
	byte local_executable = 0;
	int ret = ADDR_UNDEF;
	int atomic;
#ifdef HAS_LOOKUP_ADDRESS
	pte_t *pte;
	unsigned int level;
#endif

	if (NULL == read_only) {
		read_only = &local_read_only;
	}

	if (NULL == executable) {
		executable = &local_executable;
	}

	*read_only = 0;
	*executable = 0;

	atomic = in_atomic();
	if (!atomic) {
		down_read(&task->mm->mmap_sem);
	}

	while (start < end) {
		if (task && task->mm) {
			/* check if it's a user address */
			vma = find_vma(task->mm, start);
			if (vma && vma->vm_start <= start) {
				if (ret != ADDR_UNDEF && ret != ADDR_OUTSIDE) {
					goto end;
				}

				if (!(vma->vm_flags & VM_READ)) {
					goto end;
				}

				if (!(vma->vm_flags & VM_WRITE)) {
					if (write) {
						/* no more writable bytes */
						goto end;
					} else if (ret != ADDR_UNDEF && !(*read_only)) {
						/* the permissions have changed.
						 * this is where we stop the buffer */
						goto end;
					}

					*read_only = 1;
				}

				start = vma->vm_end;
				total_size = start - (word)addr;
				ret = ADDR_OUTSIDE;
				continue;
			}
		}

		/* check if it's a kernel virtual address */
#ifdef HAS_LOOKUP_ADDRESS
		pte = lookup_address((unsigned long)addr, &level);
		if (NULL == pte) {
			goto end;
		}

		if (ret == ADDR_UNDEF) {
			*executable = pte_exec(*pte);
		}

		if (pte_present(*pte)) {
			if (ret != ADDR_UNDEF && ret != ADDR_INSIDE) {
				goto end;
			}

			if (!pte_write(*pte)) {
				if (write) {
					/* no more writable bytes */
					goto end;
				} else if (ret != ADDR_UNDEF && !(*read_only)) {
					/* the permissions have changed.
					 * this is where we stop the buffer */
					goto end;
				}

				*read_only = 1;
			}

			start += PAGE_SIZE;
			total_size = start - (word)addr;
			ret = ADDR_INSIDE;
			continue;
		}

		goto end;
#else
		if (ret != ADDR_UNDEF && ret != ADDR_INSIDE) {
			goto end;
		}

		if (start >= PAGE_OFFSET ||
		    (start >= MODULES_VADDR && start < MODULES_END) ||
		    (start >= VMALLOC_START && start < VMALLOC_END)) {
			/* this is not totally safe. but it's enough for now. */
			*executable = 1;
			start += PAGE_SIZE;
			total_size = start - (word)addr;
			ret = ADDR_INSIDE;
			continue;
		}

		goto end;
#endif
	}

end:
	if (!atomic) {
		up_read(&task->mm->mmap_sem);
	}

	if (total_size) {
		if (total_size < *size) {
			*size = total_size;
		}
		return ret;
	} else {
		return ADDR_UNDEF;
	}
}
static size_t copy_in_user_pt(size_t n, void __user *to,
			      const void __user *from)
{
	struct mm_struct *mm = current->mm;
	unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
		      uaddr, done, size, error_code;
	unsigned long uaddr_from = (unsigned long) from;
	unsigned long uaddr_to = (unsigned long) to;
	pte_t *pte_from, *pte_to;
	int write_user;

	if (segment_eq(get_fs(), KERNEL_DS)) {
		memcpy((void __force *) to, (void __force *) from, n);
		return 0;
	}
	done = 0;
retry:
	spin_lock(&mm->page_table_lock);
	do {
		write_user = 0;
		uaddr = uaddr_from;
		pte_from = follow_table(mm, uaddr_from);
		error_code = (unsigned long) pte_from;
		if (error_code < 0x1000)
			goto fault;
		if (!pte_present(*pte_from)) {
			error_code = 0x11;
			goto fault;
		}

		write_user = 1;
		uaddr = uaddr_to;
		pte_to = follow_table(mm, uaddr_to);
		error_code = (unsigned long) pte_to;
		if (error_code < 0x1000)
			goto fault;
		if (!pte_present(*pte_to)) {
			error_code = 0x11;
			goto fault;
		} else if (!pte_write(*pte_to)) {
			error_code = 0x04;
			goto fault;
		}

		pfn_from = pte_pfn(*pte_from);
		pfn_to = pte_pfn(*pte_to);
		offset_from = uaddr_from & (PAGE_SIZE - 1);
		offset_to = uaddr_to & (PAGE_SIZE - 1);
		offset_max = max(offset_from, offset_to);
		size = min(n - done, PAGE_SIZE - offset_max);

		memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
		       (void *)(pfn_from << PAGE_SHIFT) + offset_from,
		       size);
		done += size;
		uaddr_from += size;
		uaddr_to += size;
	} while (done < n);
	spin_unlock(&mm->page_table_lock);
	return n - done;
fault:
	spin_unlock(&mm->page_table_lock);
	if (__handle_fault(uaddr, error_code, write_user))
		return n - done;
	goto retry;
}
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pud_t *npud;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr, end;
	int r, w, x, err, fd;

	if(mm == NULL)
		return;
	fd = mm->context.skas.mm_fd;
	for(addr = start_addr; addr < end_addr;){
		npgd = pgd_offset(mm, addr);
		if(!pgd_present(*npgd)){
			if(force || pgd_newpage(*npgd)){
				end = addr + PGDIR_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pgd_mkuptodate(*npgd);
			}
			addr += PGDIR_SIZE;
			continue;
		}

		npud = pud_offset(npgd, addr);
		if(!pud_present(*npud)){
			if(force || pud_newpage(*npud)){
				end = addr + PUD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pud_mkuptodate(*npud);
			}
			addr += PUD_SIZE;
			continue;
		}

		npmd = pmd_offset(npud, addr);
		if(!pmd_present(*npmd)){
			if(force || pmd_newpage(*npmd)){
				end = addr + PMD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
			continue;
		}

		npte = pte_offset_kernel(npmd, addr);
		r = pte_read(*npte);
		w = pte_write(*npte);
		x = pte_exec(*npte);
		if(!pte_dirty(*npte))
			w = 0;
		if(!pte_young(*npte)){
			r = 0;
			w = 0;
		}
		if(force || pte_newpage(*npte)){
			err = unmap(fd, (void *) addr, PAGE_SIZE);
			if(err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if(pte_present(*npte))
				map(fd, addr, pte_val(*npte) & PAGE_MASK,
				    PAGE_SIZE, r, w, x);
		}
		else if(pte_newprot(*npte))
			protect(fd, addr, PAGE_SIZE, r, w, x, 1);
		*npte = pte_mkuptodate(*npte);
		addr += PAGE_SIZE;
	}
}