/*
 * XXX needed for backend support
 */
static int map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **frames = (unsigned long **)data;

	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
	(*frames)++;
	return 0;
}
/*
 * Only sets the access flags (dirty, accessed), as well as write
 * permission. Furthermore, we know it always gets set to a "more
 * permissive" setting, which allows most architectures to optimize
 * this. We return whether the PTE actually changed, which in turn
 * instructs the caller to do things like update_mmu_cache. This
 * used to be done in the caller, but sparc needs minor faults to
 * force that call on sun4c, so we changed this macro slightly.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed) {
		set_pte_at(vma->vm_mm, address, ptep, entry);
		flush_tlb_fix_spurious_fault(vma, address);
	}
	return changed;
}
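/*
 * Illustrative sketch (not from this file): how a fault handler is
 * expected to consume the return value above. The pattern mirrors
 * handle_pte_fault() in mm/memory.c — only when the PTE actually
 * changed do we pay for update_mmu_cache(). example_mark_accessed()
 * is a made-up name; the helpers are the generic mm APIs.
 */
static inline void example_mark_accessed(struct vm_area_struct *vma,
					 unsigned long address,
					 pte_t *ptep, int write_fault)
{
	pte_t entry = *ptep;

	entry = pte_mkyoung(entry);
	if (write_fault)
		entry = pte_mkdirty(entry);
	if (ptep_set_access_flags(vma, address, ptep, entry, write_fault))
		update_mmu_cache(vma, address, ptep);
}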
/*
 * Set a new huge pmd. We should not be called for updating
 * an existing pmd entry. That should go via pmd_hugepage_update.
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
	assert_spin_locked(&mm->page_table_lock);
	WARN_ON(!pmd_trans_huge(pmd));
#endif
	trace_hugepage_set_pmd(addr, pmd_val(pmd));
	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);

	if (pte_none(*pte)) {
		pte_t entry;
		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);

		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}
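/*
 * Illustrative sketch (assumed caller): vmemmap_pte_populate() is the
 * leaf step of a walk that backs the struct page array one base page
 * at a time. A walker in the spirit of vmemmap_populate_basepages()
 * would drive it like this; the vmemmap_{pgd,pud,pmd}_populate helpers
 * are assumed from context.
 */
static int __meminit example_vmemmap_populate(unsigned long start,
					      unsigned long end, int node)
{
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		pgd_t *pgd = vmemmap_pgd_populate(addr, node);
		pud_t *pud;
		pmd_t *pmd;

		if (!pgd)
			return -ENOMEM;
		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		if (!vmemmap_pte_populate(pmd, addr, node))
			return -ENOMEM;
	}
	return 0;
}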
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	int i;

	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
		set_pte_at(mm, addr, ptep, entry);
		ptep++;
		addr += PAGE_SIZE;
		pte_val(entry) += PAGE_SIZE;
	}
}
/*
 * The __w1data area holds data that is only written during initialization,
 * and is read-only and thus freely cacheable thereafter. Fix the page
 * table entries that cover that region accordingly.
 */
static void mark_w1data_ro(void)
{
	/* Loop over page table entries */
	unsigned long addr = (unsigned long)__w1data_begin;

	BUG_ON((addr & (PAGE_SIZE - 1)) != 0);
	for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
		unsigned long pfn = kaddr_to_pfn((void *)addr);
		pte_t *ptep = virt_to_pte(NULL, addr);

		BUG_ON(pte_huge(*ptep));	/* not relevant for kdata_huge */
		set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
	}
}
/*
 * Remap IO memory, the same way as remap_pfn_range(), but use
 * the obio memory space.
 *
 * They use a pgprot that sets PAGE_IO and does not check the
 * mem_map table as this is independent of normal memory.
 */
static inline void io_remap_pte_range(struct mm_struct *mm, pte_t *pte,
				      unsigned long address,
				      unsigned long size,
				      unsigned long offset, pgprot_t prot,
				      int space)
{
	unsigned long end;

	/* clear hack bit that was used as a write_combine side-effect flag */
	offset &= ~0x1UL;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t entry;
		unsigned long curend = address + PAGE_SIZE;

		entry = mk_pte_io(offset, prot, space);
		if (!(address & 0xffff)) {
			if (!(address & 0x3fffff) && !(offset & 0x3ffffe) &&
			    end >= address + 0x400000) {
				entry = mk_pte_io(offset,
						  __pgprot(pgprot_val(prot) |
							   _PAGE_SZ4MB),
						  space);
				curend = address + 0x400000;
				offset += 0x400000;
			} else if (!(address & 0x7ffff) &&
				   !(offset & 0x7fffe) &&
				   end >= address + 0x80000) {
				entry = mk_pte_io(offset,
						  __pgprot(pgprot_val(prot) |
							   _PAGE_SZ512K),
						  space);
				curend = address + 0x80000;
				offset += 0x80000;
			} else if (!(offset & 0xfffe) &&
				   end >= address + 0x10000) {
				entry = mk_pte_io(offset,
						  __pgprot(pgprot_val(prot) |
							   _PAGE_SZ64K),
						  space);
				curend = address + 0x10000;
				offset += 0x10000;
			} else
				offset += PAGE_SIZE;
		} else
			offset += PAGE_SIZE;

		do {
			BUG_ON(!pte_none(*pte));
			set_pte_at(mm, address, pte, entry);
			address += PAGE_SIZE;
			pte_val(entry) += PAGE_SIZE;
			pte++;
		} while (address < curend);
	} while (address < end);
}
void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
{
	unsigned long addr;
	unsigned long i;

	addr = (unsigned long)shared;
	for (i = 0; i < nr_gframes; i++) {
		set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
			   __pte(0));
		addr += PAGE_SIZE;
	}
}
static void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
#ifdef CONFIG_HOMECACHE
	int home = initial_heap_home();
#endif
	unsigned long addr = begin;

	if (kdata_huge && !initfree) {
		pr_warning("Warning: ignoring initfree=0: incompatible with kdata=huge\n");
		initfree = 1;
	}
	end = (end + PAGE_SIZE - 1) & PAGE_MASK;
	local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		/*
		 * Note we just reset the home here directly in the
		 * page table. We know this is safe because our caller
		 * just flushed the caches on all the other cpus,
		 * and they won't be touching any of these pages.
		 */
		int pfn = kaddr_to_pfn((void *)addr);
		struct page *page = pfn_to_page(pfn);
		pte_t *ptep = virt_to_pte(NULL, addr);

		if (!initfree) {
			/*
			 * If debugging page accesses then do not free
			 * this memory but mark them not present - any
			 * buggy init-section access will create a
			 * kernel page fault:
			 */
			pte_clear(&init_mm, addr, ptep);
			continue;
		}
#ifdef CONFIG_HOMECACHE
		set_page_home(page, home);
		__clear_bit(PG_homecache_nomigrate, &page->flags);
#endif
		__ClearPageReserved(page);
		init_page_count(page);
		if (pte_huge(*ptep))
			BUG_ON(!kdata_huge);
		else
			set_pte_at(&init_mm, addr, ptep,
				   pfn_pte(pfn, PAGE_KERNEL));
		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
	pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
}
/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			     pgprot_t flags, unsigned int map_page_size,
			     int nid, unsigned long region_start,
			     unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
					     nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	pudp = pud_alloc(&init_mm, pgdp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}
static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
				     unsigned long end)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	pte_t zero_pte;

	zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL);
	zero_pte = pte_wrprotect(zero_pte);

	while (addr + PAGE_SIZE <= end) {
		set_pte_at(&init_mm, addr, pte, zero_pte);
		addr += PAGE_SIZE;
		pte = pte_offset_kernel(pmd, addr);
	}
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	int i;

	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.huge_pte_count++;

	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
		set_pte_at(mm, addr, ptep, entry);
		ptep++;
		addr += PAGE_SIZE;
		pte_val(entry) += PAGE_SIZE;
	}
}
/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
		      unsigned long addr, swp_entry_t entry, struct page *page)
{
	inc_mm_counter(vma->vm_mm, anon_rss);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	page_add_anon_rmap(page, vma, addr);
	swap_free(entry);
	/*
	 * Move the page to the active list so it is not
	 * immediately swapped out again after swapon.
	 */
	activate_page(page);
}
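/*
 * Illustrative sketch (assumed caller, in the spirit of
 * unuse_pte_range()): unuse_pte() expects to run under the page-table
 * lock with the swap entry already matched. example_unuse_pte_range()
 * is a made-up name; the locking helpers are the standard mm APIs.
 */
static int example_unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				   unsigned long addr, unsigned long end,
				   swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	spinlock_t *ptl;
	pte_t *pte;
	int found = 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		/* match the target swap entry before rewriting the PTE */
		if (unlikely(pte_same(*pte, swp_pte))) {
			unuse_pte(vma, pte++, addr, entry, page);
			found = 1;
			break;
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
	return found;
}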
static inline void io_remap_pte_range(struct mm_struct *mm, pte_t *pte,
				      unsigned long address,
				      unsigned long size,
				      unsigned long offset, pgprot_t prot,
				      int space)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		set_pte_at(mm, address, pte, mk_pte_io(offset, prot, space));
		address += PAGE_SIZE;
		offset += PAGE_SIZE;
		pte++;
	} while (address < end);
}
/*
 * Set a new huge pmd. We should not be called for updating
 * an existing pmd entry. That should go via pmd_hugepage_update.
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * Make sure hardware valid bit is not set. We don't do
	 * tlb flush for this update.
	 */
	WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
	WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
#endif
	trace_hugepage_set_pmd(addr, pmd_val(pmd));
	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
static void ion_clean_and_init_allocated_pages(struct ion_system_heap *heap,
					       struct scatterlist *sgl,
					       int nents, bool memory_zero)
{
	int i;
	struct scatterlist *sg;
	size_t sum = 0;
	int page_idx;
	unsigned long vaddr;
	pte_t *ptep;

	down(&heap->vm_sem);

	page_idx = atomic_pop(&heap->page_idx, VM_PAGE_COUNT_WIDTH);
	BUG_ON((page_idx < 0) || (page_idx >= VM_PAGE_COUNT));

	ptep = heap->pte[page_idx * (SZ_1M / PAGE_SIZE)];
	vaddr = (unsigned long)heap->reserved_vm_area->addr +
		(SZ_1M * page_idx);

	for_each_sg(sgl, sg, nents, i) {
		int j;

		if (!PageHighMem(sg_page(sg))) {
			memset(page_address(sg_page(sg)), 0, sg_dma_len(sg));
			continue;
		}

		for (j = 0; j < (sg_dma_len(sg) / PAGE_SIZE); j++) {
			set_pte_at(&init_mm, vaddr, ptep,
				   mk_pte(sg_page(sg) + j, PAGE_KERNEL));
			ptep++;
			vaddr += PAGE_SIZE;
		}

		sum += j * PAGE_SIZE;
		if (sum == SZ_1M) {
			ptep = heap->pte[page_idx * (SZ_1M / PAGE_SIZE)];
			vaddr = (unsigned long)heap->reserved_vm_area->addr +
				(SZ_1M * page_idx);
			ion_clean_and_unmap(vaddr, ptep, sum, memory_zero);
			sum = 0;
		}
	}

	/*
	 * The original snippet is truncated here; at minimum the matching
	 * release for the down() above is needed (any flush of a partially
	 * filled window and return of page_idx are elided).
	 */
	up(&heap->vm_sem);
}
static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
			     unsigned long end, unsigned long phys_addr,
			     pgprot_t prot)
{
	pte_t *pte;
	unsigned long pfn;

	pfn = phys_addr >> PAGE_SHIFT;
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		BUG_ON(!pte_none(*pte));
		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
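/*
 * Illustrative sketch (not from this file): ioremap_pte_range() is the
 * leaf of the ioremap() walk. From a driver's point of view the whole
 * machinery is hidden behind ioremap()/iounmap();
 * example_driver_probe() and its arguments are made up for the
 * example.
 */
static int example_driver_probe(phys_addr_t phys_base, size_t len)
{
	void __iomem *regs = ioremap(phys_base, len);
	u32 val;

	if (!regs)
		return -ENOMEM;
	val = readl(regs);	/* MMIO access through the new mapping */
	(void)val;
	iounmap(regs);
	return 0;
}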
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
				 pgprot_t flags, unsigned int map_page_size,
				 int nid, unsigned long region_start,
				 unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	if (pgd_none(*pgdp)) {
		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
					   region_start, region_end);
		pgd_populate(&init_mm, pgdp, pudp);
	}
	pudp = pud_offset(pgdp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
					   region_start, region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}
void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
{
	pte_t **ptes;
	unsigned long addr;
	unsigned long i;

	if (shared == gnttab_status_vm_area.area->addr)
		ptes = gnttab_status_vm_area.ptes;
	else
		ptes = gnttab_shared_vm_area.ptes;

	addr = (unsigned long)shared;
	for (i = 0; i < nr_gframes; i++) {
		set_pte_at(&init_mm, addr, ptes[i], __pte(0));
		addr += PAGE_SIZE;
	}
}
void *kmap_atomic(struct page *page)
{
	int idx, cpu_idx;
	unsigned long vaddr;

	preempt_disable();
	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);

	cpu_idx = kmap_atomic_idx_push();
	idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
	vaddr = FIXMAP_ADDR(idx);

	set_pte_at(&init_mm, vaddr, fixmap_page_table + idx,
		   mk_pte(page, kmap_prot));

	return (void *)vaddr;
}
/*
 * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
 * no global lock is needed and because the kmap code must perform a global TLB
 * invalidation when the kmap pool wraps.
 *
 * However when holding an atomic kmap it is not legal to sleep, so atomic
 * kmaps are appropriate for short, tight code paths only.
 */
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
	enum fixed_addresses idx;
	unsigned long vaddr;

	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
	pagefault_disable();

	if (!PageHighMem(page))
		return page_address(page);

	debug_kmap_atomic(type);
	idx = type + KM_TYPE_NR * smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
	BUG_ON(!pte_none(*(kmap_pte - idx)));
	set_pte_at(&init_mm, vaddr, kmap_pte - idx, mk_pte(page, prot));

	return (void *)vaddr;
}
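/*
 * Illustrative sketch (not from this file): the canonical short,
 * non-sleeping critical section the comment above describes, written
 * against the km_type-era API used by this snippet.
 * example_zero_highpage() is a made-up name.
 */
static void example_zero_highpage(struct page *page)
{
	void *vaddr = kmap_atomic(page, KM_USER0);

	memset(vaddr, 0, PAGE_SIZE);	/* must not sleep while mapped */
	kunmap_atomic(vaddr, KM_USER0);	/* release in LIFO order */
}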
static inline void clear_soft_dirty(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/vm/soft-dirty.txt for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		ptent = pte_wrprotect(ptent);
		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
	}

	set_pte_at(vma->vm_mm, addr, pte, ptent);
}
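/*
 * Illustrative sketch (userspace side, not from this file): how the
 * soft-dirty bit cleared above is consumed. Per
 * Documentation/vm/soft-dirty.txt, writing "4" to /proc/<pid>/clear_refs
 * arms the tracker, and bit 55 of each /proc/<pid>/pagemap entry then
 * reports a write since that point. Function names are made up.
 */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

/* arm the tracker: equivalent of `echo 4 > /proc/<pid>/clear_refs` */
static int clear_soft_dirty_bits(const char *clear_refs_path)
{
	int fd = open(clear_refs_path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, "4", 1) != 1) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

static int page_written_since_clear(int pagemap_fd, unsigned long vaddr,
				    long page_size)
{
	uint64_t entry;

	if (pread(pagemap_fd, &entry, sizeof(entry),
		  (vaddr / page_size) * sizeof(entry)) != sizeof(entry))
		return -1;
	return (entry >> 55) & 1;	/* soft-dirty bit */
}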
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
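/*
 * Illustrative sketch (not from this file): vmap_pte_range() is the
 * leaf of the vmap() walk. A caller that wants a contiguous kernel
 * view of scattered pages uses the high-level API; 'pages' and 'n'
 * are assumed to come from a prior allocation, and
 * example_map_pages() is a made-up name.
 */
static void *example_map_pages(struct page **pages, unsigned int n)
{
	/* VM_MAP marks an ordinary vmap area; PAGE_KERNEL is RW */
	void *vaddr = vmap(pages, n, VM_MAP, PAGE_KERNEL);

	return vaddr;	/* release later with vunmap(vaddr) */
}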
int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
			   unsigned long max_nr_gframes,
			   grant_status_t **__shared)
{
	grant_status_t *shared = *__shared;
	unsigned long addr;
	unsigned long i;

	if (shared == NULL)
		*__shared = shared = gnttab_status_vm_area.area->addr;

	addr = (unsigned long)shared;
	for (i = 0; i < nr_gframes; i++) {
		set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
			   mfn_pte(frames[i], PAGE_KERNEL));
		addr += PAGE_SIZE;
	}
	return 0;
}
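/*
 * Illustrative sketch (assumed counterpart): the shared-frame variant
 * presumably mirrors the status variant above, differing only in the
 * vm_area and the frame type. arch_gnttab_map_shared() and
 * gnttab_shared_vm_area are assumed from context, not confirmed by
 * this snippet.
 */
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
			   unsigned long max_nr_gframes, void **__shared)
{
	void *shared = *__shared;
	unsigned long addr;
	unsigned long i;

	if (shared == NULL)
		*__shared = shared = gnttab_shared_vm_area.area->addr;

	addr = (unsigned long)shared;
	for (i = 0; i < nr_gframes; i++) {
		set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
			   mfn_pte(frames[i], PAGE_KERNEL));
		addr += PAGE_SIZE;
	}
	return 0;
}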
static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
			  pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(&tboot_mm, vaddr);
	pud = pud_alloc(&tboot_mm, pgd, vaddr);
	if (!pud)
		return -1;
	pmd = pmd_alloc(&tboot_mm, pud, vaddr);
	if (!pmd)
		return -1;
	pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
	if (!pte)
		return -1;
	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
	pte_unmap(pte);
	return 0;
}
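/*
 * Illustrative sketch (assumed wrapper): mapping a run of pages is just
 * the single-page helper in a loop; upstream tboot has a
 * map_tboot_pages() in this spirit. The PAGE_KERNEL_EXEC protection and
 * any pgd setup/teardown are assumptions here, not taken from this
 * snippet.
 */
static int example_map_tboot_pages(unsigned long vaddr,
				   unsigned long start_pfn,
				   unsigned long nr)
{
	for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
		if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
			return -1;
	}
	return 0;
}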
int arch_prepare_hugepage(struct page *page)
{
	unsigned long addr = page_to_phys(page);
	pte_t pte;
	pte_t *ptep;
	int i;

	if (MACHINE_HAS_HPAGE)
		return 0;

	ptep = (pte_t *)pte_alloc_one(&init_mm, addr);
	if (!ptep)
		return -ENOMEM;

	pte_val(pte) = addr;
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
		pte_val(pte) += PAGE_SIZE;
	}
	page[1].index = (unsigned long)ptep;
	return 0;
}
static int mfill_zeropage_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
			      struct vm_area_struct *dst_vma,
			      unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_unlock;
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}
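/*
 * Illustrative sketch (userspace side, not from this file):
 * mfill_zeropage_pte() is the kernel half of UFFDIO_ZEROPAGE. A
 * monitor that resolves a fault with the zero page issues the ioctl
 * like this; 'uffd' is assumed to be a userfaultfd already registered
 * over the faulting range, and the function name is made up.
 */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int example_resolve_with_zeropage(int uffd, unsigned long addr,
					 unsigned long len)
{
	struct uffdio_zeropage zp = {
		.range = { .start = addr, .len = len },
		.mode  = 0,
	};

	if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == -1)
		return -1;
	/* zp.zeropage reports bytes filled (or a negative error) */
	return 0;
}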
static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
			  pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}
void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
	unsigned long vaddr;
	int idx, type;

	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);

	type = kmap_atomic_idx_push();
	idx = type + KM_TYPE_NR * smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
#ifdef CONFIG_DEBUG_HIGHMEM
	BUG_ON(!pte_none(*(kmap_pte - idx)));
#endif
	set_pte_at(&init_mm, vaddr, kmap_pte - idx, mk_pte(page, prot));
	local_flush_tlb_page(NULL, vaddr);

	return (void *)vaddr;
}
/* map fgmfn of domid to lpfn in the current domain */
static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
			    unsigned int domid)
{
	int rc;
	struct xen_add_to_physmap_range xatp = {
		.domid = DOMID_SELF,
		.foreign_domid = domid,
		.size = 1,
		.space = XENMAPSPACE_gmfn_foreign,
	};
	xen_ulong_t idx = fgmfn;
	xen_pfn_t gpfn = lpfn;
	int err = 0;

	set_xen_guest_handle(xatp.idxs, &idx);
	set_xen_guest_handle(xatp.gpfns, &gpfn);
	set_xen_guest_handle(xatp.errs, &err);

	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
	if (rc || err) {
		pr_warn("Failed to map pfn to mfn rc:%d:%d pfn:%lx mfn:%lx\n",
			rc, err, lpfn, fgmfn);
		return 1;
	}
	return 0;
}

struct remap_data {
	xen_pfn_t fgmfn;		/* foreign domain's gmfn */
	pgprot_t prot;
	domid_t domid;
	struct vm_area_struct *vma;
	int index;
	struct page **pages;
	struct xen_remap_mfn_info *info;
};

static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
			void *data)
{
	struct remap_data *info = data;
	struct page *page = info->pages[info->index++];
	unsigned long pfn = page_to_pfn(page);
	pte_t pte = pfn_pte(pfn, info->prot);

	if (map_foreign_page(pfn, info->fgmfn, info->domid))
		return -EFAULT;
	set_pte_at(info->vma->vm_mm, addr, ptep, pte);

	return 0;
}

int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t mfn, int nr,
			       pgprot_t prot, unsigned domid,
			       struct page **pages)
{
	int err;
	struct remap_data data;

	/* TBD: Batching, current sole caller only does page at a time */
	if (nr > 1)
		return -EINVAL;

	data.fgmfn = mfn;
	data.prot = prot;
	data.domid = domid;
	data.vma = vma;
	data.index = 0;
	data.pages = pages;
	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
				  remap_pte_fn, &data);
	return err;
}