static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset(&pud, addr); do { pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); if (pmd_none(pmd)) return 0; if (pmd_huge(pmd)) { if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, write, pages, nr)) return 0; } else if (is_hugepd(pmdp)) { if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, addr, next, write, pages, nr)) return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; }
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset(&pud, addr); do { pmd_t pmd = ACCESS_ONCE(*pmdp); next = pmd_addr_end(addr, end); /* * If we find a splitting transparent hugepage we * return zero. That will result in taking the slow * path which will call wait_split_huge_page() * if the pmd is still in splitting state */ if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; if (pmd_huge(pmd) || pmd_large(pmd)) { if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, write, pages, nr)) return 0; } else if (is_hugepd(pmdp)) { if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, addr, next, write, pages, nr)) return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; }
static void change_page_attr(unsigned long addr, int numpages, pte_t (*set) (pte_t)) { pte_t *ptep, pte; pmd_t *pmdp; pud_t *pudp; pgd_t *pgdp; int i; for (i = 0; i < numpages; i++) { pgdp = pgd_offset(&init_mm, addr); pudp = pud_offset(pgdp, addr); pmdp = pmd_offset(pudp, addr); if (pmd_huge(*pmdp)) { WARN_ON_ONCE(1); continue; } ptep = pte_offset_kernel(pmdp, addr); pte = *ptep; pte = set(pte); __ptep_ipte(addr, ptep); *ptep = pte; addr += PAGE_SIZE; } }
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; #ifdef CONFIG_HUGETLB_SUPER_PAGES pte_t *pte; #endif /* Get the top-level page table entry. */ pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0); /* We don't have four levels. */ pud = pud_offset(pgd, addr); #ifndef __PAGETABLE_PUD_FOLDED # error support fourth page table level #endif if (!pud_present(*pud)) return NULL; /* Check for an L0 huge PTE, if we have three levels. */ #ifndef __PAGETABLE_PMD_FOLDED if (pud_huge(*pud)) return (pte_t *)pud; pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud), pmd_index(addr), 1); if (!pmd_present(*pmd)) return NULL; #else pmd = pmd_offset(pud, addr); #endif /* Check for an L1 huge PTE. */ if (pmd_huge(*pmd)) return (pte_t *)pmd; #ifdef CONFIG_HUGETLB_SUPER_PAGES /* Check for an L2 huge PTE. */ pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2); if (!pte_present(*pte)) return NULL; if (pte_super(*pte)) return pte; #endif return NULL; }
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset(&pud, addr); do { pmd_t pmd = READ_ONCE(*pmdp); next = pmd_addr_end(addr, end); /* * If we find a splitting transparent hugepage we * return zero. That will result in taking the slow * path which will call wait_split_huge_page() * if the pmd is still in splitting state */ if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; if (pmd_huge(pmd) || pmd_large(pmd)) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they * can be serialised against THP migration. */ if (pmd_numa(pmd)) return 0; if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, write, pages, nr)) return 0; } else if (is_hugepd(pmdp)) { if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, addr, next, write, pages, nr)) return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; }
void radix__change_memory_range(unsigned long start, unsigned long end, unsigned long clear) { unsigned long idx; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; start = ALIGN_DOWN(start, PAGE_SIZE); end = PAGE_ALIGN(end); // aligns up pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n", start, end, clear); for (idx = start; idx < end; idx += PAGE_SIZE) { pgdp = pgd_offset_k(idx); pudp = pud_alloc(&init_mm, pgdp, idx); if (!pudp) continue; if (pud_huge(*pudp)) { ptep = (pte_t *)pudp; goto update_the_pte; } pmdp = pmd_alloc(&init_mm, pudp, idx); if (!pmdp) continue; if (pmd_huge(*pmdp)) { ptep = pmdp_ptep(pmdp); goto update_the_pte; } ptep = pte_alloc_kernel(pmdp, idx); if (!ptep) continue; update_the_pte: radix__pte_update(&init_mm, idx, ptep, clear, 0, 0); } radix__flush_tlb_kernel_range(start, end); }
/* * Handle a fault on the vmalloc area. */ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) { pmd_t *pmd_k; pte_t *pte_k; /* Make sure we are in vmalloc area */ if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; /* * Synchronize this task's top level page-table * with the 'reference' page table. */ pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; if (pmd_huge(*pmd_k)) return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; return 0; }
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgdp; pud_t *pudp, pud; pmd_t *pmdp, pmd; pgdp = pgd_offset(mm, addr); if (!pgd_present(READ_ONCE(*pgdp))) return NULL; pudp = pud_offset(pgdp, addr); pud = READ_ONCE(*pudp); if (sz != PUD_SIZE && pud_none(pud)) return NULL; /* hugepage or swap? */ if (pud_huge(pud) || !pud_present(pud)) return (pte_t *)pudp; /* table; check the next level */ if (sz == CONT_PMD_SIZE) addr &= CONT_PMD_MASK; pmdp = pmd_offset(pudp, addr); pmd = READ_ONCE(*pmdp); if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && pmd_none(pmd)) return NULL; if (pmd_huge(pmd) || !pmd_present(pmd)) return (pte_t *)pmdp; if (sz == CONT_PTE_SIZE) return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK)); return NULL; }
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd = NULL; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); if (!pgd_present(*pgd)) return NULL; pud = pud_offset(pgd, addr); if (!pud_present(*pud)) return NULL; if (pud_huge(*pud)) return (pte_t *)pud; pmd = pmd_offset(pud, addr); if (!pmd_present(*pmd)) return NULL; if (pte_cont(pmd_pte(*pmd))) { pmd = pmd_offset( pud, (addr & CONT_PMD_MASK)); return (pte_t *)pmd; } if (pmd_huge(*pmd)) return (pte_t *)pmd; pte = pte_offset_kernel(pmd, addr); if (pte_present(*pte) && pte_cont(*pte)) { pte = pte_offset_kernel( pmd, (addr & CONT_PTE_MASK)); return pte; } return NULL; }
/* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) return pte_page(pmd_pte(pmd)); return virt_to_page(pmd_page_vaddr(pmd)); }
/** * follow_page_mask - look up a page descriptor from a user-virtual address * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour * @page_mask: on output, *page_mask is set according to the size of the page * * @flags can have FOLL_ flags set, defined in <linux/mm.h> * * Returns the mapped (struct page *), %NULL if no mapping exists, or * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned int *page_mask) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; *page_mask = 0; page = follow_huge_addr(mm, address, flags & FOLL_WRITE); if (!IS_ERR(page)) { BUG_ON(flags & FOLL_GET); return page; } pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); p4d = p4d_offset(pgd, address); if (p4d_none(*p4d)) return no_page_table(vma, flags); BUILD_BUG_ON(p4d_huge(*p4d)); if (unlikely(p4d_bad(*p4d))) return no_page_table(vma, flags); pud = pud_offset(p4d, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { page = follow_huge_pud(mm, address, pud, flags); if (page) return page; return no_page_table(vma, flags); } if (pud_devmap(*pud)) { ptl = pud_lock(mm, pud); page = follow_devmap_pud(vma, address, pud, flags); spin_unlock(ptl); if (page) return page; } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return no_page_table(vma, flags); if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { page = follow_huge_pmd(mm, address, pmd, flags); if (page) return page; return no_page_table(vma, flags); } if (pmd_devmap(*pmd)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags); spin_unlock(ptl); if (page) return page; } if (likely(!pmd_trans_huge(*pmd))) return follow_page_pte(vma, address, pmd, flags); if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) return no_page_table(vma, flags); ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags); } if (flags & FOLL_SPLIT) { int ret; page = pmd_page(*pmd); if (is_huge_zero_page(page)) { spin_unlock(ptl); ret = 0; split_huge_pmd(vma, pmd, address); if (pmd_trans_unstable(pmd)) ret = -EBUSY; } else { get_page(page); spin_unlock(ptl); lock_page(page); ret = split_huge_page(page); unlock_page(page); put_page(page); if (pmd_none(*pmd)) return no_page_table(vma, flags); } return ret ? ERR_PTR(ret) : follow_page_pte(vma, address, pmd, flags); } page = follow_trans_huge_pmd(vma, address, pmd, flags); spin_unlock(ptl); *page_mask = HPAGE_PMD_NR - 1; return page; }
/** * follow_page_mask - look up a page descriptor from a user-virtual address * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour * @page_mask: on output, *page_mask is set according to the size of the page * * @flags can have FOLL_ flags set, defined in <linux/mm.h> * * Returns the mapped (struct page *), %NULL if no mapping exists, or * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned int *page_mask) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; *page_mask = 0; page = follow_huge_addr(mm, address, flags & FOLL_WRITE); if (!IS_ERR(page)) { BUG_ON(flags & FOLL_GET); return page; } pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); pud = pud_offset(pgd, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { if (flags & FOLL_GET) return NULL; page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); return page; } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return no_page_table(vma, flags); if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); if (flags & FOLL_GET) { /* * Refcount on tail pages are not well-defined and * shouldn't be taken. The caller should handle a NULL * return when trying to follow tail pages. */ if (PageHead(page)) get_page(page); else page = NULL; } return page; } if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) return no_page_table(vma, flags); if (pmd_trans_huge(*pmd)) { if (flags & FOLL_SPLIT) { split_huge_page_pmd(vma, address, pmd); return follow_page_pte(vma, address, pmd, flags); } ptl = pmd_lock(mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { spin_unlock(ptl); wait_split_huge_page(vma->anon_vma, pmd); } else { page = follow_trans_huge_pmd(vma, address, pmd, flags); spin_unlock(ptl); *page_mask = HPAGE_PMD_NR - 1; return page; } } else spin_unlock(ptl); } return follow_page_pte(vma, address, pmd, flags); }
/** * follow_page_mask - look up a page descriptor from a user-virtual address * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour * @page_mask: on output, *page_mask is set according to the size of the page * * @flags can have FOLL_ flags set, defined in <linux/mm.h> * * Returns the mapped (struct page *), %NULL if no mapping exists, or * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned int *page_mask) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; *page_mask = 0; page = follow_huge_addr(mm, address, flags & FOLL_WRITE); if (!IS_ERR(page)) { BUG_ON(flags & FOLL_GET); return page; } pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); pud = pud_offset(pgd, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { page = follow_huge_pud(mm, address, pud, flags); if (page) return page; return no_page_table(vma, flags); } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return no_page_table(vma, flags); if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { page = follow_huge_pmd(mm, address, pmd, flags); if (page) return page; return no_page_table(vma, flags); } if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) return no_page_table(vma, flags); if (pmd_trans_huge(*pmd)) { if (flags & FOLL_SPLIT) { split_huge_page_pmd(vma, address, pmd); return follow_page_pte(vma, address, pmd, flags); } ptl = pmd_lock(mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { spin_unlock(ptl); wait_split_huge_page(vma->anon_vma, pmd); } else { page = follow_trans_huge_pmd(vma, address, pmd, flags); spin_unlock(ptl); *page_mask = HPAGE_PMD_NR - 1; return page; } } else spin_unlock(ptl); } return follow_page_pte(vma, address, pmd, flags); }