static void mincore_hugetlb_page_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, unsigned char *vec) { #ifdef CONFIG_HUGETLB_PAGE struct hstate *h; h = hstate_vma(vma); while (1) { unsigned char present; pte_t *ptep; /* * Huge pages are always in RAM for now, but * theoretically it needs to be checked. */ ptep = huge_pte_offset(current->mm, addr & huge_page_mask(h)); present = ptep && !huge_pte_none(huge_ptep_get(ptep)); while (1) { *vec = present; vec++; addr += PAGE_SIZE; if (addr == end) return; /* check hugepage border */ if (!(addr & ~huge_page_mask(h))) break; } } #else BUG(); #endif }
static int walk_hugetlb_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; struct hstate *h = hstate_vma(vma); unsigned long next; unsigned long hmask = huge_page_mask(h); unsigned long sz = huge_page_size(h); pte_t *pte; int err = 0; do { next = hugetlb_entry_end(h, addr, end); pte = huge_pte_offset(walk->mm, addr & hmask, sz); if (pte) err = walk->hugetlb_entry(pte, hmask, addr, next, walk); else if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); if (err) break; } while (addr = next, addr != end); return err; }
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writeable) { unsigned int shift = huge_page_shift(hstate_vma(vma)); return hugepage_shift_to_tte(entry, shift); }
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writable) { size_t pagesize = huge_page_size(hstate_vma(vma)); if (pagesize == CONT_PTE_SIZE) { entry = pte_mkcont(entry); } else if (pagesize == CONT_PMD_SIZE) { entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { pr_warn("%s: unrecognized huge page size 0x%lx\n", __func__, pagesize); } return entry; }
static int walk_hugetlb_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct hstate *h = hstate_vma(vma); unsigned long next; unsigned long hmask = huge_page_mask(h); pte_t *pte; int err = 0; do { next = hugetlb_entry_end(h, addr, end); pte = huge_pte_offset(walk->mm, addr & hmask); if (pte && walk->hugetlb_entry) err = walk->hugetlb_entry(pte, hmask, addr, next, walk); if (err) return err; } while (addr = next, addr != end); return 0; }
extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { #ifdef HUGETLB_NEED_PRELOAD /* * The "return 1" forces a call of update_mmu_cache, which will write a * TLB entry. Without this, platforms that don't do a write of the TLB * entry in the TLB miss handler asm will fault ad infinitum. */ ptep_set_access_flags(vma, addr, ptep, pte, dirty); return 1; #else int changed, psize; pte = set_access_flags_filter(pte, vma, dirty); changed = !pte_same(*(ptep), pte); if (changed) { #ifdef CONFIG_PPC_BOOK3S_64 struct hstate *h = hstate_vma(vma); psize = hstate_get_psize(h); #ifdef CONFIG_DEBUG_VM assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep)); #endif #else /* * Not used on non book3s64 platforms. But 8xx * can possibly use tsize derived from hstate. */ psize = 0; #endif __ptep_set_access_flags(vma, ptep, pte, addr, psize); } return changed; #endif }
/* * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is * called with mmap_sem held, it will release mmap_sem before returning. */ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, unsigned long dst_start, unsigned long src_start, unsigned long len, bool zeropage) { int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED; int vm_shared = dst_vma->vm_flags & VM_SHARED; ssize_t err; pte_t *dst_pte; unsigned long src_addr, dst_addr; long copied; struct page *page; struct hstate *h; unsigned long vma_hpagesize; pgoff_t idx; u32 hash; struct address_space *mapping; /* * There is no default zero huge page for all huge page sizes as * supported by hugetlb. A PMD_SIZE huge pages may exist as used * by THP. Since we can not reliably insert a zero page, this * feature is not supported. */ if (zeropage) { up_read(&dst_mm->mmap_sem); return -EINVAL; } src_addr = src_start; dst_addr = dst_start; copied = 0; page = NULL; vma_hpagesize = vma_kernel_pagesize(dst_vma); /* * Validate alignment based on huge page size */ err = -EINVAL; if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1)) goto out_unlock; retry: /* * On routine entry dst_vma is set. If we had to drop mmap_sem and * retry, dst_vma will be set to NULL and we must lookup again. */ if (!dst_vma) { err = -ENOENT; dst_vma = find_vma(dst_mm, dst_start); if (!dst_vma || !is_vm_hugetlb_page(dst_vma)) goto out_unlock; /* * Check the vma is registered in uffd, this is * required to enforce the VM_MAYWRITE check done at * uffd registration time. */ if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; if (dst_start < dst_vma->vm_start || dst_start + len > dst_vma->vm_end) goto out_unlock; err = -EINVAL; if (vma_hpagesize != vma_kernel_pagesize(dst_vma)) goto out_unlock; vm_shared = dst_vma->vm_flags & VM_SHARED; } if (WARN_ON(dst_addr & (vma_hpagesize - 1) || (len - copied) & (vma_hpagesize - 1))) goto out_unlock; /* * If not shared, ensure the dst_vma has a anon_vma. */ err = -ENOMEM; if (!vm_shared) { if (unlikely(anon_vma_prepare(dst_vma))) goto out_unlock; } h = hstate_vma(dst_vma); while (src_addr < src_start + len) { pte_t dst_pteval; BUG_ON(dst_addr >= dst_start + len); VM_BUG_ON(dst_addr & ~huge_page_mask(h)); /* * Serialize via hugetlb_fault_mutex */ idx = linear_page_index(dst_vma, dst_addr); mapping = dst_vma->vm_file->f_mapping; hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, idx, dst_addr); mutex_lock(&hugetlb_fault_mutex_table[hash]); err = -ENOMEM; dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h)); if (!dst_pte) { mutex_unlock(&hugetlb_fault_mutex_table[hash]); goto out_unlock; } err = -EEXIST; dst_pteval = huge_ptep_get(dst_pte); if (!huge_pte_none(dst_pteval)) { mutex_unlock(&hugetlb_fault_mutex_table[hash]); goto out_unlock; } err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, src_addr, &page); mutex_unlock(&hugetlb_fault_mutex_table[hash]); vm_alloc_shared = vm_shared; cond_resched(); if (unlikely(err == -ENOENT)) { up_read(&dst_mm->mmap_sem); BUG_ON(!page); err = copy_huge_page_from_user(page, (const void __user *)src_addr, pages_per_huge_page(h), true); if (unlikely(err)) { err = -EFAULT; goto out; } down_read(&dst_mm->mmap_sem); dst_vma = NULL; goto retry; } else BUG_ON(page); if (!err) { dst_addr += vma_hpagesize; src_addr += vma_hpagesize; copied += vma_hpagesize; if (fatal_signal_pending(current)) err = -EINTR; } if (err) break; } out_unlock: up_read(&dst_mm->mmap_sem); out: if (page) { /* * We encountered an error and are about to free a newly * allocated huge page. * * Reservation handling is very subtle, and is different for * private and shared mappings. See the routine * restore_reserve_on_error for details. Unfortunately, we * can not call restore_reserve_on_error now as it would * require holding mmap_sem. * * If a reservation for the page existed in the reservation * map of a private mapping, the map was modified to indicate * the reservation was consumed when the page was allocated. * We clear the PagePrivate flag now so that the global * reserve count will not be incremented in free_huge_page. * The reservation map will still indicate the reservation * was consumed and possibly prevent later page allocation. * This is better than leaking a global reservation. If no * reservation existed, it is still safe to clear PagePrivate * as no adjustments to reservation counts were made during * allocation. * * The reservation map for shared mappings indicates which * pages have reservations. When a huge page is allocated * for an address with a reservation, no change is made to * the reserve map. In this case PagePrivate will be set * to indicate that the global reservation count should be * incremented when the page is freed. This is the desired * behavior. However, when a huge page is allocated for an * address without a reservation a reservation entry is added * to the reservation map, and PagePrivate will not be set. * When the page is freed, the global reserve count will NOT * be incremented and it will appear as though we have leaked * reserved page. In this case, set PagePrivate so that the * global reserve count will be incremented to match the * reservation map entry which was created. * * Note that vm_alloc_shared is based on the flags of the vma * for which the page was originally allocated. dst_vma could * be different or NULL on error. */ if (vm_alloc_shared) SetPagePrivate(page); else ClearPagePrivate(page); put_page(page); } BUG_ON(copied < 0); BUG_ON(err > 0); BUG_ON(!copied && !err); return copied ? copied : err; }