static int ept_set_epte(struct vmx_vcpu *vcpu, int make_write, unsigned long gpa, unsigned long hva) { int ret; epte_t *epte, flags; struct page *page; ret = get_user_pages_fast(hva, 1, make_write, &page); if (ret != 1) { printk(KERN_ERR "ept: failed to get user page %lx\n", hva); return ret; } spin_lock(&vcpu->ept_lock); ret = ept_lookup_gpa(vcpu, (void *) gpa, PageHuge(page) ? 1 : 0, 1, &epte); if (ret) { spin_unlock(&vcpu->ept_lock); printk(KERN_ERR "ept: failed to lookup EPT entry\n"); return ret; } if (epte_present(*epte)) { if (!epte_big(*epte) && PageHuge(page)) ept_clear_l1_epte(epte); else ept_clear_epte(epte); } flags = __EPTE_READ | __EPTE_EXEC | __EPTE_TYPE(EPTE_TYPE_WB) | __EPTE_IPAT; if (make_write) flags |= __EPTE_WRITE; if (vcpu->ept_ad_enabled) { /* premark A/D to avoid extra memory references */ flags |= __EPTE_A; if (make_write) flags |= __EPTE_D; } if (PageHuge(page)) { flags |= __EPTE_SZ; *epte = epte_addr(page_to_phys(page) & ~((1 << 21) - 1)) | flags; } else *epte = epte_addr(page_to_phys(page)) | flags; spin_unlock(&vcpu->ept_lock); return 0; }
static int hwpoison_inject(void *data, u64 val) { unsigned long pfn = val; struct page *p; struct page *hpage; int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!pfn_valid(pfn)) return -ENXIO; p = pfn_to_page(pfn); hpage = compound_head(p); /* * This implies unable to support free buddy pages. */ if (!get_page_unless_zero(hpage)) return 0; if (!hwpoison_filter_enable) goto inject; if (!PageLRU(p) && !PageHuge(p)) shake_page(p, 0); /* * This implies unable to support non-LRU pages. */ if (!PageLRU(p) && !PageHuge(p)) return 0; /* * do a racy check with elevated page count, to make sure PG_hwpoison * will only be set for the targeted owner (or on a free page). * We temporarily take page lock for try_get_mem_cgroup_from_page(). * memory_failure() will redo the check reliably inside page lock. */ lock_page(hpage); err = hwpoison_filter(hpage); unlock_page(hpage); if (err) return 0; inject: printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn); return memory_failure(pfn, 18, MF_COUNT_INCREASED); }
/* * This function is exported but must not be called by anything other * than get_page(). It implements the slow path of get_page(). */ bool __get_page_tail(struct page *page) { /* * This takes care of get_page() if run on a tail page * returned by one of the get_user_pages/follow_page variants. * get_user_pages/follow_page itself doesn't need the compound * lock because it runs __get_page_tail_foll() under the * proper PT lock that already serializes against * split_huge_page(). */ bool got = false; struct page *page_head; /* * If this is a hugetlbfs page it cannot be split under us. Simply * increment refcount for the head page. */ if (PageHuge(page)) { page_head = compound_head(page); atomic_inc(&page_head->_count); got = true; } else { unsigned long flags; page_head = compound_trans_head(page); if (likely(page != page_head && get_page_unless_zero(page_head))) { /* Ref to put_compound_page() comment. */ if (PageSlab(page_head)) { if (likely(PageTail(page))) { __get_page_tail_foll(page, false); return true; } else { put_page(page_head); return false; } } /* * page_head wasn't a dangling pointer but it * may not be a head page anymore by the time * we obtain the lock. That is ok as long as it * can't be freed from under us. */ flags = compound_lock_irqsave(page_head); /* here __split_huge_page_refcount won't run anymore */ if (likely(PageTail(page))) { __get_page_tail_foll(page, false); got = true; } compound_unlock_irqrestore(page_head, flags); if (unlikely(!got)) put_page(page_head); } } return got; }
static void __put_compound_page(struct page *page) { compound_page_dtor *dtor; if (!PageHuge(page)) __page_cache_release(page); dtor = get_compound_page_dtor(page); (*dtor)(page); }
static void __put_compound_page(struct page *page) { compound_page_dtor *dtor; /* * __page_cache_release() is supposed to be called for thp, not for * hugetlb. This is because hugetlb page does never have PageLRU set * (it's never listed to any LRU lists) and no memcg routines should * be called for hugetlb (it has a separate hugetlb_cgroup.) */ if (!PageHuge(page)) __page_cache_release(page); dtor = get_compound_page_dtor(page); (*dtor)(page); }
struct page *alloc_migrate_target(struct page *page, unsigned long nid, int **resultp) { /* * hugeTLB: allocate a destination page from a nearest neighbor node, * accordance with memory policy of the user process if possible. For * now as a simple work-around, we use the next node for destination. * Normal page: use prefer mempolicy for destination if called by * hotplug, use default mempolicy for destination if called by cma. */ if (PageHuge(page)) return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(page_to_nid(page), node_online_map)); else return alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0); }
/* * Put previously isolated pages back onto the appropriate lists * from where they were once taken off for compaction/migration. * * This function shall be used whenever the isolated pageset has been * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range() * and isolate_huge_page(). */ void putback_movable_pages(struct list_head *l) { struct page *page; struct page *page2; list_for_each_entry_safe(page, page2, l, lru) { if (unlikely(PageHuge(page))) { putback_active_hugepage(page); continue; } list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); if (unlikely(isolated_balloon_page(page))) balloon_page_putback(page); else putback_lru_page(page); } }
/* * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the mapping's tree_lock and * mem_cgroup_begin_page_stat(). */ void __delete_from_page_cache(struct page *page, void *shadow, struct mem_cgroup *memcg) { struct address_space *mapping = page->mapping; trace_mm_filemap_delete_from_page_cache(page); /* * if we're uptodate, flush out into the cleancache, otherwise * invalidate any existing cleancache entries. We can't leave * stale data around in the cleancache once our page is gone */ if (PageUptodate(page) && PageMappedToDisk(page)) cleancache_put_page(page); else cleancache_invalidate_page(mapping, page); page_cache_tree_delete(mapping, page, shadow); page->mapping = NULL; /* Leave page->index set: truncation lookup relies upon it */ /* hugetlb pages do not participate in page cache accounting. */ if (!PageHuge(page)) __dec_zone_page_state(page, NR_FILE_PAGES); if (PageSwapBacked(page)) __dec_zone_page_state(page, NR_SHMEM); BUG_ON(page_mapped(page)); /* * At this point page must be either written or cleaned by truncate. * Dirty page here signals a bug and loss of unwritten data. * * This fixes dirty accounting after removing the page entirely but * leaves PageDirty set: it has no effect for truncated page and * anyway will be cleared before returning page into buddy allocator. */ if (WARN_ON_ONCE(PageDirty(page))) account_page_cleaned(page, mapping, memcg, inode_to_wb(mapping->host)); }
u64 stable_page_flags(struct page *page) { u64 k; u64 u; /* * pseudo flag: KPF_NOPAGE * it differentiates a memory hole from a page with no flags */ if (!page) return 1 << KPF_NOPAGE; k = page->flags; u = 0; /* * pseudo flags for the well known (anonymous) memory mapped pages * * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the * simple test in page_mapcount() is not enough. */ if (!PageSlab(page) && page_mapcount(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; if (PageKsm(page)) u |= 1 << KPF_KSM; /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) u |= 1 << KPF_COMPOUND_HEAD; if (PageTail(page)) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. */ else if (PageTransCompound(page)) { struct page *head = compound_head(page); if (PageLRU(head) || PageAnon(head)) u |= 1 << KPF_THP; else if (is_huge_zero_page(head)) { u |= 1 << KPF_ZERO_PAGE; u |= 1 << KPF_THP; } } else if (is_zero_pfn(page_to_pfn(page))) u |= 1 << KPF_ZERO_PAGE; /* * Caveats on high order pages: page->_count will only be set * -1 on the head page; SLUB/SLQB do the same for PG_slab; * SLOB won't set PG_slab at all on compound pages. */ if (PageBuddy(page)) u |= 1 << KPF_BUDDY; if (PageBalloon(page)) u |= 1 << KPF_BALLOON; if (page_is_idle(page)) u |= 1 << KPF_IDLE; u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); u |= kpf_copy_bit(k, KPF_LRU, PG_lru); u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); #ifdef CONFIG_MEMORY_FAILURE u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); #endif u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); return u; };
u64 stable_page_flags(struct page *page) { u64 k; u64 u; /* * pseudo flag: KPF_NOPAGE * it differentiates a memory hole from a page with no flags */ if (!page) return 1 << KPF_NOPAGE; k = page->flags; u = 0; /* * pseudo flags for the well known (anonymous) memory mapped pages * * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the * simple test in page_mapped() is not enough. */ if (!PageSlab(page) && page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; if (PageKsm(page)) u |= 1 << KPF_KSM; /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) u |= 1 << KPF_COMPOUND_HEAD; if (PageTail(page)) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); /* * Caveats on high order pages: * PG_buddy will only be set on the head page; SLUB/SLQB do the same * for PG_slab; SLOB won't set PG_slab at all on compound pages. */ u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); u |= kpf_copy_bit(k, KPF_LRU, PG_lru); u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); #ifdef CONFIG_MEMORY_FAILURE u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); #endif u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); return u; };
static void put_compound_page(struct page *page) { /* * hugetlbfs pages cannot be split from under us. If this is a * hugetlbfs page, check refcount on head page and release the page if * the refcount becomes zero. */ if (PageHuge(page)) { page = compound_head(page); if (put_page_testzero(page)) __put_compound_page(page); return; } if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ struct page *page_head = compound_trans_head(page); if (likely(page != page_head && get_page_unless_zero(page_head))) { unsigned long flags; /* * THP can not break up slab pages so avoid taking * compound_lock(). Slab performs non-atomic bit ops * on page->flags for better performance. In particular * slab_unlock() in slub used to be a hot path. It is * still hot on arches that do not support * this_cpu_cmpxchg_double(). */ if (PageSlab(page_head)) { if (PageTail(page)) { if (put_page_testzero(page_head)) VM_BUG_ON(1); atomic_dec(&page->_mapcount); goto skip_lock_tail; } else goto skip_lock; } /* * page_head wasn't a dangling pointer but it * may not be a head page anymore by the time * we obtain the lock. That is ok as long as it * can't be freed from under us. */ flags = compound_lock_irqsave(page_head); if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); skip_lock: if (put_page_testzero(page_head)) __put_single_page(page_head); out_put_single: if (put_page_testzero(page)) __put_single_page(page); return; } VM_BUG_ON(page_head != page->first_page); /* * We can release the refcount taken by * get_page_unless_zero() now that * __split_huge_page_refcount() is blocked on * the compound_lock. */ if (put_page_testzero(page_head)) VM_BUG_ON(1); /* __split_huge_page_refcount will wait now */ VM_BUG_ON(page_mapcount(page) <= 0); atomic_dec(&page->_mapcount); VM_BUG_ON(atomic_read(&page_head->_count) <= 0); VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); skip_lock_tail: if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); else __put_single_page(page_head); } } else { /* page_head is a dangling pointer */ VM_BUG_ON(PageTail(page)); goto out_put_single; } } else if (put_page_testzero(page)) { if (PageHead(page)) __put_compound_page(page); else __put_single_page(page); } }
/* * This function is exported but must not be called by anything other * than get_page(). It implements the slow path of get_page(). */ bool __get_page_tail(struct page *page) { /* * This takes care of get_page() if run on a tail page * returned by one of the get_user_pages/follow_page variants. * get_user_pages/follow_page itself doesn't need the compound * lock because it runs __get_page_tail_foll() under the * proper PT lock that already serializes against * split_huge_page(). */ unsigned long flags; bool got = false; struct page *page_head; /* * If this is a hugetlbfs page it cannot be split under us. Simply * increment refcount for the head page. */ if (PageHuge(page)) { page_head = compound_head(page); atomic_inc(&page_head->_count); got = true; goto out; } page_head = compound_head(page); if (likely(page != page_head && get_page_unless_zero(page_head))) { /* Ref to put_compound_page() comment. */ if (PageSlab(page_head)) { if (likely(PageTail(page))) { /* * This is a hugetlbfs page or a slab * page. __split_huge_page_refcount * cannot race here. */ VM_BUG_ON(!PageHead(page_head)); __get_page_tail_foll(page, false); return true; } else { /* * __split_huge_page_refcount run * before us, "page" was a THP * tail. The split page_head has been * freed and reallocated as slab or * hugetlbfs page of smaller order * (only possible if reallocated as * slab on x86). */ put_page(page_head); return false; } } /* * page_head wasn't a dangling pointer but it * may not be a head page anymore by the time * we obtain the lock. That is ok as long as it * can't be freed from under us. */ flags = compound_lock_irqsave(page_head); /* here __split_huge_page_refcount won't run anymore */ if (likely(PageTail(page))) { __get_page_tail_foll(page, false); got = true; } compound_unlock_irqrestore(page_head, flags); if (unlikely(!got)) put_page(page_head); } out: return got; }
/** * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at * @pvmw->address * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags * must be set. pmd, pte and ptl must be NULL. * * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is * adjusted if needed (for PTE-mapped THPs). * * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in * a loop to find all PTEs that map the THP. * * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry * regardless of which page table level the page is mapped at. @pvmw->pmd is * NULL. * * Retruns false if there are no more page table entries for the page in * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped. * * If you need to stop the walk before page_vma_mapped_walk() returned false, * use page_vma_mapped_walk_done(). It will do the housekeeping. */ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) { struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t pmde; /* The only possible pmd mapping has been handled on last iteration */ if (pvmw->pmd && !pvmw->pte) return not_found(pvmw); if (pvmw->pte) goto next_pte; if (unlikely(PageHuge(pvmw->page))) { /* when pud is not present, pte will be NULL */ pvmw->pte = huge_pte_offset(mm, pvmw->address, PAGE_SIZE << compound_order(page)); if (!pvmw->pte) return false; pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte); spin_lock(pvmw->ptl); if (!check_pte(pvmw)) return not_found(pvmw); return true; } restart: pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) return false; p4d = p4d_offset(pgd, pvmw->address); if (!p4d_present(*p4d)) return false; pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); /* * Make sure the pmd value isn't cached in a register by the * compiler and used as a stale value after we've observed a * subsequent update. */ pmde = READ_ONCE(*pvmw->pmd); if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); if (likely(pmd_trans_huge(*pvmw->pmd))) { if (pvmw->flags & PVMW_MIGRATION) return not_found(pvmw); if (pmd_page(*pvmw->pmd) != page) return not_found(pvmw); return true; } else if (!pmd_present(*pvmw->pmd)) { if (thp_migration_supported()) { if (!(pvmw->flags & PVMW_MIGRATION)) return not_found(pvmw); if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); if (migration_entry_to_page(entry) != page) return not_found(pvmw); return true; } } return not_found(pvmw); } else { /* THP pmd was split under us: handle on pte level */ spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } } else if (!pmd_present(pmde)) { return false; } if (!map_pte(pvmw)) goto next_pte; while (1) { if (check_pte(pvmw)) return true; next_pte: /* Seek to next pte only makes sense for THP */ if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) return not_found(pvmw); do { pvmw->address += PAGE_SIZE; if (pvmw->address >= pvmw->vma->vm_end || pvmw->address >= __vma_address(pvmw->page, pvmw->vma) + hpage_nr_pages(pvmw->page) * PAGE_SIZE) return not_found(pvmw); /* Did we cross page table boundary? */ if (pvmw->address % PMD_SIZE == 0) { pte_unmap(pvmw->pte); if (pvmw->ptl) { spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } goto restart; } else { pvmw->pte++; } } while (pte_none(*pvmw->pte)); if (!pvmw->ptl) { pvmw->ptl = pte_lockptr(mm, pvmw->pmd); spin_lock(pvmw->ptl); } } }