/*
 * Drop a reference on a (possibly compound) page.
 *
 * The tail-page path must synchronize with __split_huge_page_refcount(),
 * which can tear the compound page apart concurrently; the compound lock
 * on the head page is what blocks the split while we transfer refcounts.
 */
static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = compound_trans_head(page);

		if (likely(page != page_head &&
			   get_page_unless_zero(page_head))) {
			unsigned long flags;
			/*
			 * page_head wasn't a dangling pointer but it
			 * may not be a head page anymore by the time
			 * we obtain the lock. That is ok as long as it
			 * can't be freed from under us.
			 */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
				VM_BUG_ON(PageHead(page_head));
				/* release the pin taken by get_page_unless_zero() */
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
			out_put_single:
				/* @page is now an order-0 page after the split */
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero() now that
			 * __split_huge_page_refcount() is blocked on
			 * the compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(page_mapcount(page) <= 0);
			/* tail pages track their pins in _mapcount, not _count */
			atomic_dec(&page->_mapcount);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			VM_BUG_ON(atomic_read(&page->_count) != 0);
			compound_unlock_irqrestore(page_head, flags);
			/* drop the head reference the caller's tail pin represented */
			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}
/* used by __split_huge_page_refcount() */
/*
 * Insert the freshly split tail page @page_tail onto the LRU, placing it
 * right after the head @page when possible so the subpages stay in order.
 * Caller holds zone->lru_lock (checked below, except on UP).
 */
void lru_add_page_tail(struct zone* zone, struct page *page,
		       struct page *page_tail)
{
	int active;
	enum lru_list lru;
	const int file = 0;	/* split THP pages are anonymous, never file-backed */
	struct list_head *head;

	VM_BUG_ON(!PageHead(page));
	VM_BUG_ON(PageCompound(page_tail));
	VM_BUG_ON(PageLRU(page_tail));
	VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));

	SetPageLRU(page_tail);
	if (page_evictable(page_tail, NULL)) {
		/* tail inherits the head page's active state */
		if (PageActive(page)) {
			SetPageActive(page_tail);
			active = 1;
			lru = LRU_ACTIVE_ANON;
		} else {
			active = 0;
			lru = LRU_INACTIVE_ANON;
		}
		update_page_reclaim_stat(zone, page_tail, file, active);
		/* queue the tail just behind the head if the head is on the LRU */
		if (likely(PageLRU(page)))
			head = page->lru.prev;
		else
			head = &zone->lru[lru].list;
		__add_page_to_lru_list(zone, page_tail, lru, head);
	} else {
		SetPageUnevictable(page_tail);
		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
	}
}
/*
 * This function is exported but must not be called by anything other
 * than get_page(). It implements the slow path of get_page().
 */
/*
 * Returns true if a reference was successfully taken on the tail page
 * (and its head), false if the compound page was split from under us.
 */
bool __get_page_tail(struct page *page)
{
	/*
	 * This takes care of get_page() if run on a tail page
	 * returned by one of the get_user_pages/follow_page variants.
	 * get_user_pages/follow_page itself doesn't need the compound
	 * lock because it runs __get_page_tail_foll() under the
	 * proper PT lock that already serializes against
	 * split_huge_page().
	 */
	unsigned long flags;
	bool got;
	struct page *page_head = compound_trans_head(page);

	/* Ref to put_compound_page() comment. */
	if (!__compound_tail_refcounted(page_head)) {
		/* pairs with the write barriers in __split_huge_page_refcount */
		smp_rmb();
		if (likely(PageTail(page))) {
			/*
			 * This is a hugetlbfs page or a slab
			 * page. __split_huge_page_refcount
			 * cannot race here.
			 */
			VM_BUG_ON(!PageHead(page_head));
			__get_page_tail_foll(page, true);
			return true;
		} else {
			/*
			 * __split_huge_page_refcount run
			 * before us, "page" was a THP
			 * tail. The split page_head has been
			 * freed and reallocated as slab or
			 * hugetlbfs page of smaller order
			 * (only possible if reallocated as
			 * slab on x86).
			 */
			return false;
		}
	}

	got = false;
	if (likely(page != page_head && get_page_unless_zero(page_head))) {
		/*
		 * page_head wasn't a dangling pointer but it
		 * may not be a head page anymore by the time
		 * we obtain the lock. That is ok as long as it
		 * can't be freed from under us.
		 */
		flags = compound_lock_irqsave(page_head);
		/* here __split_huge_page_refcount won't run anymore */
		if (likely(PageTail(page))) {
			__get_page_tail_foll(page, false);
			got = true;
		}
		compound_unlock_irqrestore(page_head, flags);
		/* the split won the race: undo our speculative head pin */
		if (unlikely(!got))
			put_page(page_head);
	}
	return got;
}
/* used by __split_huge_page_refcount() */
/*
 * Place the split-off tail page @page_tail either on @list (when page
 * reclaim is splitting the huge page and wants the subpages privately)
 * or onto the lruvec's LRU list, keeping subpages ordered after the head.
 * Caller holds the zone lru_lock (checked below, except on UP).
 */
void lru_add_page_tail(struct page *page, struct page *page_tail,
		       struct lruvec *lruvec, struct list_head *list)
{
	/*
	 * NOTE(review): 'active' is only assigned on the evictable path;
	 * uninitialized_var() silences the warning on the assumption that
	 * update_page_reclaim_stat() is not reached otherwise — TODO confirm
	 * an unevictable tail implies PageUnevictable(page).
	 */
	int uninitialized_var(active);
	enum lru_list lru;
	const int file = 0;	/* split THP pages are anonymous */

	VM_BUG_ON(!PageHead(page));
	VM_BUG_ON(PageCompound(page_tail));
	VM_BUG_ON(PageLRU(page_tail));
	VM_BUG_ON(NR_CPUS != 1 &&
		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));

	/* pages going to a private reclaim list must not look like LRU pages */
	if (!list)
		SetPageLRU(page_tail);

	if (page_evictable(page_tail)) {
		/* tail inherits the head page's active state */
		if (PageActive(page)) {
			SetPageActive(page_tail);
			active = 1;
			lru = LRU_ACTIVE_ANON;
		} else {
			active = 0;
			lru = LRU_INACTIVE_ANON;
		}
	} else {
		SetPageUnevictable(page_tail);
		lru = LRU_UNEVICTABLE;
	}

	if (likely(PageLRU(page)))
		list_add_tail(&page_tail->lru, &page->lru);
	else if (list) {
		/* page reclaim is reclaiming a huge page */
		get_page(page_tail);
		list_add_tail(&page_tail->lru, list);
	} else {
		struct list_head *list_head;
		/*
		 * Head page has not yet been counted, as an hpage,
		 * so we must account for each subpage individually.
		 *
		 * Use the standard add function to put page_tail on the list,
		 * but then correct its position so they all end up in order.
		 */
		add_page_to_lru_list(page_tail, lruvec, lru);
		list_head = page_tail->lru.prev;
		list_move_tail(&page_tail->lru, list_head);
	}

	if (!PageUnevictable(page))
		update_page_reclaim_stat(lruvec, file, active);
}
/**
 * Two special cases here: we could avoid taking compound_lock_irqsave
 * and could skip the tail refcounting(in _mapcount).
 *
 * 1. Hugetlbfs page:
 *
 *    PageHeadHuge will remain true until the compound page
 *    is released and enters the buddy allocator, and it could
 *    not be split by __split_huge_page_refcount().
 *
 *    So if we see PageHeadHuge set, and we have the tail page pin,
 *    then we could safely put head page.
 *
 * 2. Slab THP page:
 *
 *    PG_slab is cleared before the slab frees the head page, and
 *    tail pin cannot be the last reference left on the head page,
 *    because the slab code is free to reuse the compound page
 *    after a kfree/kmem_cache_free without having to check if
 *    there's any tail pin left.  In turn all tail pinsmust be always
 *    released while the head is still pinned by the slab code
 *    and so we know PG_slab will be still set too.
 *
 *    So if we see PageSlab set, and we have the tail page pin,
 *    then we could safely put head page.
 */
static __always_inline
void put_unrefcounted_compound_page(struct page *page_head, struct page *page)
{
	/*
	 * If @page is a THP tail, we must read the tail page
	 * flags after the head page flags. The
	 * __split_huge_page_refcount side enforces write memory barriers
	 * between clearing PageTail and before the head page
	 * can be freed and reallocated.
	 */
	smp_rmb();
	if (likely(PageTail(page))) {
		/*
		 * __split_huge_page_refcount cannot race
		 * here, see the comment above this function.
		 */
		VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
		if (put_page_testzero(page_head)) {
			/*
			 * If this is the tail of a slab THP page,
			 * the tail pin must not be the last reference
			 * held on the page, because the PG_slab cannot
			 * be cleared before all tail pins (which skips
			 * the _mapcount tail refcounting) have been
			 * released.
			 *
			 * If this is the tail of a hugetlbfs page,
			 * the tail pin may be the last reference on
			 * the page instead, because PageHeadHuge will
			 * not go away until the compound page enters
			 * the buddy allocator.
			 */
			VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
			__put_compound_page(page_head);
		}
	} else
		/*
		 * __split_huge_page_refcount run before us,
		 * @page was a THP tail. The split @page_head
		 * has been freed and reallocated as slab or
		 * hugetlbfs page of smaller order (only
		 * possible if reallocated as slab on x86).
		 */
		if (put_page_testzero(page))
			__put_single_page(page);
}
/* used by __split_huge_page_refcount() */
/*
 * Add the split-off tail page @page_tail to the zone LRU, placed right
 * behind the head @page when the head is on the LRU so subpages stay in
 * order. Caller must hold zone->lru_lock (asserted below).
 */
void lru_add_page_tail(struct zone* zone, struct page *page,
		       struct page *page_tail)
{
	int active;
	enum lru_list lru;
	const int file = 0;	/* split THP pages are anonymous */

	VM_BUG_ON(!PageHead(page));
	VM_BUG_ON(PageCompound(page_tail));
	VM_BUG_ON(PageLRU(page_tail));
	VM_BUG_ON(!spin_is_locked(&zone->lru_lock));

	SetPageLRU(page_tail);
	if (page_evictable(page_tail, NULL)) {
		/* tail inherits the head page's active state */
		if (PageActive(page)) {
			SetPageActive(page_tail);
			active = 1;
			lru = LRU_ACTIVE_ANON;
		} else {
			active = 0;
			lru = LRU_INACTIVE_ANON;
		}
		update_page_reclaim_stat(zone, page_tail, file, active);
	} else {
		SetPageUnevictable(page_tail);
		lru = LRU_UNEVICTABLE;
	}

	if (likely(PageLRU(page)))
		list_add_tail(&page_tail->lru, &page->lru);
	else {
		struct list_head *list_head;
		/*
		 * Head page has not yet been counted, as an hpage,
		 * so we must account for each subpage individually.
		 *
		 * Use the standard add function to put page_tail on the list,
		 * but then correct its position so they all end up in order.
		 */
		add_page_to_lru_list(zone, page_tail, lru);
		list_head = page_tail->lru.prev;
		list_move_tail(&page_tail->lru, list_head);
	}
}
/*
 * Drop a reference on a compound page, dispatching to the refcounted or
 * non-refcounted tail-pin helper depending on the head page's type.
 */
static void put_compound_page(struct page *page)
{
	struct page *page_head;

	/*
	 * We see the PageCompound set and PageTail not set, so @page maybe:
	 *  1. hugetlbfs head page, or
	 *  2. THP head page.
	 */
	if (likely(!PageTail(page))) {
		if (put_page_testzero(page)) {
			/*
			 * By the time all refcounts have been released
			 * split_huge_page cannot run anymore from under us.
			 */
			if (PageHead(page))
				__put_compound_page(page);
			else
				__put_single_page(page);
		}
		return;
	}

	/*
	 * We see the PageCompound set and PageTail set, so @page maybe:
	 *  1. a tail hugetlbfs page, or
	 *  2. a tail THP page, or
	 *  3. a split THP page.
	 *
	 * Case 3 is possible, as we may race with
	 * __split_huge_page_refcount tearing down a THP page.
	 */
	page_head = compound_head(page);
	if (!__compound_tail_refcounted(page_head))
		/* hugetlbfs/slab tails: no per-tail _mapcount refcounting */
		put_unrefcounted_compound_page(page_head, page);
	else
		put_refcounted_compound_page(page_head, page);
}
/* used by __split_huge_page_refcount() */
/*
 * Place the split-off tail page @page_tail onto @list (when reclaim is
 * splitting the huge page and wants the subpages privately) or onto the
 * lruvec's LRU, keeping subpages ordered behind the head @page.
 * Caller holds the node's lru_lock (checked below, except on UP).
 */
void lru_add_page_tail(struct page *page, struct page *page_tail,
		       struct lruvec *lruvec, struct list_head *list)
{
	const int file = 0;	/* split THP pages are anonymous */

	VM_BUG_ON_PAGE(!PageHead(page), page);
	VM_BUG_ON_PAGE(PageCompound(page_tail), page);
	VM_BUG_ON_PAGE(PageLRU(page_tail), page);
	VM_BUG_ON(NR_CPUS != 1 &&
		  !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock));

	/* pages destined for a private reclaim list must not look like LRU pages */
	if (!list)
		SetPageLRU(page_tail);

	if (likely(PageLRU(page)))
		list_add_tail(&page_tail->lru, &page->lru);
	else if (list) {
		/* page reclaim is reclaiming a huge page */
		get_page(page_tail);
		list_add_tail(&page_tail->lru, list);
	} else {
		struct list_head *list_head;
		/*
		 * Head page has not yet been counted, as an hpage,
		 * so we must account for each subpage individually.
		 *
		 * Use the standard add function to put page_tail on the list,
		 * but then correct its position so they all end up in order.
		 */
		add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
		list_head = page_tail->lru.prev;
		list_move_tail(&page_tail->lru, list_head);
	}

	if (!PageUnevictable(page))
		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
}
/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <*****@*****.**>
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

#if 0	/* This is just for testing */
/*
 * NOTE(review): dead code guarded by #if 0 (the #endif is outside this
 * view). It would not compile if enabled: it uses 'addr' but the
 * parameter is 'address'; 'pte' and 'vpfn' are never declared; and
 * 'start', 'length', 'nr' and 'write' are unused. Left byte-identical
 * since the preprocessor discards it.
 */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	unsigned long start = address;
	int length = 1;
	int nr;
	struct page *page;
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (!vma || !is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	pte = huge_pte_offset(mm, address);

	/* hugetlb should be locked, and hence, prefaulted */
	WARN_ON(!pte || pte_none(*pte));

	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

	WARN_ON(!PageHead(page));

	return page;
}
/*
 * Drop a reference on a (possibly compound) page. Tail-page pins are
 * transferred to the head under the compound lock, except for slab
 * compound pages which cannot be split and so skip the lock.
 */
static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = compound_trans_head(page);

		if (likely(page != page_head &&
			   get_page_unless_zero(page_head))) {
			unsigned long flags;

			/*
			 * THP can not break up slab pages so avoid taking
			 * compound_lock(). Slab performs non-atomic bit ops
			 * on page->flags for better performance. In particular
			 * slab_unlock() in slub used to be a hot path. It is
			 * still hot on arches that do not support
			 * this_cpu_cmpxchg_double().
			 */
			if (PageSlab(page_head)) {
				if (PageTail(page)) {
					/* the extra pin must not be the last ref */
					if (put_page_testzero(page_head))
						VM_BUG_ON(1);

					atomic_dec(&page->_mapcount);
					goto skip_lock_tail;
				} else
					goto skip_lock;
			}
			/*
			 * page_head wasn't a dangling pointer but it
			 * may not be a head page anymore by the time
			 * we obtain the lock. That is ok as long as it
			 * can't be freed from under us.
			 */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
skip_lock:
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
out_put_single:
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero() now that
			 * __split_huge_page_refcount() is blocked on
			 * the compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(page_mapcount(page) <= 0);
			/* tail pins live in _mapcount, not _count */
			atomic_dec(&page->_mapcount);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			VM_BUG_ON(atomic_read(&page->_count) != 0);
			compound_unlock_irqrestore(page_head, flags);

skip_lock_tail:
			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}
/*
 * Drop a reference on a (possibly compound) page. Hugetlbfs compound
 * pages cannot be split, so their tail pins are released without taking
 * the compound lock; THP tails synchronize with
 * __split_huge_page_refcount() via the head page's compound lock.
 */
static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = compound_trans_head(page);

		if (likely(page != page_head &&
			   get_page_unless_zero(page_head))) {
			unsigned long flags;

			if (PageHeadHuge(page_head)) {
				if (likely(PageTail(page))) {
					/*
					 * __split_huge_page_refcount
					 * cannot race here.
					 */
					VM_BUG_ON(!PageHead(page_head));
					atomic_dec(&page->_mapcount);
					/* release the speculative pin; must not be last */
					if (put_page_testzero(page_head))
						VM_BUG_ON(1);
					/* now drop the caller's reference */
					if (put_page_testzero(page_head))
						__put_compound_page(page_head);
					return;
				} else {
					/*
					 * __split_huge_page_refcount
					 * run before us, "page" was a
					 * THP tail. The split
					 * page_head has been freed
					 * and reallocated as slab or
					 * hugetlbfs page of smaller
					 * order (only possible if
					 * reallocated as slab on
					 * x86).
					 */
					goto skip_lock;
				}
			}
			/*
			 * page_head wasn't a dangling pointer but it
			 * may not be a head page anymore by the time
			 * we obtain the lock. That is ok as long as it
			 * can't be freed from under us.
			 */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
				VM_BUG_ON(PageHead(page_head));
skip_lock:
				if (put_page_testzero(page_head)) {
					/*
					 * The head page may have been
					 * freed and reallocated as a
					 * compound page of smaller
					 * order and then freed again.
					 * All we know is that it
					 * cannot have become: a THP
					 * page, a compound page of
					 * higher order, a tail page.
					 * That is because we still
					 * hold the refcount of the
					 * split THP tail and
					 * page_head was the THP head
					 * before the split.
					 */
					if (PageHead(page_head))
						__put_compound_page(page_head);
					else
						__put_single_page(page_head);
				}
out_put_single:
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero() now that
			 * __split_huge_page_refcount() is blocked on
			 * the compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(page_mapcount(page) <= 0);
			/* THP tail pins live in _mapcount, not _count */
			atomic_dec(&page->_mapcount);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			VM_BUG_ON(atomic_read(&page->_count) != 0);
			compound_unlock_irqrestore(page_head, flags);

			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}
u64 stable_page_flags(struct page *page) { u64 k; u64 u; /* * pseudo flag: KPF_NOPAGE * it differentiates a memory hole from a page with no flags */ if (!page) return 1 << KPF_NOPAGE; k = page->flags; u = 0; /* * pseudo flags for the well known (anonymous) memory mapped pages * * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the * simple test in page_mapcount() is not enough. */ if (!PageSlab(page) && page_mapcount(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; if (PageKsm(page)) u |= 1 << KPF_KSM; /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) u |= 1 << KPF_COMPOUND_HEAD; if (PageTail(page)) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. */ else if (PageTransCompound(page)) { struct page *head = compound_head(page); if (PageLRU(head) || PageAnon(head)) u |= 1 << KPF_THP; else if (is_huge_zero_page(head)) { u |= 1 << KPF_ZERO_PAGE; u |= 1 << KPF_THP; } } else if (is_zero_pfn(page_to_pfn(page))) u |= 1 << KPF_ZERO_PAGE; /* * Caveats on high order pages: page->_count will only be set * -1 on the head page; SLUB/SLQB do the same for PG_slab; * SLOB won't set PG_slab at all on compound pages. 
*/ if (PageBuddy(page)) u |= 1 << KPF_BUDDY; if (PageBalloon(page)) u |= 1 << KPF_BALLOON; if (page_is_idle(page)) u |= 1 << KPF_IDLE; u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); u |= kpf_copy_bit(k, KPF_LRU, PG_lru); u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); #ifdef CONFIG_MEMORY_FAILURE u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); #endif u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); return u; };
/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	/* arch-specific fast path (usually a stub returning an error) */
	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	/* walk pgd -> pud -> pmd, bailing out at holes or bad entries */
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		/* pud-sized hugetlb page: refcounting not supported here */
		if (flags & FOLL_GET)
			return NULL;
		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
		return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
		if (flags & FOLL_GET) {
			/*
			 * Refcount on tail pages are not well-defined and
			 * shouldn't be taken. The caller should handle a NULL
			 * return when trying to follow tail pages.
			 */
			if (PageHead(page))
				get_page(page);
			else
				page = NULL;
		}
		return page;
	}
	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
		return no_page_table(vma, flags);
	if (pmd_trans_huge(*pmd)) {
		if (flags & FOLL_SPLIT) {
			/* caller asked for the THP to be split first */
			split_huge_page_pmd(vma, address, pmd);
			return follow_page_pte(vma, address, pmd, flags);
		}
		ptl = pmd_lock(mm, pmd);
		/* recheck under the pmd lock: the THP may have gone away */
		if (likely(pmd_trans_huge(*pmd))) {
			if (unlikely(pmd_trans_splitting(*pmd))) {
				spin_unlock(ptl);
				wait_split_huge_page(vma->anon_vma, pmd);
			} else {
				page = follow_trans_huge_pmd(vma, address,
							     pmd, flags);
				spin_unlock(ptl);
				*page_mask = HPAGE_PMD_NR - 1;
				return page;
			}
		} else
			spin_unlock(ptl);
	}
	/* fall through to the pte level */
	return follow_page_pte(vma, address, pmd, flags);
}
/*
 * Drop a reference on a (possibly compound) page. This variant reads
 * page->first_page directly and relies on smp_rmb() pairing with the
 * write barriers in __split_huge_page_refcount() to validate it.
 */
static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = page->first_page;
		smp_rmb();
		/*
		 * If PageTail is still set after smp_rmb() we can be sure
		 * that the page->first_page we read wasn't a dangling pointer.
		 * See __split_huge_page_refcount() smp_wmb().
		 */
		if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
			unsigned long flags;
			/*
			 * Verify that our page_head wasn't converted
			 * to a a regular page before we got a
			 * reference on it.
			 */
			if (unlikely(!PageHead(page_head))) {
				/* PageHead is cleared after PageTail */
				smp_rmb();
				VM_BUG_ON(PageTail(page));
				goto out_put_head;
			}
			/*
			 * Only run compound_lock on a valid PageHead,
			 * after having it pinned with
			 * get_page_unless_zero() above.
			 */
			smp_mb();
			/* page_head wasn't a dangling pointer */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
				VM_BUG_ON(PageHead(page_head));
			out_put_head:
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
			out_put_single:
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero now that
			 * split_huge_page_refcount is blocked on the
			 * compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(atomic_read(&page->_count) <= 0);
			/* in this version tail pins are tracked in _count itself */
			atomic_dec(&page->_count);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			compound_unlock_irqrestore(page_head, flags);
			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}
/*
 * Release a pin on the tail page @page of the refcounted compound page
 * headed by @page_head (a THP), transferring the tail's _mapcount pin to
 * the head under the compound lock so __split_huge_page_refcount()
 * cannot race with us.
 */
static __always_inline
void put_refcounted_compound_page(struct page *page_head, struct page *page)
{
	if (likely(page != page_head && get_page_unless_zero(page_head))) {
		unsigned long flags;

		/*
		 * @page_head wasn't a dangling pointer but it may not
		 * be a head page anymore by the time we obtain the
		 * lock. That is ok as long as it can't be freed from
		 * under us.
		 */
		flags = compound_lock_irqsave(page_head);
		if (unlikely(!PageTail(page))) {
			/* __split_huge_page_refcount run before us */
			compound_unlock_irqrestore(page_head, flags);
			if (put_page_testzero(page_head)) {
				/*
				 * The @page_head may have been freed
				 * and reallocated as a compound page
				 * of smaller order and then freed
				 * again.  All we know is that it
				 * cannot have become: a THP page, a
				 * compound page of higher order, a
				 * tail page.  That is because we
				 * still hold the refcount of the
				 * split THP tail and page_head was
				 * the THP head before the split.
				 */
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
out_put_single:
			if (put_page_testzero(page))
				__put_single_page(page);
			return;
		}
		VM_BUG_ON_PAGE(page_head != compound_head(page), page);
		/*
		 * We can release the refcount taken by
		 * get_page_unless_zero() now that
		 * __split_huge_page_refcount() is blocked on the
		 * compound_lock.
		 */
		if (put_page_testzero(page_head))
			VM_BUG_ON_PAGE(1, page_head);
		/* __split_huge_page_refcount will wait now */
		VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page);
		/* THP tail pins live in _mapcount, not _count */
		atomic_dec(&page->_mapcount);
		VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head);
		VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
		compound_unlock_irqrestore(page_head, flags);

		if (put_page_testzero(page_head)) {
			if (PageHead(page_head))
				__put_compound_page(page_head);
			else
				__put_single_page(page_head);
		}
	} else {
		/* @page_head is a dangling pointer */
		VM_BUG_ON_PAGE(PageTail(page), page);
		goto out_put_single;
	}
}
static void put_compound_page(struct page *page) { struct page *page_head; if (likely(!PageTail(page))) { if (put_page_testzero(page)) { /* * By the time all refcounts have been released * split_huge_page cannot run anymore from under us. */ if (PageHead(page)) __put_compound_page(page); else __put_single_page(page); } return; } /* __split_huge_page_refcount can run under us */ page_head = compound_trans_head(page); /* * THP can not break up slab pages so avoid taking * compound_lock() and skip the tail page refcounting (in * _mapcount) too. Slab performs non-atomic bit ops on * page->flags for better performance. In particular * slab_unlock() in slub used to be a hot path. It is still * hot on arches that do not support * this_cpu_cmpxchg_double(). * * If "page" is part of a slab or hugetlbfs page it cannot be * splitted and the head page cannot change from under us. And * if "page" is part of a THP page under splitting, if the * head page pointed by the THP tail isn't a THP head anymore, * we'll find PageTail clear after smp_rmb() and we'll treat * it as a single page. */ if (!__compound_tail_refcounted(page_head)) { /* * If "page" is a THP tail, we must read the tail page * flags after the head page flags. The * split_huge_page side enforces write memory barriers * between clearing PageTail and before the head page * can be freed and reallocated. */ smp_rmb(); if (likely(PageTail(page))) { /* * __split_huge_page_refcount cannot race * here. */ VM_BUG_ON(!PageHead(page_head)); VM_BUG_ON(page_mapcount(page) != 0); if (put_page_testzero(page_head)) { /* * If this is the tail of a slab * compound page, the tail pin must * not be the last reference held on * the page, because the PG_slab * cannot be cleared before all tail * pins (which skips the _mapcount * tail refcounting) have been * released. 
For hugetlbfs the tail * pin may be the last reference on * the page instead, because * PageHeadHuge will not go away until * the compound page enters the buddy * allocator. */ VM_BUG_ON(PageSlab(page_head)); __put_compound_page(page_head); } return; } else /* * __split_huge_page_refcount run before us, * "page" was a THP tail. The split page_head * has been freed and reallocated as slab or * hugetlbfs page of smaller order (only * possible if reallocated as slab on x86). */ goto out_put_single; } if (likely(page != page_head && get_page_unless_zero(page_head))) { unsigned long flags; /* * page_head wasn't a dangling pointer but it may not * be a head page anymore by the time we obtain the * lock. That is ok as long as it can't be freed from * under us. */ flags = compound_lock_irqsave(page_head); if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { /* * The head page may have been freed * and reallocated as a compound page * of smaller order and then freed * again. All we know is that it * cannot have become: a THP page, a * compound page of higher order, a * tail page. That is because we * still hold the refcount of the * split THP tail and page_head was * the THP head before the split. */ if (PageHead(page_head)) __put_compound_page(page_head); else __put_single_page(page_head); } out_put_single: if (put_page_testzero(page)) __put_single_page(page); return; } VM_BUG_ON(page_head != page->first_page); /* * We can release the refcount taken by * get_page_unless_zero() now that * __split_huge_page_refcount() is blocked on the * compound_lock. 
*/ if (put_page_testzero(page_head)) VM_BUG_ON(1); /* __split_huge_page_refcount will wait now */ VM_BUG_ON(page_mapcount(page) <= 0); atomic_dec(&page->_mapcount); VM_BUG_ON(atomic_read(&page_head->_count) <= 0); VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); else __put_single_page(page_head); } } else { /* page_head is a dangling pointer */ VM_BUG_ON(PageTail(page)); goto out_put_single; } }
u64 stable_page_flags(struct page *page) { u64 k; u64 u; /* * pseudo flag: KPF_NOPAGE * it differentiates a memory hole from a page with no flags */ if (!page) return 1 << KPF_NOPAGE; k = page->flags; u = 0; /* * pseudo flags for the well known (anonymous) memory mapped pages * * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the * simple test in page_mapped() is not enough. */ if (!PageSlab(page) && page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; if (PageKsm(page)) u |= 1 << KPF_KSM; /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) u |= 1 << KPF_COMPOUND_HEAD; if (PageTail(page)) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); /* * Caveats on high order pages: * PG_buddy will only be set on the head page; SLUB/SLQB do the same * for PG_slab; SLOB won't set PG_slab at all on compound pages. */ u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); u |= kpf_copy_bit(k, KPF_LRU, PG_lru); u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); #ifdef CONFIG_MEMORY_FAILURE u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); #endif u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); u 
|= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); return u; };