/**
 * Claim a given page from the buddy subsystem. This only works if the page
 * is registered within the buddy system and marked as free.
 */
int try_claim_free_buddy_page(struct page *requested_page,
                              unsigned int allowed_sources,
                              struct page **allocated_page,
                              unsigned long *actual_source)
{
        int ret = CLAIMED_TRY_NEXT;

        if (allowed_sources & SOURCE_FREE_BUDDY_PAGE) {
                struct page *locked_page = NULL;
                unsigned long pfn = page_to_pfn(requested_page);
                unsigned int locked_page_count_after, locked_page_count_before;

                /*
                 * Isolate the page, so that it doesn't get reallocated if it
                 * was free.
                 */
                set_migratetype_isolate(requested_page);

                locked_page_count_before = page_count(requested_page);
                if (0 == page_count(compound_head(requested_page))) {
                        if (is_free_buddy_page(requested_page)) {
                                printk(KERN_DEBUG "try_claim_free_buddy_page: %#lx free buddy page\n",
                                       pfn);
                                /* grab it while the page is still isolated */
                                locked_page = claim_free_buddy_page(requested_page);
                        } else {
                                printk(KERN_DEBUG "try_claim_free_buddy_page: %#lx: unknown zero refcount page type %lx\n",
                                       pfn, requested_page->flags);
                        }
                } else {
                        unsigned long cppfn = page_to_pfn(compound_head(requested_page));

                        /* Not a free page */
                        printk(KERN_DEBUG "try_claim_free_buddy_page: %#lx: %#lx refcount %i, page type %lx\n",
                               pfn, cppfn,
                               page_count(compound_head(requested_page)),
                               requested_page->flags);
                }
                unset_migratetype_isolate(requested_page);

                if (locked_page) {
                        /*
                         * The page is now rightfully ours!
                         */
                        locked_page_count_after = page_count(locked_page);
                        printk(KERN_DEBUG "Buddy: Requested pfn %lx, allocated pfn %lx with pagecount %i (was:%i)\n",
                               page_to_pfn(requested_page),
                               page_to_pfn(locked_page),
                               locked_page_count_after,
                               locked_page_count_before);
                        *actual_source = SOURCE_FREE_BUDDY_PAGE;
                        ret = CLAIMED_SUCCESSFULLY;
                }
        }

        return ret;
}
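/*
 * Hypothetical caller sketch (the wrapper name and the particular order of
 * claim functions are illustrative, not taken from the original code): the
 * try_claim_* helpers return CLAIMED_TRY_NEXT so a claimer can walk through
 * the allowed sources until one of them succeeds.
 */
static int try_claim_page_from_any_source(struct page *requested_page,
                                          unsigned int allowed_sources,
                                          struct page **allocated_page,
                                          unsigned long *actual_source)
{
        int ret;

        ret = try_claim_free_buddy_page(requested_page, allowed_sources,
                                        allocated_page, actual_source);
        if (ret != CLAIMED_TRY_NEXT)
                return ret;

        return try_claim_free_page(requested_page, allowed_sources,
                                   allocated_page, actual_source);
}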
/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced -> inactive,referenced
 * inactive,referenced   -> active,unreferenced
 * active,unreferenced   -> active,referenced
 *
 * When a newly allocated page is not yet visible, so safe for non-atomic ops,
 * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
 */
void mark_page_accessed(struct page *page)
{
        page = compound_head(page);

        if (!PageActive(page) && !PageUnevictable(page) &&
                        PageReferenced(page)) {

                /*
                 * If the page is on the LRU, queue it for activation via
                 * activate_page_pvecs. Otherwise, assume the page is on a
                 * pagevec, mark it active and it'll be moved to the active
                 * LRU on the next drain.
                 */
                if (PageLRU(page))
                        activate_page(page);
                else
                        __lru_cache_activate_page(page);
                ClearPageReferenced(page);
                if (page_is_file_cache(page))
                        workingset_activation(page);
        } else if (!PageReferenced(page)) {
                SetPageReferenced(page);
        }
        if (page_is_idle(page))
                clear_page_idle(page);
}
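/*
 * Illustration of the substitution mentioned in the comment above: a page
 * that is not yet visible to anyone else may use the non-atomic flag setter
 * instead of mark_page_accessed(). Hypothetical helper name; assumes the
 * usual <linux/gfp.h> / <linux/page-flags.h> context. Sketch only.
 */
static struct page *alloc_prewarmed_page(gfp_t gfp_mask)
{
        struct page *page = alloc_page(gfp_mask);

        if (!page)
                return NULL;
        /* Nobody else can see this page yet, so the non-atomic op is safe. */
        __SetPageReferenced(page);
        return page;
}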
/*
 * This function is exported but must not be called by anything other
 * than get_page(). It implements the slow path of get_page().
 */
bool __get_page_tail(struct page *page)
{
        /*
         * This takes care of get_page() if run on a tail page
         * returned by one of the get_user_pages/follow_page variants.
         * get_user_pages/follow_page itself doesn't need the compound
         * lock because it runs __get_page_tail_foll() under the
         * proper PT lock that already serializes against
         * split_huge_page().
         */
        unsigned long flags;
        bool got;
        struct page *page_head = compound_head(page);

        /* Ref to put_compound_page() comment. */
        if (!__compound_tail_refcounted(page_head)) {
                smp_rmb();
                if (likely(PageTail(page))) {
                        /*
                         * This is a hugetlbfs page or a slab
                         * page. __split_huge_page_refcount
                         * cannot race here.
                         */
                        VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
                        __get_page_tail_foll(page, true);
                        return true;
                } else {
                        /*
                         * __split_huge_page_refcount run
                         * before us, "page" was a THP
                         * tail. The split page_head has been
                         * freed and reallocated as slab or
                         * hugetlbfs page of smaller order
                         * (only possible if reallocated as
                         * slab on x86).
                         */
                        return false;
                }
        }

        got = false;
        if (likely(page != page_head && get_page_unless_zero(page_head))) {
                /*
                 * page_head wasn't a dangling pointer but it
                 * may not be a head page anymore by the time
                 * we obtain the lock. That is ok as long as it
                 * can't be freed from under us.
                 */
                flags = compound_lock_irqsave(page_head);
                /* here __split_huge_page_refcount won't run anymore */
                if (likely(PageTail(page))) {
                        __get_page_tail_foll(page, false);
                        got = true;
                }
                compound_unlock_irqrestore(page_head, flags);
                if (unlikely(!got))
                        put_page(page_head);
        }
        return got;
}
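/*
 * For context: the __compound_tail_refcounted() predicate used above (and by
 * the put_compound_page() variants below) reports whether a compound page's
 * tail pages carry their own _mapcount pins. In the kernels this code comes
 * from it is roughly the following (sketch of the include/linux/mm.h helper
 * of that era; some trees additionally test PageAnon(), so verify locally).
 */
static inline int __compound_tail_refcounted(struct page *page)
{
        /* slab and hugetlbfs compound pages do not refcount their tails */
        return !PageSlab(page) && !PageHeadHuge(page);
}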
/*
 * This function is exported but must not be called by anything other
 * than get_page(). It implements the slow path of get_page().
 */
bool __get_page_tail(struct page *page)
{
        /*
         * This takes care of get_page() if run on a tail page
         * returned by one of the get_user_pages/follow_page variants.
         * get_user_pages/follow_page itself doesn't need the compound
         * lock because it runs __get_page_tail_foll() under the
         * proper PT lock that already serializes against
         * split_huge_page().
         */
        bool got = false;
        struct page *page_head;

        /*
         * If this is a hugetlbfs page it cannot be split under us. Simply
         * increment refcount for the head page.
         */
        if (PageHuge(page)) {
                page_head = compound_head(page);
                atomic_inc(&page_head->_count);
                got = true;
        } else {
                unsigned long flags;

                page_head = compound_trans_head(page);
                if (likely(page != page_head &&
                           get_page_unless_zero(page_head))) {
                        /* Ref to put_compound_page() comment. */
                        if (PageSlab(page_head)) {
                                if (likely(PageTail(page))) {
                                        __get_page_tail_foll(page, false);
                                        return true;
                                } else {
                                        put_page(page_head);
                                        return false;
                                }
                        }
                        /*
                         * page_head wasn't a dangling pointer but it
                         * may not be a head page anymore by the time
                         * we obtain the lock. That is ok as long as it
                         * can't be freed from under us.
                         */
                        flags = compound_lock_irqsave(page_head);
                        /* here __split_huge_page_refcount won't run anymore */
                        if (likely(PageTail(page))) {
                                __get_page_tail_foll(page, false);
                                got = true;
                        }
                        compound_unlock_irqrestore(page_head, flags);
                        if (unlikely(!got))
                                put_page(page_head);
                }
        }
        return got;
}
void activate_page(struct page *page)
{
        struct zone *zone = page_zone(page);

        page = compound_head(page);
        spin_lock_irq(zone_lru_lock(zone));
        __activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat),
                        NULL);
        spin_unlock_irq(zone_lru_lock(zone));
}
static void put_compound_page(struct page *page)
{
        page = compound_head(page);
        if (put_page_testzero(page)) {
                compound_page_dtor *dtor;

                dtor = get_compound_page_dtor(page);
                (*dtor)(page);
        }
}
void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
                                 u64 bound)
{
        int idx;
        u64 addr;
        struct ib_device *dev = umem->context->device;

        virt = max_t(u64, virt, ib_umem_start(umem));
        bound = min_t(u64, bound, ib_umem_end(umem));
        /* Note that during the run of this function, the
         * notifiers_count of the MR is > 0, preventing any racing
         * faults from completion. We might be racing with other
         * invalidations, so we must make sure we free each page only
         * once. */
        for (addr = virt; addr < bound; addr += PAGE_SIZE) {
                idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
                mutex_lock(&umem->odp_data->umem_mutex);
                if (umem->odp_data->page_list[idx]) {
                        struct page *page = umem->odp_data->page_list[idx];
#ifdef CONFIG_COMPAT_USE_COMPOUND_TRANS_HEAD
                        struct page *head_page = compound_trans_head(page);
#else
                        struct page *head_page = compound_head(page);
#endif
                        dma_addr_t dma_addr = umem->odp_data->dma_list[idx] &
                                              ODP_DMA_ADDR_MASK;

                        WARN_ON(!dma_addr);

                        ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
                                          DMA_BIDIRECTIONAL);
                        if (umem->odp_data->dma_list[idx] & ODP_WRITE_ALLOWED_BIT)
                                /*
                                 * set_page_dirty prefers being called with
                                 * the page lock. However, MMU notifiers are
                                 * called sometimes with and sometimes without
                                 * the lock. We rely on the umem_mutex instead
                                 * to prevent other mmu notifiers from
                                 * continuing and allowing the page mapping to
                                 * be removed.
                                 */
                                set_page_dirty(head_page);
                        /* on demand pinning support */
                        if (!umem->context->invalidate_range)
                                put_page(page);
                        umem->odp_data->page_list[idx] = NULL;
                        umem->odp_data->dma_list[idx] = 0;
                        atomic_inc(&dev->odp_statistics.num_invalidation_pages);
                }
                mutex_unlock(&umem->odp_data->umem_mutex);
        }
}
void activate_page(struct page *page)
{
        page = compound_head(page);
        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);

                get_page(page);
                if (!pagevec_add(pvec, page) || PageCompound(page))
                        pagevec_lru_move_fn(pvec, __activate_page, NULL);
                put_cpu_var(activate_page_pvecs);
        }
}
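/*
 * Context sketch (modelled on mm/swap.c): the per-CPU activate_page_pvecs
 * batch filled above is flushed from lru_add_drain_cpu() via a drain helper
 * along these lines; verify the exact form against the tree in use.
 */
static void activate_page_drain(int cpu)
{
        struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);

        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, __activate_page, NULL);
}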
static int hwpoison_inject(void *data, u64 val)
{
        unsigned long pfn = val;
        struct page *p;
        struct page *hpage;
        int err;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (!pfn_valid(pfn))
                return -ENXIO;

        p = pfn_to_page(pfn);
        hpage = compound_head(p);
        /*
         * This implies unable to support free buddy pages.
         */
        if (!get_page_unless_zero(hpage))
                return 0;

        if (!hwpoison_filter_enable)
                goto inject;

        if (!PageLRU(p) && !PageHuge(p))
                shake_page(p, 0);
        /*
         * This implies unable to support non-LRU pages.
         */
        if (!PageLRU(p) && !PageHuge(p))
                return 0;

        /*
         * do a racy check with elevated page count, to make sure PG_hwpoison
         * will only be set for the targeted owner (or on a free page).
         * We temporarily take page lock for try_get_mem_cgroup_from_page().
         * memory_failure() will redo the check reliably inside page lock.
         */
        lock_page(hpage);
        err = hwpoison_filter(hpage);
        unlock_page(hpage);
        if (err)
                return 0;

inject:
        printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn);
        return memory_failure(pfn, 18, MF_COUNT_INCREASED);
}
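/*
 * Sketch of how an injector like the one above is usually exposed through
 * debugfs (modelled on mm/hwpoison-inject.c, which additionally registers
 * an unpoison-pfn file and the hwpoison_filter knobs; verify against the
 * tree in use). Writing a pfn to <debugfs>/hwpoison/corrupt-pfn then calls
 * hwpoison_inject().
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>

DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");

static struct dentry *hwpoison_dir;

static int __init hwpoison_inject_init(void)
{
        hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
        if (!hwpoison_dir)
                return -ENOMEM;
        if (!debugfs_create_file("corrupt-pfn", 0200, hwpoison_dir, NULL,
                                 &hwpoison_fops)) {
                debugfs_remove(hwpoison_dir);
                return -ENOMEM;
        }
        return 0;
}
module_init(hwpoison_inject_init);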
struct page *alloc_migrate_target(struct page *page, unsigned long nid,
                                  int **resultp)
{
        /*
         * hugeTLB: allocate a destination page from a nearest neighbor node,
         * in accordance with the memory policy of the user process if
         * possible. For now, as a simple work-around, we use the next node
         * for the destination.
         * Normal page: use the preferred mempolicy for the destination if
         * called by hotplug, use the default mempolicy for the destination
         * if called by CMA.
         */
        if (PageHuge(page))
                return alloc_huge_page_node(page_hstate(compound_head(page)),
                                            next_node_in(page_to_nid(page),
                                                         node_online_map));
        else
                return alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}
/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask, result;
        pte_t *ptep;

        if (tlb_type == hypervisor) {
                result = _PAGE_PRESENT_4V|_PAGE_P_4V;
                if (write)
                        result |= _PAGE_WRITE_4V;
        } else {
                result = _PAGE_PRESENT_4U|_PAGE_P_4U;
                if (write)
                        result |= _PAGE_WRITE_4U;
        }
        mask = result | _PAGE_SPECIAL;

        ptep = pte_offset_kernel(&pmd, addr);
        do {
                struct page *page, *head;
                pte_t pte = *ptep;

                if ((pte_val(pte) & mask) != result)
                        return 0;
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

                /* The hugepage case is simplified on sparc64 because
                 * we encode the sub-page pfn offsets into the
                 * hugepage PTEs. We could optimize this in the future
                 * to use page_cache_add_speculative() for the hugepage case.
                 */
                page = pte_page(pte);
                head = compound_head(page);
                if (!page_cache_get_speculative(head))
                        return 0;
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                        put_page(head);
                        return 0;
                }

                pages[*nr] = page;
                (*nr)++;
        } while (ptep++, addr += PAGE_SIZE, addr != end);

        return 1;
}
/**
 * Try to claim a "free" page that is neither in the buddy system nor
 * anywhere else.
 *
 * This is currently disabled because it is virtually impossible to detect
 * whether a page is free in the described way.
 *
 * To quote from memory-failure.c:
 *
 *  We need/can do nothing about count=0 pages.
 *  1) it's a free page, and therefore in safe hand:
 *     prep_new_page() will be the gate keeper.
 *  2) it's part of a non-compound high order page.
 *     Implies some kernel user: cannot stop them from
 *     R/W the page; let's pray that the page has been
 *     used and will be freed some time later.
 *  In fact it's dangerous to directly bump up page count from 0,
 *  that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
 */
inline int try_claim_free_page(struct page *requested_page,
                               unsigned int allowed_sources,
                               struct page **allocated_page,
                               unsigned long *actual_source)
{
        int ret = CLAIMED_TRY_NEXT;
        int enabled = 0;

        if (enabled && (allowed_sources & SOURCE_FREE_PAGE)) {
                struct page *compound_head_page;

                compound_head_page = compound_head(requested_page);

                /*
                 * This is a heuristic: normally all pages should be
                 * 'somewhere', so this is very likely to be 'false' for all
                 * pages.
                 *
                 * Additionally, this test is probably not correct anyway.
                 */
                if (compound_head_page == requested_page &&
                    free_pages_check__just_test(requested_page) == 0 &&
                    requested_page->lru.next == NULL &&
                    requested_page->lru.prev == NULL) {
                        int locked_page_count_before, locked_page_count_after;

                        locked_page_count_before = page_count(requested_page);
                        get_page(requested_page);
                        if (requested_page) {
                                /*
                                 * The page is now rightfully ours!
                                 */
                                locked_page_count_after = page_count(requested_page);
                                printk(KERN_DEBUG "Requested pfn %lx with pagecount %i (was:%i)\n",
                                       page_to_pfn(requested_page),
                                       locked_page_count_after,
                                       locked_page_count_before);
                                *actual_source = SOURCE_FREE_PAGE;
                                ret = CLAIMED_SUCCESSFULLY;
                        } else {
                                /*
                                 * We could not lock the page.
                                 */
                                printk(KERN_DEBUG "Requested pfn %lx but could not get it even though its _count was 0.\n",
                                       page_to_pfn(requested_page));
                        }
                }
        }

        return ret;
}
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask, result;
        pte_t *ptep;

        if (tlb_type == hypervisor) {
                result = _PAGE_PRESENT_4V|_PAGE_P_4V;
                if (write)
                        result |= _PAGE_WRITE_4V;
        } else {
                result = _PAGE_PRESENT_4U|_PAGE_P_4U;
                if (write)
                        result |= _PAGE_WRITE_4U;
        }
        mask = result | _PAGE_SPECIAL;

        ptep = pte_offset_kernel(&pmd, addr);
        do {
                struct page *page, *head;
                pte_t pte = *ptep;

                if ((pte_val(pte) & mask) != result)
                        return 0;
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

                page = pte_page(pte);
                head = compound_head(page);
                if (!page_cache_get_speculative(head))
                        return 0;
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                        put_page(head);
                        return 0;
                }
                if (head != page)
                        get_huge_page_tail(page);

                pages[*nr] = page;
                (*nr)++;
        } while (ptep++, addr += PAGE_SIZE, addr != end);

        return 1;
}
static void put_compound_page(struct page *page)
{
        struct page *page_head;

        /*
         * We see the PageCompound set and PageTail not set, so @page maybe:
         * 1. hugetlbfs head page, or
         * 2. THP head page.
         */
        if (likely(!PageTail(page))) {
                if (put_page_testzero(page)) {
                        /*
                         * By the time all refcounts have been released
                         * split_huge_page cannot run anymore from under us.
                         */
                        if (PageHead(page))
                                __put_compound_page(page);
                        else
                                __put_single_page(page);
                }
                return;
        }

        /*
         * We see the PageCompound set and PageTail set, so @page maybe:
         * 1. a tail hugetlbfs page, or
         * 2. a tail THP page, or
         * 3. a split THP page.
         *
         * Case 3 is possible, as we may race with
         * __split_huge_page_refcount tearing down a THP page.
         */
        page_head = compound_head(page);
        if (!__compound_tail_refcounted(page_head))
                put_unrefcounted_compound_page(page_head, page);
        else
                put_refcounted_compound_page(page_head, page);
}
static int ept_set_epte(struct vmx_vcpu *vcpu, int make_write,
                        unsigned long gpa, unsigned long hva)
{
        int ret;
        epte_t *epte, flags;
        struct page *page;
        unsigned huge_shift;
        int level;

        ret = get_user_pages_fast(hva, 1, make_write, &page);
        if (ret != 1) {
                ret = ept_set_pfnmap_epte(vcpu, make_write, gpa, hva);
                if (ret)
                        printk(KERN_ERR "ept: failed to get user page %lx\n", hva);
                return ret;
        }

        spin_lock(&vcpu->ept_lock);

        huge_shift = compound_order(compound_head(page)) + PAGE_SHIFT;
        level = 0;
        if (huge_shift == 30)
                level = 2;
        else if (huge_shift == 21)
                level = 1;

        ret = ept_lookup_gpa(vcpu, (void *) gpa, level, 1, &epte);
        if (ret) {
                spin_unlock(&vcpu->ept_lock);
                put_page(page);
                printk(KERN_ERR "ept: failed to lookup EPT entry\n");
                return ret;
        }

        if (epte_present(*epte)) {
                if (!epte_big(*epte) && level == 2)
                        ept_clear_l2_epte(epte);
                else if (!epte_big(*epte) && level == 1)
                        ept_clear_l1_epte(epte);
                else
                        ept_clear_epte(epte);
        }

        flags = __EPTE_READ | __EPTE_EXEC |
                __EPTE_TYPE(EPTE_TYPE_WB) | __EPTE_IPAT;
        if (make_write)
                flags |= __EPTE_WRITE;
        if (vcpu->ept_ad_enabled) {
                /* premark A/D to avoid extra memory references */
                flags |= __EPTE_A;
                if (make_write)
                        flags |= __EPTE_D;
        }

        if (level) {
                struct page *tmp = page;

                page = compound_head(page);
                get_page(page);
                put_page(tmp);
                flags |= __EPTE_SZ;
        }

        *epte = epte_addr(page_to_phys(page)) | flags;

        spin_unlock(&vcpu->ept_lock);

        return 0;
}
/**
 * release_pages - batched put_page()
 * @pages: array of pages to release
 * @nr: number of pages
 *
 * Decrement the reference count on all the pages in @pages. If it
 * fell to zero, remove the page from the LRU and free it.
 */
void release_pages(struct page **pages, int nr)
{
        int i;
        LIST_HEAD(pages_to_free);
        struct pglist_data *locked_pgdat = NULL;
        struct lruvec *lruvec;
        unsigned long uninitialized_var(flags);
        unsigned int uninitialized_var(lock_batch);

        for (i = 0; i < nr; i++) {
                struct page *page = pages[i];

                /*
                 * Make sure the IRQ-safe lock-holding time does not get
                 * excessive with a continuous string of pages from the
                 * same pgdat. The lock is held only if pgdat != NULL.
                 */
                if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
                        spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
                        locked_pgdat = NULL;
                }

                if (is_huge_zero_page(page))
                        continue;

                /* Device public page can not be huge page */
                if (is_device_public_page(page)) {
                        if (locked_pgdat) {
                                spin_unlock_irqrestore(&locked_pgdat->lru_lock,
                                                       flags);
                                locked_pgdat = NULL;
                        }
                        put_devmap_managed_page(page);
                        continue;
                }

                page = compound_head(page);
                if (!put_page_testzero(page))
                        continue;

                if (PageCompound(page)) {
                        if (locked_pgdat) {
                                spin_unlock_irqrestore(&locked_pgdat->lru_lock,
                                                       flags);
                                locked_pgdat = NULL;
                        }
                        __put_compound_page(page);
                        continue;
                }

                if (PageLRU(page)) {
                        struct pglist_data *pgdat = page_pgdat(page);

                        if (pgdat != locked_pgdat) {
                                if (locked_pgdat)
                                        spin_unlock_irqrestore(&locked_pgdat->lru_lock,
                                                               flags);
                                lock_batch = 0;
                                locked_pgdat = pgdat;
                                spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
                        }

                        lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
                        VM_BUG_ON_PAGE(!PageLRU(page), page);
                        __ClearPageLRU(page);
                        del_page_from_lru_list(page, lruvec, page_off_lru(page));
                }

                /* Clear Active bit in case of parallel mark_page_accessed */
                __ClearPageActive(page);
                __ClearPageWaiters(page);

                list_add(&page->lru, &pages_to_free);
        }
        if (locked_pgdat)
                spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);

        mem_cgroup_uncharge_list(&pages_to_free);
        free_unref_page_list(&pages_to_free);
}
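/*
 * Hypothetical usage sketch (the helper name is illustrative): after
 * get_user_pages_fast() has pinned an array of pages, release_pages()
 * drops all of the references in one batched pass instead of looping over
 * put_page(), which also bounds how long the per-pgdat LRU lock is held.
 */
static void unpin_user_buffer(struct page **pages, int nr_pinned)
{
        if (nr_pinned > 0)
                release_pages(pages, nr_pinned);
}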
static __always_inline
void put_refcounted_compound_page(struct page *page_head, struct page *page)
{
        if (likely(page != page_head && get_page_unless_zero(page_head))) {
                unsigned long flags;

                /*
                 * @page_head wasn't a dangling pointer but it may not
                 * be a head page anymore by the time we obtain the
                 * lock. That is ok as long as it can't be freed from
                 * under us.
                 */
                flags = compound_lock_irqsave(page_head);
                if (unlikely(!PageTail(page))) {
                        /* __split_huge_page_refcount run before us */
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                /*
                                 * The @page_head may have been freed
                                 * and reallocated as a compound page
                                 * of smaller order and then freed
                                 * again. All we know is that it
                                 * cannot have become: a THP page, a
                                 * compound page of higher order, a
                                 * tail page. That is because we
                                 * still hold the refcount of the
                                 * split THP tail and page_head was
                                 * the THP head before the split.
                                 */
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
out_put_single:
                        if (put_page_testzero(page))
                                __put_single_page(page);
                        return;
                }
                VM_BUG_ON_PAGE(page_head != compound_head(page), page);
                /*
                 * We can release the refcount taken by
                 * get_page_unless_zero() now that
                 * __split_huge_page_refcount() is blocked on the
                 * compound_lock.
                 */
                if (put_page_testzero(page_head))
                        VM_BUG_ON_PAGE(1, page_head);
                /* __split_huge_page_refcount will wait now */
                VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page);
                atomic_dec(&page->_mapcount);
                VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head);
                VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
                compound_unlock_irqrestore(page_head, flags);

                if (put_page_testzero(page_head)) {
                        if (PageHead(page_head))
                                __put_compound_page(page_head);
                        else
                                __put_single_page(page_head);
                }
        } else {
                /* @page_head is a dangling pointer */
                VM_BUG_ON_PAGE(PageTail(page), page);
                goto out_put_single;
        }
}
static void put_compound_page(struct page *page)
{
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
                struct page *page_head = compound_head(page);

                if (likely(page != page_head &&
                           get_page_unless_zero(page_head))) {
                        unsigned long flags;

                        /*
                         * THP can not break up slab pages so avoid taking
                         * compound_lock(). Slab performs non-atomic bit ops
                         * on page->flags for better performance. In particular
                         * slab_unlock() in slub used to be a hot path. It is
                         * still hot on arches that do not support
                         * this_cpu_cmpxchg_double().
                         */
                        if (PageSlab(page_head) || PageHeadHuge(page_head)) {
                                if (likely(PageTail(page))) {
                                        /*
                                         * __split_huge_page_refcount
                                         * cannot race here.
                                         */
                                        VM_BUG_ON(!PageHead(page_head));
                                        atomic_dec(&page->_mapcount);
                                        if (put_page_testzero(page_head))
                                                VM_BUG_ON(1);
                                        if (put_page_testzero(page_head))
                                                __put_compound_page(page_head);
                                        return;
                                } else
                                        /*
                                         * __split_huge_page_refcount
                                         * run before us, "page" was a
                                         * THP tail. The split
                                         * page_head has been freed
                                         * and reallocated as slab or
                                         * hugetlbfs page of smaller
                                         * order (only possible if
                                         * reallocated as slab on
                                         * x86).
                                         */
                                        goto skip_lock;
                        }
                        /*
                         * page_head wasn't a dangling pointer but it
                         * may not be a head page anymore by the time
                         * we obtain the lock. That is ok as long as it
                         * can't be freed from under us.
                         */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
skip_lock:
                                if (put_page_testzero(page_head)) {
                                        /*
                                         * The head page may have been
                                         * freed and reallocated as a
                                         * compound page of smaller
                                         * order and then freed again.
                                         * All we know is that it
                                         * cannot have become: a THP
                                         * page, a compound page of
                                         * higher order, a tail page.
                                         * That is because we still
                                         * hold the refcount of the
                                         * split THP tail and
                                         * page_head was the THP head
                                         * before the split.
                                         */
                                        if (PageHead(page_head))
                                                __put_compound_page(page_head);
                                        else
                                                __put_single_page(page_head);
                                }
out_put_single:
                                if (put_page_testzero(page))
                                        __put_single_page(page);
                                return;
                        }
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
                         * get_page_unless_zero() now that
                         * __split_huge_page_refcount() is blocked on
                         * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
                        VM_BUG_ON(page_mapcount(page) <= 0);
                        atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
                        VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);

                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
                } else {
                        /* page_head is a dangling pointer */
                        VM_BUG_ON(PageTail(page));
                        goto out_put_single;
                }
        } else if (put_page_testzero(page)) {
                if (PageHead(page))
                        __put_compound_page(page);
                else
                        __put_single_page(page);
        }
}
u64 stable_page_flags(struct page *page)
{
        u64 k;
        u64 u;

        /*
         * pseudo flag: KPF_NOPAGE
         * it differentiates a memory hole from a page with no flags
         */
        if (!page)
                return 1 << KPF_NOPAGE;

        k = page->flags;
        u = 0;

        /*
         * pseudo flags for the well known (anonymous) memory mapped pages
         *
         * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
         * simple test in page_mapcount() is not enough.
         */
        if (!PageSlab(page) && page_mapcount(page))
                u |= 1 << KPF_MMAP;
        if (PageAnon(page))
                u |= 1 << KPF_ANON;
        if (PageKsm(page))
                u |= 1 << KPF_KSM;

        /*
         * compound pages: export both head/tail info
         * they together define a compound page's start/end pos and order
         */
        if (PageHead(page))
                u |= 1 << KPF_COMPOUND_HEAD;
        if (PageTail(page))
                u |= 1 << KPF_COMPOUND_TAIL;

        if (PageHuge(page))
                u |= 1 << KPF_HUGE;
        /*
         * PageTransCompound can be true for non-huge compound pages (slab
         * pages or pages allocated by drivers with __GFP_COMP) because it
         * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
         * to make sure a given page is a thp, not a non-huge compound page.
         */
        else if (PageTransCompound(page)) {
                struct page *head = compound_head(page);

                if (PageLRU(head) || PageAnon(head))
                        u |= 1 << KPF_THP;
                else if (is_huge_zero_page(head)) {
                        u |= 1 << KPF_ZERO_PAGE;
                        u |= 1 << KPF_THP;
                }
        } else if (is_zero_pfn(page_to_pfn(page)))
                u |= 1 << KPF_ZERO_PAGE;

        /*
         * Caveats on high order pages: page->_count will only be set
         * -1 on the head page; SLUB/SLQB do the same for PG_slab;
         * SLOB won't set PG_slab at all on compound pages.
         */
        if (PageBuddy(page))
                u |= 1 << KPF_BUDDY;

        if (PageBalloon(page))
                u |= 1 << KPF_BALLOON;

        if (page_is_idle(page))
                u |= 1 << KPF_IDLE;

        u |= kpf_copy_bit(k, KPF_LOCKED,        PG_locked);

        u |= kpf_copy_bit(k, KPF_SLAB,          PG_slab);

        u |= kpf_copy_bit(k, KPF_ERROR,         PG_error);
        u |= kpf_copy_bit(k, KPF_DIRTY,         PG_dirty);
        u |= kpf_copy_bit(k, KPF_UPTODATE,      PG_uptodate);
        u |= kpf_copy_bit(k, KPF_WRITEBACK,     PG_writeback);

        u |= kpf_copy_bit(k, KPF_LRU,           PG_lru);
        u |= kpf_copy_bit(k, KPF_REFERENCED,    PG_referenced);
        u |= kpf_copy_bit(k, KPF_ACTIVE,        PG_active);
        u |= kpf_copy_bit(k, KPF_RECLAIM,       PG_reclaim);

        u |= kpf_copy_bit(k, KPF_SWAPCACHE,     PG_swapcache);
        u |= kpf_copy_bit(k, KPF_SWAPBACKED,    PG_swapbacked);

        u |= kpf_copy_bit(k, KPF_UNEVICTABLE,   PG_unevictable);
        u |= kpf_copy_bit(k, KPF_MLOCKED,       PG_mlocked);

#ifdef CONFIG_MEMORY_FAILURE
        u |= kpf_copy_bit(k, KPF_HWPOISON,      PG_hwpoison);
#endif

#ifdef CONFIG_ARCH_USES_PG_UNCACHED
        u |= kpf_copy_bit(k, KPF_UNCACHED,      PG_uncached);
#endif

        u |= kpf_copy_bit(k, KPF_RESERVED,      PG_reserved);
        u |= kpf_copy_bit(k, KPF_MAPPEDTODISK,  PG_mappedtodisk);
        u |= kpf_copy_bit(k, KPF_PRIVATE,       PG_private);
        u |= kpf_copy_bit(k, KPF_PRIVATE_2,     PG_private_2);
        u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
        u |= kpf_copy_bit(k, KPF_ARCH,          PG_arch_1);

        return u;
}
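/*
 * For reference: the kpf_copy_bit() helper used above (from fs/proc/page.c)
 * moves a single bit from the kernel's page->flags position to the exported
 * KPF_* position. Shown as a sketch for context; verify against the tree.
 */
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
        return ((kflags >> kbit) & 1) << ubit;
}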
static void put_compound_page(struct page *page)
{
        struct page *page_head;

        if (likely(!PageTail(page))) {
                if (put_page_testzero(page)) {
                        /*
                         * By the time all refcounts have been released
                         * split_huge_page cannot run anymore from under us.
                         */
                        if (PageHead(page))
                                __put_compound_page(page);
                        else
                                __put_single_page(page);
                }
                return;
        }

        /* __split_huge_page_refcount can run under us */
        page_head = compound_head(page);

        /*
         * THP can not break up slab pages so avoid taking
         * compound_lock() and skip the tail page refcounting (in
         * _mapcount) too. Slab performs non-atomic bit ops on
         * page->flags for better performance. In particular
         * slab_unlock() in slub used to be a hot path. It is still
         * hot on arches that do not support
         * this_cpu_cmpxchg_double().
         *
         * If "page" is part of a slab or hugetlbfs page it cannot be
         * splitted and the head page cannot change from under us. And
         * if "page" is part of a THP page under splitting, if the
         * head page pointed by the THP tail isn't a THP head anymore,
         * we'll find PageTail clear after smp_rmb() and we'll treat
         * it as a single page.
         */
        if (!__compound_tail_refcounted(page_head)) {
                /*
                 * If "page" is a THP tail, we must read the tail page
                 * flags after the head page flags. The
                 * split_huge_page side enforces write memory barriers
                 * between clearing PageTail and before the head page
                 * can be freed and reallocated.
                 */
                smp_rmb();
                if (likely(PageTail(page))) {
                        /*
                         * __split_huge_page_refcount cannot race
                         * here.
                         */
                        VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
                        VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);
                        if (put_page_testzero(page_head)) {
                                /*
                                 * If this is the tail of a slab
                                 * compound page, the tail pin must
                                 * not be the last reference held on
                                 * the page, because the PG_slab
                                 * cannot be cleared before all tail
                                 * pins (which skips the _mapcount
                                 * tail refcounting) have been
                                 * released. For hugetlbfs the tail
                                 * pin may be the last reference on
                                 * the page instead, because
                                 * PageHeadHuge will not go away until
                                 * the compound page enters the buddy
                                 * allocator.
                                 */
                                VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
                                __put_compound_page(page_head);
                        }
                        return;
                } else
                        /*
                         * __split_huge_page_refcount run before us,
                         * "page" was a THP tail. The split page_head
                         * has been freed and reallocated as slab or
                         * hugetlbfs page of smaller order (only
                         * possible if reallocated as slab on x86).
                         */
                        goto out_put_single;
        }

        if (likely(page != page_head && get_page_unless_zero(page_head))) {
                unsigned long flags;

                /*
                 * page_head wasn't a dangling pointer but it may not
                 * be a head page anymore by the time we obtain the
                 * lock. That is ok as long as it can't be freed from
                 * under us.
                 */
                flags = compound_lock_irqsave(page_head);
                if (unlikely(!PageTail(page))) {
                        /* __split_huge_page_refcount run before us */
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                /*
                                 * The head page may have been freed
                                 * and reallocated as a compound page
                                 * of smaller order and then freed
                                 * again. All we know is that it
                                 * cannot have become: a THP page, a
                                 * compound page of higher order, a
                                 * tail page. That is because we
                                 * still hold the refcount of the
                                 * split THP tail and page_head was
                                 * the THP head before the split.
                                 */
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
out_put_single:
                        if (put_page_testzero(page))
                                __put_single_page(page);
                        return;
                }
                VM_BUG_ON_PAGE(page_head != page->first_page, page);
                /*
                 * We can release the refcount taken by
                 * get_page_unless_zero() now that
                 * __split_huge_page_refcount() is blocked on the
                 * compound_lock.
                 */
                if (put_page_testzero(page_head))
                        VM_BUG_ON_PAGE(1, page_head);
                /* __split_huge_page_refcount will wait now */
                VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page);
                atomic_dec(&page->_mapcount);
                VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head);
                VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
                compound_unlock_irqrestore(page_head, flags);

                if (put_page_testzero(page_head)) {
                        if (PageHead(page_head))
                                __put_compound_page(page_head);
                        else
                                __put_single_page(page_head);
                }
        } else {
                /* page_head is a dangling pointer */
                VM_BUG_ON_PAGE(PageTail(page), page);
                goto out_put_single;
        }
}
static void put_compound_page(struct page *page)
{
        /*
         * hugetlbfs pages cannot be split from under us. If this is a
         * hugetlbfs page, check refcount on head page and release the page if
         * the refcount becomes zero.
         */
        if (PageHuge(page)) {
                page = compound_head(page);
                if (put_page_testzero(page))
                        __put_compound_page(page);

                return;
        }

        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
                struct page *page_head = compound_trans_head(page);

                if (likely(page != page_head &&
                           get_page_unless_zero(page_head))) {
                        unsigned long flags;

                        /*
                         * THP can not break up slab pages so avoid taking
                         * compound_lock(). Slab performs non-atomic bit ops
                         * on page->flags for better performance. In particular
                         * slab_unlock() in slub used to be a hot path. It is
                         * still hot on arches that do not support
                         * this_cpu_cmpxchg_double().
                         */
                        if (PageSlab(page_head)) {
                                if (PageTail(page)) {
                                        if (put_page_testzero(page_head))
                                                VM_BUG_ON(1);

                                        atomic_dec(&page->_mapcount);
                                        goto skip_lock_tail;
                                } else
                                        goto skip_lock;
                        }
                        /*
                         * page_head wasn't a dangling pointer but it
                         * may not be a head page anymore by the time
                         * we obtain the lock. That is ok as long as it
                         * can't be freed from under us.
                         */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
skip_lock:
                                if (put_page_testzero(page_head))
                                        __put_single_page(page_head);
out_put_single:
                                if (put_page_testzero(page))
                                        __put_single_page(page);
                                return;
                        }
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
                         * get_page_unless_zero() now that
                         * __split_huge_page_refcount() is blocked on
                         * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
                        VM_BUG_ON(page_mapcount(page) <= 0);
                        atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
                        VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);

skip_lock_tail:
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
                } else {
                        /* page_head is a dangling pointer */
                        VM_BUG_ON(PageTail(page));
                        goto out_put_single;
                }
        } else if (put_page_testzero(page)) {
                if (PageHead(page))
                        __put_compound_page(page);
                else
                        __put_single_page(page);
        }
}