static void check_mm(struct mm_struct *mm) { int i; for (i = 0; i < NR_MM_COUNTERS; i++) { long x = atomic_long_read(&mm->rss_stat.count[i]); if (unlikely(x)) printk(KERN_ALERT "BUG: Bad rss-counter state " "mm:%p idx:%d val:%ld\n", mm, i, x); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON(mm->pmd_huge_pte); #endif }
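/*
 * Hedged illustration (not part of the excerpt above): check_mm() only
 * reports a bug if the per-mm RSS counters it reads are non-zero at
 * teardown, i.e. if inc/dec pairs like the ones below ever got out of
 * balance.  inc_mm_counter() and dec_mm_counter() are the standard
 * helpers; the wrapper names here are hypothetical.
 */
#include <linux/mm.h>

static void account_anon_page_mapped(struct mm_struct *mm)
{
	/* e.g. a fault handler installing a new anonymous PTE */
	inc_mm_counter(mm, MM_ANONPAGES);
}

static void account_anon_page_unmapped(struct mm_struct *mm)
{
	/* e.g. zap_pte_range() tearing the mapping down again */
	dec_mm_counter(mm, MM_ANONPAGES);
}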
void homecache_free_pages(unsigned long addr, unsigned int order) { struct page *page; if (addr == 0) return; VM_BUG_ON(!virt_addr_valid((void *)addr)); page = virt_to_page((void *)addr); if (put_page_testzero(page)) { int pages = (1 << order); homecache_change_page_home(page, order, initial_page_home()); while (pages--) __free_page(page++); } }
/* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. */ static void __page_cache_release(struct page *page) { if (PageLRU(page)) { unsigned long flags; struct zone *zone = page_zone(page); spin_lock_irqsave(&zone->lru_lock, flags); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); del_page_from_lru(zone, page); spin_unlock_irqrestore(&zone->lru_lock, flags); } else if (PageIONBacked(page)) { ClearPageActive(page); ClearPageUnevictable(page); } }
/* * pagevec_release() for pages which are known to not be on the LRU * * This function reinitialises the caller's pagevec. */ void __pagevec_release_nonlru(struct pagevec *pvec) { int i; struct pagevec pages_to_free; pagevec_init(&pages_to_free, pvec->cold); for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; VM_BUG_ON(PageLRU(page)); if (put_page_testzero(page)) pagevec_add(&pages_to_free, page); } pagevec_free(&pages_to_free); pagevec_reinit(pvec); }
/* * Batched page_cache_release(). Decrement the reference count on all the * passed pages. If it fell to zero then remove the page from the LRU and * free it. * * Avoid taking zone->lru_lock if possible, but if it is taken, retain it * for the remainder of the operation. * * The locking in this function is against shrink_inactive_list(): we recheck * the page count inside the lock to see whether shrink_inactive_list() * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() * will free it. */ void release_pages(struct page **pages, int nr, int cold) { int i; LIST_HEAD(pages_to_free); struct zone *zone = NULL; struct lruvec *lruvec; unsigned long uninitialized_var(flags); for (i = 0; i < nr; i++) { struct page *page = pages[i]; if (unlikely(PageCompound(page))) { if (zone) { spin_unlock_irqrestore(&zone->lru_lock, flags); zone = NULL; } put_compound_page(page); continue; } if (!put_page_testzero(page)) continue; if (PageLRU(page)) { struct zone *pagezone = page_zone(page); if (pagezone != zone) { if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); zone = pagezone; spin_lock_irqsave(&zone->lru_lock, flags); } lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } list_add(&page->lru, &pages_to_free); } if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); free_hot_cold_page_list(&pages_to_free, cold); }
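/*
 * For context, a sketch of the pagevec-side caller from the same era of
 * mm/swap.c: the per-CPU LRU-add batches are drained first so that pages
 * being released are actually on the LRU, then the whole vector is handed
 * to release_pages() in one batch.  Treat this as illustrative rather
 * than a verbatim copy.
 */
void __pagevec_release(struct pagevec *pvec)
{
	lru_add_drain();
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}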
static int get_gate_page(struct mm_struct *mm, unsigned long address, unsigned int gup_flags, struct vm_area_struct **vma, struct page **page) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; int ret = -EFAULT; /* user gate pages are read-only */ if (gup_flags & FOLL_WRITE) return -EFAULT; if (address > TASK_SIZE) pgd = pgd_offset_k(address); else pgd = pgd_offset_gate(mm, address); BUG_ON(pgd_none(*pgd)); p4d = p4d_offset(pgd, address); BUG_ON(p4d_none(*p4d)); pud = pud_offset(p4d, address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return -EFAULT; VM_BUG_ON(pmd_trans_huge(*pmd)); pte = pte_offset_map(pmd, address); if (pte_none(*pte)) goto unmap; *vma = get_gate_vma(mm); if (!page) goto out; *page = vm_normal_page(*vma, address, *pte); if (!*page) { if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte))) goto unmap; *page = pte_page(*pte); } get_page(*page); out: ret = 0; unmap: pte_unmap(pte); return ret; }
/* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much * register pressure. */ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; pte_t *ptep; if (tlb_type == hypervisor) { result = _PAGE_PRESENT_4V|_PAGE_P_4V; if (write) result |= _PAGE_WRITE_4V; } else { result = _PAGE_PRESENT_4U|_PAGE_P_4U; if (write) result |= _PAGE_WRITE_4U; } mask = result | _PAGE_SPECIAL; ptep = pte_offset_kernel(&pmd, addr); do { struct page *page, *head; pte_t pte = *ptep; if ((pte_val(pte) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); /* The hugepage case is simplified on sparc64 because * we encode the sub-page pfn offsets into the * hugepage PTEs. We could optimize this in the future * to use page_cache_add_speculative() for the hugepage case. */ page = pte_page(pte); head = compound_head(page); if (!page_cache_get_speculative(head)) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); return 0; } pages[*nr] = page; (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); return 1; }
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed) { set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; #else /* CONFIG_TRANSPARENT_HUGEPAGE */ BUG(); return 0; #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ }
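/*
 * Hedged example of a typical caller: the THP fault paths mark a huge PMD
 * young/dirty and only update the MMU cache when pmdp_set_access_flags()
 * reports that the entry really changed.  The helper name below is
 * illustrative; the pattern mirrors mm/huge_memory.c.
 */
static void touch_huge_pmd(struct vm_area_struct *vma, unsigned long addr,
			   pmd_t *pmd, int dirty)
{
	pmd_t entry = pmd_mkyoung(*pmd);

	if (dirty)
		entry = pmd_mkdirty(entry);
	/* the address must be huge-page aligned, matching the VM_BUG_ON above */
	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, pmd, entry, dirty))
		update_mmu_cache_pmd(vma, addr, pmd);
}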
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags, const unsigned long offset) { struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; /* This should only ever run for 32-bit processes. */ BUG_ON(!test_thread_flag(TIF_32BIT)); info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; info.align_mask = PAGE_MASK & ~HPAGE_MASK; info.align_offset = 0; info.threadstack_offset = offset; addr = vm_unmapped_area(&info); /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ if (addr & ~PAGE_MASK) { VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; #ifdef CONFIG_PAX_RANDMMAP if (mm->pax_flags & MF_PAX_RANDMMAP) info.low_limit += mm->delta_mmap; #endif info.high_limit = STACK_TOP32; addr = vm_unmapped_area(&info); } return addr; }
/* * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error, i, nr = hpage_nr_pages(page); struct address_space *address_space; pgoff_t idx = swp_offset(entry); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); page_ref_add(page, nr); SetPageSwapCache(page); address_space = swap_address_space(entry); spin_lock_irq(&address_space->tree_lock); for (i = 0; i < nr; i++) { set_page_private(page + i, entry.val + i); error = radix_tree_insert(&address_space->page_tree, idx + i, page + i); if (unlikely(error)) break; } if (likely(!error)) { address_space->nrpages += nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); ADD_CACHE_INFO(add_total, nr); } else { /* * Only the context that has set the SWAP_HAS_CACHE flag * would call add_to_swap_cache(), so add_to_swap_cache() * doesn't return -EEXIST. */ VM_BUG_ON(error == -EEXIST); set_page_private(page + i, 0UL); while (i--) { radix_tree_delete(&address_space->page_tree, idx + i); set_page_private(page + i, 0UL); } ClearPageSwapCache(page); page_ref_sub(page, nr); } spin_unlock_irq(&address_space->tree_lock); return error; }
/* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much * register pressure. */ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { u64 mask, result; pte_t *ptep; #ifdef CONFIG_X2TLB result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ); if (write) result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE); #elif defined(CONFIG_SUPERH64) result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; if (write) result |= _PAGE_WRITE; #else result = _PAGE_PRESENT | _PAGE_USER; if (write) result |= _PAGE_RW; #endif mask = result | _PAGE_SPECIAL; ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; if ((pte_val(pte) & mask) != result) { pte_unmap(ptep); return 0; } VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); __flush_anon_page(page, addr); flush_dcache_page(page); pages[*nr] = page; (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); pte_unmap(ptep - 1); return 1; }
/* * "Get" data from cleancache associated with the poolid/inode/index * that were specified when the data was put to cleanache and, if * successful, use it to fill the specified page with data and return 0. * The pageframe is unchanged and returns -1 if the get fails. * Page must be locked by caller. */ int __cleancache_get_page(struct page *page) { int ret = -1; int pool_id; VM_BUG_ON(!PageLocked(page)); pool_id = page->mapping->host->i_sb->cleancache_poolid; if (pool_id >= 0) { ret = (*cleancache_ops.get_page)(pool_id, page->mapping->host->i_ino, page->index, page); if (ret == 0) succ_gets++; else failed_gets++; } return ret; }
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; pte_t *ptep; if (tlb_type == hypervisor) { result = _PAGE_PRESENT_4V|_PAGE_P_4V; if (write) result |= _PAGE_WRITE_4V; } else { result = _PAGE_PRESENT_4U|_PAGE_P_4U; if (write) result |= _PAGE_WRITE_4U; } mask = result | _PAGE_SPECIAL; ptep = pte_offset_kernel(&pmd, addr); do { struct page *page, *head; pte_t pte = *ptep; if ((pte_val(pte) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); if (!page_cache_get_speculative(head)) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); return 0; } if (head != page) get_huge_page_tail(page); pages[*nr] = page; (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); return 1; }
void copy_mm_to_paca(struct mm_struct *mm) { #ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm->context.addr_limit); get_paca()->addr_limit = mm->context.addr_limit; get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; memcpy(&get_paca()->mm_ctx_high_slices_psize, &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); #else /* CONFIG_PPC_MM_SLICES */ get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; #endif #else /* CONFIG_PPC_BOOK3S */ return; #endif }
static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) { pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; if (PageTransCompound(pfn_to_page(pfn))) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge * page. However, because we map the compound huge page and * not the individual tail page, we need to transfer the * refcount to the head page. We have to be careful that the * THP doesn't start to split while we are adjusting the * refcounts. * * We are sure this doesn't happen, because mmu_notifier_retry * was successful and we are holding the mmu_lock, so if this * THP is trying to split, it will be blocked in the mmu * notifier before touching any of the pages, specifically * before being able to call __split_huge_page_refcount(). * * We can therefore safely transfer the refcount from PG_tail * to PG_head and switch the pfn from a tail page to the head * page accordingly. */ mask = PTRS_PER_PMD - 1; VM_BUG_ON((gfn & mask) != (pfn & mask)); if (pfn & mask) { *ipap &= PMD_MASK; kvm_release_pfn_clean(pfn); pfn &= ~mask; kvm_get_pfn(pfn); *pfnp = pfn; } return true; } return false; }
/** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can * support either full 40-bit input addresses or limited to 32-bit input * addresses). Clears the allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { pgd_t *pgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); if (!pgd) return -ENOMEM; /* stage-2 pgd must be aligned to its size */ VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; }
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long offset) { struct mm_struct *mm = current->mm; unsigned long task_size = TASK_SIZE; struct vm_unmapped_area_info info; if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; info.high_limit = min(task_size, VA_EXCLUDE_START); info.align_mask = PAGE_MASK & ~HPAGE_MASK; info.align_offset = 0; info.threadstack_offset = offset; addr = vm_unmapped_area(&info); if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) { VM_BUG_ON(addr != -ENOMEM); info.low_limit = VA_EXCLUDE_END; #ifdef CONFIG_PAX_RANDMMAP if (mm->pax_flags & MF_PAX_RANDMMAP) info.low_limit += mm->delta_mmap; #endif info.high_limit = task_size; addr = vm_unmapped_area(&info); } return addr; }
/* * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error; struct address_space *address_space; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); get_page(page); SetPageSwapCache(page); set_page_private(page, entry.val); address_space = swap_address_space(entry); spin_lock_irq(&address_space->tree_lock); error = radix_tree_insert(&address_space->page_tree, entry.val, page); if (likely(!error)) { address_space->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(add_total); } spin_unlock_irq(&address_space->tree_lock); if (unlikely(error)) { /* * Only the context that has set the SWAP_HAS_CACHE flag * would call add_to_swap_cache(), so add_to_swap_cache() * doesn't return -EEXIST. */ VM_BUG_ON(error == -EEXIST); set_page_private(page, 0UL); ClearPageSwapCache(page); put_page(page); } return error; }
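/*
 * For context, a sketch of the wrapper callers normally use: the radix tree
 * node is preallocated outside tree_lock so the insert in
 * __add_to_swap_cache() cannot fail on memory allocation while the lock is
 * held.  This mirrors mm/swap_state.c of the same era; treat it as a sketch
 * rather than a verbatim copy.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
{
	int error;

	error = radix_tree_maybe_preload(gfp_mask);
	if (!error) {
		error = __add_to_swap_cache(page, entry);
		radix_tree_preload_end();
	}
	return error;
}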
/* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much * register pressure. */ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; pte_t *ptep; result = _PAGE_PRESENT|_PAGE_USER; if (write) result |= _PAGE_RW; mask = result | _PAGE_SPECIAL; ptep = pte_offset_kernel(&pmd, addr); do { pte_t pte = READ_ONCE(*ptep); struct page *page; /* * Similar to the PMD case, NUMA hinting must take slow path */ if (pte_numa(pte)) return 0; if ((pte_val(pte) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); if (!page_cache_get_speculative(page)) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(page); return 0; } pages[*nr] = page; (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); return 1; }
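/*
 * Hedged sketch of the level above gup_pte_range(): the PMD walker in a
 * fast-GUP implementation of this style.  Huge-PMD and NUMA-hinting
 * handling is deliberately elided; real implementations check
 * pmd_trans_huge()/pmd_numa() before descending to the PTE level.
 */
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			return 0;
		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}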
/** * dma_release_from_contiguous() - release allocated pages * @dev: Pointer to device for which the pages were allocated. * @pages: Allocated pages. * @count: Number of allocated pages. * * This function releases memory allocated by dma_alloc_from_contiguous(). * It returns false when provided pages do not belong to contiguous area and * true otherwise. */ bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count) { struct cma *cma = dev_get_cma_area(dev); unsigned long pfn; if (!cma || !pages) return false; pr_debug("%s(page %p)\n", __func__, (void *)pages); pfn = page_to_pfn(pages); if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) return false; VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); free_contig_range(pfn, count); clear_cma_bitmap(cma, pfn, count); return true; }
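/*
 * Hedged usage sketch: dma_release_from_contiguous() undoes a prior
 * dma_alloc_from_contiguous().  The helper names and buffer size below are
 * hypothetical, and the allocator's signature has varied across kernel
 * versions, so treat this purely as an illustration of the pairing.
 */
#define EXAMPLE_CMA_PAGES 16

static struct page *example_cma_buf;

static int example_cma_alloc(struct device *dev)
{
	example_cma_buf = dma_alloc_from_contiguous(dev, EXAMPLE_CMA_PAGES, 0);
	return example_cma_buf ? 0 : -ENOMEM;
}

static void example_cma_free(struct device *dev)
{
	if (example_cma_buf &&
	    !dma_release_from_contiguous(dev, example_cma_buf, EXAMPLE_CMA_PAGES))
		pr_warn("pages did not come from the device's CMA area\n");
}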
/* used by __split_huge_page_refcount() */ void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *list) { const int file = 0; VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(PageCompound(page_tail), page); VM_BUG_ON_PAGE(PageLRU(page_tail), page); VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock)); if (!list) SetPageLRU(page_tail); if (likely(PageLRU(page))) list_add_tail(&page_tail->lru, &page->lru); else if (list) { /* page reclaim is reclaiming a huge page */ get_page(page_tail); list_add_tail(&page_tail->lru, list); } else { struct list_head *list_head; /* * Head page has not yet been counted, as an hpage, * so we must account for each subpage individually. * * Use the standard add function to put page_tail on the list, * but then correct its position so they all end up in order. */ add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail)); list_head = page_tail->lru.prev; list_move_tail(&page_tail->lru, list_head); } if (!PageUnevictable(page)) update_page_reclaim_stat(lruvec, file, PageActive(page_tail)); }
/* * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ void __pagevec_lru_add(struct pagevec *pvec) { int i; struct zone *zone = NULL; for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; struct zone *pagezone = page_zone(page); if (pagezone != zone) { if (zone) spin_unlock_irq(&zone->lru_lock); zone = pagezone; spin_lock_irq(&zone->lru_lock); } VM_BUG_ON(PageLRU(page)); SetPageLRU(page); add_page_to_inactive_list(zone, page); } if (zone) spin_unlock_irq(&zone->lru_lock); release_pages(pvec->pages, pvec->nr, pvec->cold); pagevec_reinit(pvec); }
/* * Hacked from kernel function __get_user_pages in mm/memory.c * * Handle buffers allocated by other kernel-space drivers and mmapped into user * space; the function ignores the VM_PFNMAP and VM_IO flags in the VMA structure. * * Get physical pages from a user-space virtual address and store them in the page list */ static int __get_pfnmap_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { int i, ret; unsigned long vm_flags; if (nr_pages <= 0) return 0; VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); /* * Require read or write permissions. * If FOLL_FORCE is set, we only require the "MAY" flags. */ vm_flags = (gup_flags & FOLL_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); vm_flags &= (gup_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); i = 0; do { struct vm_area_struct *vma; vma = find_vma(mm, start); if (!vma) { dev_err(atomisp_dev, "find_vma failed\n"); return i ? : -EFAULT; } if (is_vm_hugetlb_page(vma)) { /* i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &nr_pages, i, gup_flags); */ continue; } do { struct page *page; unsigned long pfn; /* * If we have a pending SIGKILL, don't keep faulting * pages and potentially allocating memory. */ if (unlikely(fatal_signal_pending(current))) { dev_err(atomisp_dev, "fatal_signal_pending in %s\n", __func__); return i ? i : -ERESTARTSYS; } ret = follow_pfn(vma, start, &pfn); if (ret) { dev_err(atomisp_dev, "follow_pfn() failed\n"); return i ? : -EFAULT; } page = pfn_to_page(pfn); if (IS_ERR(page)) return i ? i : PTR_ERR(page); if (pages) { pages[i] = page; get_page(page); flush_anon_page(vma, page, start); flush_dcache_page(page); } if (vmas) vmas[i] = vma; i++; start += PAGE_SIZE; nr_pages--; } while (nr_pages && start < vma->vm_end); } while (nr_pages); return i; }
/* * "Get" data from cleancache associated with the poolid/inode/index * that were specified when the data was put to cleanache and, if * successful, use it to fill the specified page with data and return 0. * The pageframe is unchanged and returns -1 if the get fails. * Page must be locked by caller. */ int __cleancache_get_page(struct page *page) { int ret = -1; int pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; VM_BUG_ON(!PageLocked(page)); pool_id = page->mapping->host->i_sb->cleancache_poolid; if (pool_id < 0) goto out; if (cleancache_get_key(page->mapping->host, &key) < 0) goto out; ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); if (ret == 0) cleancache_succ_gets++; else cleancache_failed_gets++; out: return ret; } EXPORT_SYMBOL(__cleancache_get_page); /* * "Put" data from a page to cleancache and associate it with the * (previously-obtained per-filesystem) poolid and the page's, * inode and page index. Page must be locked. Note that a put_page * always "succeeds", though a subsequent get_page may succeed or fail. */ void __cleancache_put_page(struct page *page) { int pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; VM_BUG_ON(!PageLocked(page)); pool_id = page->mapping->host->i_sb->cleancache_poolid; if (pool_id >= 0 && cleancache_get_key(page->mapping->host, &key) >= 0) { (*cleancache_ops.put_page)(pool_id, key, page->index, page); cleancache_puts++; } } EXPORT_SYMBOL(__cleancache_put_page); /* * Invalidate any data from cleancache associated with the poolid and the * page's inode and page index so that a subsequent "get" will fail. */ void __cleancache_invalidate_page(struct address_space *mapping, struct page *page) { /* careful... page->mapping is NULL sometimes when this is called */ int pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; if (pool_id >= 0) { VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { (*cleancache_ops.invalidate_page)(pool_id, key, page->index); cleancache_flushes++; } } } EXPORT_SYMBOL(__cleancache_invalidate_page); /* * Invalidate all data from cleancache associated with the poolid and the * mappings's inode so that all subsequent gets to this poolid/inode * will fail. 
*/ void __cleancache_invalidate_inode(struct address_space *mapping) { int pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) (*cleancache_ops.invalidate_inode)(pool_id, key); } EXPORT_SYMBOL(__cleancache_invalidate_inode); /* * Called by any cleancache-enabled filesystem at time of unmount; * note that pool_id is surrendered and may be returned by a subsequent * cleancache_init_fs or cleancache_init_shared_fs. */ void __cleancache_invalidate_fs(struct super_block *sb) { if (sb->cleancache_poolid >= 0) { int old_poolid = sb->cleancache_poolid; sb->cleancache_poolid = -1; (*cleancache_ops.invalidate_fs)(old_poolid); } } EXPORT_SYMBOL(__cleancache_invalidate_fs); #ifdef CONFIG_SYSFS /* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */ #define CLEANCACHE_SYSFS_RO(_name) \ static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%lu\n", cleancache_##_name); \ } \ static struct kobj_attribute cleancache_##_name##_attr = { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .show = cleancache_##_name##_show, \ } CLEANCACHE_SYSFS_RO(succ_gets); CLEANCACHE_SYSFS_RO(failed_gets); CLEANCACHE_SYSFS_RO(puts); CLEANCACHE_SYSFS_RO(flushes); static struct attribute *cleancache_attrs[] = { &cleancache_succ_gets_attr.attr, &cleancache_failed_gets_attr.attr, &cleancache_puts_attr.attr, &cleancache_flushes_attr.attr, NULL, }; static struct attribute_group cleancache_attr_group = { .attrs = cleancache_attrs, .name = "cleancache", }; #endif /* CONFIG_SYSFS */ static int __init init_cleancache(void) { #ifdef CONFIG_SYSFS int err; err = sysfs_create_group(mm_kobj, &cleancache_attr_group); #endif /* CONFIG_SYSFS */ return 0; } module_init(init_cleancache)
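/*
 * Hedged sketch of the header-side wrapper that filesystem read paths call
 * (include/linux/cleancache.h in this era).  It keeps the common case cheap
 * by checking that cleancache is enabled and the superblock has a pool
 * before entering __cleancache_get_page().  The exact guard differs between
 * kernel versions, so treat this as illustrative.
 */
static inline int cleancache_get_page(struct page *page)
{
	int ret = -1;

	if (cleancache_enabled &&
	    page->mapping->host->i_sb->cleancache_poolid >= 0)
		ret = __cleancache_get_page(page);
	return ret;
}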
static void put_compound_page(struct page *page) { struct page *page_head; if (likely(!PageTail(page))) { if (put_page_testzero(page)) { /* * By the time all refcounts have been released * split_huge_page cannot run anymore from under us. */ if (PageHead(page)) __put_compound_page(page); else __put_single_page(page); } return; } /* __split_huge_page_refcount can run under us */ page_head = compound_trans_head(page); /* * THP can not break up slab pages so avoid taking * compound_lock() and skip the tail page refcounting (in * _mapcount) too. Slab performs non-atomic bit ops on * page->flags for better performance. In particular * slab_unlock() in slub used to be a hot path. It is still * hot on arches that do not support * this_cpu_cmpxchg_double(). * * If "page" is part of a slab or hugetlbfs page it cannot be * splitted and the head page cannot change from under us. And * if "page" is part of a THP page under splitting, if the * head page pointed by the THP tail isn't a THP head anymore, * we'll find PageTail clear after smp_rmb() and we'll treat * it as a single page. */ if (!__compound_tail_refcounted(page_head)) { /* * If "page" is a THP tail, we must read the tail page * flags after the head page flags. The * split_huge_page side enforces write memory barriers * between clearing PageTail and before the head page * can be freed and reallocated. */ smp_rmb(); if (likely(PageTail(page))) { /* * __split_huge_page_refcount cannot race * here. */ VM_BUG_ON(!PageHead(page_head)); VM_BUG_ON(page_mapcount(page) != 0); if (put_page_testzero(page_head)) { /* * If this is the tail of a slab * compound page, the tail pin must * not be the last reference held on * the page, because the PG_slab * cannot be cleared before all tail * pins (which skips the _mapcount * tail refcounting) have been * released. For hugetlbfs the tail * pin may be the last reference on * the page instead, because * PageHeadHuge will not go away until * the compound page enters the buddy * allocator. */ VM_BUG_ON(PageSlab(page_head)); __put_compound_page(page_head); } return; } else /* * __split_huge_page_refcount run before us, * "page" was a THP tail. The split page_head * has been freed and reallocated as slab or * hugetlbfs page of smaller order (only * possible if reallocated as slab on x86). */ goto out_put_single; } if (likely(page != page_head && get_page_unless_zero(page_head))) { unsigned long flags; /* * page_head wasn't a dangling pointer but it may not * be a head page anymore by the time we obtain the * lock. That is ok as long as it can't be freed from * under us. */ flags = compound_lock_irqsave(page_head); if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { /* * The head page may have been freed * and reallocated as a compound page * of smaller order and then freed * again. All we know is that it * cannot have become: a THP page, a * compound page of higher order, a * tail page. That is because we * still hold the refcount of the * split THP tail and page_head was * the THP head before the split. */ if (PageHead(page_head)) __put_compound_page(page_head); else __put_single_page(page_head); } out_put_single: if (put_page_testzero(page)) __put_single_page(page); return; } VM_BUG_ON(page_head != page->first_page); /* * We can release the refcount taken by * get_page_unless_zero() now that * __split_huge_page_refcount() is blocked on the * compound_lock. 
*/ if (put_page_testzero(page_head)) VM_BUG_ON(1); /* __split_huge_page_refcount will wait now */ VM_BUG_ON(page_mapcount(page) <= 0); atomic_dec(&page->_mapcount); VM_BUG_ON(atomic_read(&page_head->_count) <= 0); VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); else __put_single_page(page_head); } } else { /* page_head is a dangling pointer */ VM_BUG_ON(PageTail(page)); goto out_put_single; } }
/** * lru_cache_add - add a page to a page list * @page: the page to be added to the LRU. */ void lru_cache_add(struct page *page) { VM_BUG_ON(PageActive(page) && PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); __lru_cache_add(page); }
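/*
 * Hedged example of a typical call site: a newly allocated page-cache page
 * is inserted into its mapping first and only then put on the LRU, which is
 * why lru_cache_add() can assert !PageLRU.  The helper name is hypothetical;
 * the shape mirrors add_to_page_cache_lru().
 */
static int example_add_to_cache_and_lru(struct page *page,
					struct address_space *mapping,
					pgoff_t index, gfp_t gfp)
{
	int err = add_to_page_cache(page, mapping, index, gfp);

	if (!err)
		lru_cache_add(page);	/* page is new: not on any LRU yet */
	return err;
}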
unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; unsigned long addr = addr0; int do_color_align, last_mmap; struct vm_unmapped_area_info info; #ifdef CONFIG_64BIT /* This should only ever run for 32-bit processes. */ BUG_ON(!test_thread_flag(TIF_32BIT)); #endif /* requested length too big for entire address space */ if (len > TASK_SIZE) return -ENOMEM; do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; last_mmap = GET_LAST_MMAP(filp); if (flags & MAP_FIXED) { if ((flags & MAP_SHARED) && last_mmap && (addr - shared_align_offset(last_mmap, pgoff)) & (SHM_COLOUR - 1)) return -EINVAL; goto found_addr; } /* requesting a specific address */ if (addr) { if (do_color_align && last_mmap) addr = COLOR_ALIGN(addr, last_mmap, pgoff); else addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) goto found_addr; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0; info.align_offset = shared_align_offset(last_mmap, pgoff); addr = vm_unmapped_area(&info); if (!(addr & ~PAGE_MASK)) goto found_addr; VM_BUG_ON(addr != -ENOMEM); /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ return arch_get_unmapped_area(filp, addr0, len, pgoff, flags); found_addr: if (do_color_align && !last_mmap && !(addr & ~PAGE_MASK)) SET_LAST_MMAP(filp, addr - (pgoff << PAGE_SHIFT)); return addr; }
/* * "Get" data from cleancache associated with the poolid/inode/index * that were specified when the data was put to cleanache and, if * successful, use it to fill the specified page with data and return 0. * The pageframe is unchanged and returns -1 if the get fails. * Page must be locked by caller. * * The function has two checks before any action is taken - whether * a backend is registered and whether the sb->cleancache_poolid * is correct. */ int __cleancache_get_page(struct page *page) { int ret = -1; int pool_id; int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; if (!cleancache_ops) { cleancache_failed_gets++; goto out; } VM_BUG_ON(!PageLocked(page)); fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; if (fake_pool_id < 0) goto out; pool_id = get_poolid_from_fake(fake_pool_id); if (cleancache_get_key(page->mapping->host, &key) < 0) goto out; if (pool_id >= 0) ret = cleancache_ops->get_page(pool_id, key, page->index, page); if (ret == 0) cleancache_succ_gets++; else cleancache_failed_gets++; out: return ret; } EXPORT_SYMBOL(__cleancache_get_page); /* * "Put" data from a page to cleancache and associate it with the * (previously-obtained per-filesystem) poolid and the page's, * inode and page index. Page must be locked. Note that a put_page * always "succeeds", though a subsequent get_page may succeed or fail. * * The function has two checks before any action is taken - whether * a backend is registered and whether the sb->cleancache_poolid * is correct. */ void __cleancache_put_page(struct page *page) { int pool_id; int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; if (!cleancache_ops) { cleancache_puts++; return; } VM_BUG_ON(!PageLocked(page)); fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; if (fake_pool_id < 0) return; pool_id = get_poolid_from_fake(fake_pool_id); if (pool_id >= 0 && cleancache_get_key(page->mapping->host, &key) >= 0) { cleancache_ops->put_page(pool_id, key, page->index, page); cleancache_puts++; } } EXPORT_SYMBOL(__cleancache_put_page); /* * Invalidate any data from cleancache associated with the poolid and the * page's inode and page index so that a subsequent "get" will fail. * * The function has two checks before any action is taken - whether * a backend is registered and whether the sb->cleancache_poolid * is correct. */ void __cleancache_invalidate_page(struct address_space *mapping, struct page *page) { /* careful... page->mapping is NULL sometimes when this is called */ int pool_id; int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; if (!cleancache_ops) return; if (fake_pool_id >= 0) { pool_id = get_poolid_from_fake(fake_pool_id); if (pool_id < 0) return; VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { cleancache_ops->invalidate_page(pool_id, key, page->index); cleancache_invalidates++; } } } EXPORT_SYMBOL(__cleancache_invalidate_page); /* * Invalidate all data from cleancache associated with the poolid and the * mappings's inode so that all subsequent gets to this poolid/inode * will fail. * * The function has two checks before any action is taken - whether * a backend is registered and whether the sb->cleancache_poolid * is correct. 
*/ void __cleancache_invalidate_inode(struct address_space *mapping) { int pool_id; int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; if (!cleancache_ops) return; if (fake_pool_id < 0) return; pool_id = get_poolid_from_fake(fake_pool_id); if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) cleancache_ops->invalidate_inode(pool_id, key); } EXPORT_SYMBOL(__cleancache_invalidate_inode); /* * Called by any cleancache-enabled filesystem at time of unmount; * note that pool_id is surrendered and may be returned by a subsequent * cleancache_init_fs or cleancache_init_shared_fs. */ void __cleancache_invalidate_fs(struct super_block *sb) { int index; int fake_pool_id = sb->cleancache_poolid; int old_poolid = fake_pool_id; mutex_lock(&poolid_mutex); if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) { index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET; old_poolid = shared_fs_poolid_map[index]; shared_fs_poolid_map[index] = FS_UNKNOWN; uuids[index] = NULL; } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) { index = fake_pool_id - FAKE_FS_POOLID_OFFSET; old_poolid = fs_poolid_map[index]; fs_poolid_map[index] = FS_UNKNOWN; } sb->cleancache_poolid = -1; if (cleancache_ops) cleancache_ops->invalidate_fs(old_poolid); mutex_unlock(&poolid_mutex); }
/* * A commonly used alloc and free fn. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { struct kmem_cache *mem = pool_data; VM_BUG_ON(mem->ctor); return kmem_cache_alloc(mem, gfp_mask); }
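/*
 * Usage sketch (hypothetical names): mempool_alloc_slab/mempool_free_slab
 * are meant to be passed to mempool_create() with a kmem_cache as the
 * pool_data.  The cache is created without a constructor, which is exactly
 * what the VM_BUG_ON(mem->ctor) above enforces.
 */
struct example_io_unit {
	int tag;
};

static struct kmem_cache *example_io_cache;
static mempool_t *example_io_pool;

static int __init example_pool_init(void)
{
	example_io_cache = kmem_cache_create("example_io_unit",
					     sizeof(struct example_io_unit),
					     0, 0, NULL);
	if (!example_io_cache)
		return -ENOMEM;

	/* guarantee at least 4 objects even under memory pressure */
	example_io_pool = mempool_create(4, mempool_alloc_slab,
					 mempool_free_slab, example_io_cache);
	if (!example_io_pool) {
		kmem_cache_destroy(example_io_cache);
		return -ENOMEM;
	}
	return 0;
}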
unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; unsigned long task_size = STACK_TOP32; unsigned long addr = addr0; int do_color_align; struct vm_unmapped_area_info info; /* This should only ever run for 32-bit processes. */ BUG_ON(!test_thread_flag(TIF_32BIT)); if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate * cache aliasing constraints. */ if ((flags & MAP_SHARED) && ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) return -EINVAL; return addr; } if (unlikely(len > task_size)) return -ENOMEM; do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; /* requesting a specific address */ if (addr) { if (do_color_align) addr = COLOR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; info.align_offset = pgoff << PAGE_SHIFT; addr = vm_unmapped_area(&info); /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ if (addr & ~PAGE_MASK) { VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = STACK_TOP32; addr = vm_unmapped_area(&info); } return addr; }