Example #1
0
static void check_mm(struct mm_struct *mm)
{
	int i;

	for (i = 0; i < NR_MM_COUNTERS; i++) {
		long x = atomic_long_read(&mm->rss_stat.count[i]);

		if (unlikely(x))
			printk(KERN_ALERT "BUG: Bad rss-counter state "
					  "mm:%p idx:%d val:%ld\n", mm, i, x);
	}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	VM_BUG_ON(mm->pmd_huge_pte);
#endif
}
void homecache_free_pages(unsigned long addr, unsigned int order)
{
	struct page *page;

	if (addr == 0)
		return;

	VM_BUG_ON(!virt_addr_valid((void *)addr));
	page = virt_to_page((void *)addr);
	if (put_page_testzero(page)) {
		int pages = (1 << order);
		homecache_change_page_home(page, order, initial_page_home());
		while (pages--)
			__free_page(page++);
	}
}
/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
static void __page_cache_release(struct page *page)
{
	if (PageLRU(page)) {
		unsigned long flags;
		struct zone *zone = page_zone(page);

		spin_lock_irqsave(&zone->lru_lock, flags);
		VM_BUG_ON(!PageLRU(page));
		__ClearPageLRU(page);
		del_page_from_lru(zone, page);
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	} else if (PageIONBacked(page)) {
		ClearPageActive(page);
		ClearPageUnevictable(page);
	}
}
Example #4
0
/*
 * pagevec_release() for pages which are known to not be on the LRU
 *
 * This function reinitialises the caller's pagevec.
 */
void __pagevec_release_nonlru(struct pagevec *pvec)
{
	int i;
	struct pagevec pages_to_free;

	pagevec_init(&pages_to_free, pvec->cold);
	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		VM_BUG_ON(PageLRU(page));
		if (put_page_testzero(page))
			pagevec_add(&pages_to_free, page);
	}
	pagevec_free(&pages_to_free);
	pagevec_reinit(pvec);
}
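As a side note, here is a minimal userspace sketch of the put_page_testzero() idea used above, assuming only C11 atomics: dropping a reference reports whether the caller released the last one and therefore owns the free. The names (struct ref_obj, put_testzero) are hypothetical and are not kernel APIs.

#include <stdatomic.h>
#include <stdbool.h>

struct ref_obj {
	atomic_int count;
};

/* Drop one reference; true means this was the last one, so the caller frees. */
static bool put_testzero(struct ref_obj *o)
{
	return atomic_fetch_sub(&o->count, 1) == 1;
}

int main(void)
{
	struct ref_obj o;

	atomic_init(&o.count, 2);
	if (put_testzero(&o))			/* 2 -> 1: not the last reference */
		return 1;
	return put_testzero(&o) ? 0 : 1;	/* 1 -> 0: last reference */
}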
Example #5
0
/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_inactive_list(): we recheck
 * the page count inside the lock to see whether shrink_inactive_list()
 * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
 * will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
	int i;
	LIST_HEAD(pages_to_free);
	struct zone *zone = NULL;
	struct lruvec *lruvec;
	unsigned long uninitialized_var(flags);

	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		if (unlikely(PageCompound(page))) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			put_compound_page(page);
			continue;
		}

		if (!put_page_testzero(page))
			continue;

		if (PageLRU(page)) {
			struct zone *pagezone = page_zone(page);

			if (pagezone != zone) {
				if (zone)
					spin_unlock_irqrestore(&zone->lru_lock,
									flags);
				zone = pagezone;
				spin_lock_irqsave(&zone->lru_lock, flags);
			}

			lruvec = mem_cgroup_page_lruvec(page, zone);
			VM_BUG_ON(!PageLRU(page));
			__ClearPageLRU(page);
			del_page_from_lru_list(page, lruvec, page_off_lru(page));
		}

		list_add(&page->lru, &pages_to_free);
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	free_hot_cold_page_list(&pages_to_free, cold);
}
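A minimal userspace sketch (not kernel code, all names hypothetical) of the locking pattern release_pages() uses above: items are processed in order, and the per-bucket lock is only dropped and re-acquired when the bucket changes, so consecutive items from the same bucket share one lock acquisition.

#include <pthread.h>

struct bucket {
	pthread_mutex_t lock;
	long nr_items;
};

struct item {
	struct bucket *bucket;
};

static void release_items(struct item **items, int nr)
{
	struct bucket *locked = NULL;
	int i;

	for (i = 0; i < nr; i++) {
		struct bucket *b = items[i]->bucket;

		if (b != locked) {
			/* switch locks only when the bucket changes */
			if (locked)
				pthread_mutex_unlock(&locked->lock);
			locked = b;
			pthread_mutex_lock(&locked->lock);
		}
		locked->nr_items--;	/* the per-item work, done under the lock */
	}
	if (locked)
		pthread_mutex_unlock(&locked->lock);
}

int main(void)
{
	struct bucket b = { PTHREAD_MUTEX_INITIALIZER, 2 };
	struct item it = { &b };
	struct item *items[] = { &it, &it };

	release_items(items, 2);
	return (int)b.nr_items;	/* 0 on success */
}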
Example #6
0
static int get_gate_page(struct mm_struct *mm, unsigned long address,
		unsigned int gup_flags, struct vm_area_struct **vma,
		struct page **page)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int ret = -EFAULT;

	/* user gate pages are read-only */
	if (gup_flags & FOLL_WRITE)
		return -EFAULT;
	if (address > TASK_SIZE)
		pgd = pgd_offset_k(address);
	else
		pgd = pgd_offset_gate(mm, address);
	BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, address);
	BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, address);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return -EFAULT;
	VM_BUG_ON(pmd_trans_huge(*pmd));
	pte = pte_offset_map(pmd, address);
	if (pte_none(*pte))
		goto unmap;
	*vma = get_gate_vma(mm);
	if (!page)
		goto out;
	*page = vm_normal_page(*vma, address, *pte);
	if (!*page) {
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
			goto unmap;
		*page = pte_page(*pte);
	}
	get_page(*page);
out:
	ret = 0;
unmap:
	pte_unmap(pte);
	return ret;
}
Example #7
0
/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	pte_t *ptep;

	if (tlb_type == hypervisor) {
		result = _PAGE_PRESENT_4V|_PAGE_P_4V;
		if (write)
			result |= _PAGE_WRITE_4V;
	} else {
		result = _PAGE_PRESENT_4U|_PAGE_P_4U;
		if (write)
			result |= _PAGE_WRITE_4U;
	}
	mask = result | _PAGE_SPECIAL;

	ptep = pte_offset_kernel(&pmd, addr);
	do {
		struct page *page, *head;
		pte_t pte = *ptep;

		if ((pte_val(pte) & mask) != result)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

		/* The hugepage case is simplified on sparc64 because
		 * we encode the sub-page pfn offsets into the
		 * hugepage PTEs.  We could optimize this in the future to
		 * use page_cache_add_speculative() for the hugepage case.
		 */
		page = pte_page(pte);
		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			return 0;
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			return 0;
		}

		pages[*nr] = page;
		(*nr)++;
	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}
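A hedged userspace sketch of the speculative-reference-then-recheck step above (page_cache_get_speculative() followed by re-reading the PTE), using only C11 atomics; struct obj and get_if_unchanged are illustrative names, not kernel interfaces.

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_int refcount;
	atomic_long key;	/* stands in for the PTE value being rechecked */
};

/* Take a reference only if the object is still live, then verify that the
 * key did not change underneath us; on a race, drop the reference and fail. */
static bool get_if_unchanged(struct obj *o, long expected_key)
{
	int ref = atomic_load(&o->refcount);

	do {
		if (ref == 0)
			return false;	/* already on its way to being freed */
	} while (!atomic_compare_exchange_weak(&o->refcount, &ref, ref + 1));

	if (atomic_load(&o->key) != expected_key) {
		atomic_fetch_sub(&o->refcount, 1);
		return false;
	}
	return true;
}

int main(void)
{
	struct obj o;

	atomic_init(&o.refcount, 1);
	atomic_init(&o.key, 42);
	return get_if_unchanged(&o, 42) ? 0 : 1;
}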
Example #8
0
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	int changed = !pmd_same(*pmdp, entry);
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (changed) {
		set_pmd_at(vma->vm_mm, address, pmdp, entry);
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	}
	return changed;
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
	BUG();
	return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
				  const unsigned long len,
				  const unsigned long pgoff,
				  const unsigned long flags,
				  const unsigned long offset)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* This should only ever run for 32-bit processes.  */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = PAGE_MASK & ~HPAGE_MASK;
	info.align_offset = 0;
	info.threadstack_offset = offset;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;

#ifdef CONFIG_PAX_RANDMMAP
		if (mm->pax_flags & MF_PAX_RANDMMAP)
			info.low_limit += mm->delta_mmap;
#endif

		info.high_limit = STACK_TOP32;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}
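The "addr & ~PAGE_MASK" test above works because vm_unmapped_area() returns either a page-aligned address or a negative errno cast to unsigned long, and small negative errnos are never page-aligned. Below is a self-contained sketch of that encoding; the DEMO_* constants are picked purely for illustration.

#include <assert.h>

#define DEMO_PAGE_SIZE	4096UL
#define DEMO_PAGE_MASK	(~(DEMO_PAGE_SIZE - 1))
#define DEMO_ENOMEM	12UL

int main(void)
{
	unsigned long ok = 0x7f0000000000UL;			/* a page-aligned address */
	unsigned long err = (unsigned long)-DEMO_ENOMEM;	/* -ENOMEM, cast */

	assert((ok & ~DEMO_PAGE_MASK) == 0);	/* real addresses pass the test */
	assert((err & ~DEMO_PAGE_MASK) != 0);	/* errno values fail it */
	return 0;
}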
Example #10
0
/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error, i, nr = hpage_nr_pages(page);
	struct address_space *address_space;
	pgoff_t idx = swp_offset(entry);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	for (i = 0; i < nr; i++) {
		set_page_private(page + i, entry.val + i);
		error = radix_tree_insert(&address_space->page_tree,
					  idx + i, page + i);
		if (unlikely(error))
			break;
	}
	if (likely(!error)) {
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
	} else {
		/*
		 * Only the context which has set the SWAP_HAS_CACHE flag
		 * would call add_to_swap_cache().
		 * So add_to_swap_cache() doesn't return -EEXIST.
		 */
		VM_BUG_ON(error == -EEXIST);
		set_page_private(page + i, 0UL);
		while (i--) {
			radix_tree_delete(&address_space->page_tree, idx + i);
			set_page_private(page + i, 0UL);
		}
		ClearPageSwapCache(page);
		page_ref_sub(page, nr);
	}
	spin_unlock_irq(&address_space->tree_lock);

	return error;
}
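A small userspace sketch of the insert-then-roll-back shape used above: try to insert nr consecutive entries, and on the first failure delete the ones already inserted. The fixed-size slot array and all names are hypothetical stand-ins for the radix tree.

#include <errno.h>
#include <stddef.h>

#define NSLOTS	64
static void *slots[NSLOTS];	/* hypothetical stand-in for the radix tree */

static int slot_insert(size_t idx, void *p)
{
	if (idx >= NSLOTS || slots[idx])
		return -EEXIST;
	slots[idx] = p;
	return 0;
}

static void slot_delete(size_t idx)
{
	slots[idx] = NULL;
}

/* Insert nr consecutive entries; on failure undo the ones already inserted. */
static int insert_range(size_t base, void **pages, int nr)
{
	int i, error = 0;

	for (i = 0; i < nr; i++) {
		error = slot_insert(base + i, pages[i]);
		if (error)
			break;
	}
	if (error) {
		while (i--)
			slot_delete(base + i);
	}
	return error;
}

int main(void)
{
	int dummy;
	void *p[4] = { &dummy, &dummy, &dummy, &dummy };

	return insert_range(8, p, 4);	/* 0 on success */
}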
Example #11
0
/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	u64 mask, result;
	pte_t *ptep;

#ifdef CONFIG_X2TLB
	result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
	if (write)
		result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
#elif defined(CONFIG_SUPERH64)
	result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
	if (write)
		result |= _PAGE_WRITE;
#else
	result = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		result |= _PAGE_RW;
#endif

	mask = result | _PAGE_SPECIAL;

	ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *page;

		if ((pte_val(pte) & mask) != result) {
			pte_unmap(ptep);
			return 0;
		}
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		get_page(page);
		__flush_anon_page(page, addr);
		flush_dcache_page(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);
	pte_unmap(ptep - 1);

	return 1;
}
/*
 * "Get" data from cleancache associated with the poolid/inode/index
 * that were specified when the data was put to cleancache and, if
 * successful, use it to fill the specified page with data and return 0.
 * The page frame is left unchanged and -1 is returned if the get fails.
 * Page must be locked by caller.
 */
int __cleancache_get_page(struct page *page)
{
	int ret = -1;
	int pool_id;

	VM_BUG_ON(!PageLocked(page));
	pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (pool_id >= 0) {
		ret = (*cleancache_ops.get_page)(pool_id,
						 page->mapping->host->i_ino,
						 page->index,
						 page);
		if (ret == 0)
			succ_gets++;
		else
			failed_gets++;
	}
	return ret;
}
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	pte_t *ptep;

	if (tlb_type == hypervisor) {
		result = _PAGE_PRESENT_4V|_PAGE_P_4V;
		if (write)
			result |= _PAGE_WRITE_4V;
	} else {
		result = _PAGE_PRESENT_4U|_PAGE_P_4U;
		if (write)
			result |= _PAGE_WRITE_4U;
	}
	mask = result | _PAGE_SPECIAL;

	ptep = pte_offset_kernel(&pmd, addr);
	do {
		struct page *page, *head;
		pte_t pte = *ptep;

		if ((pte_val(pte) & mask) != result)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

		page = pte_page(pte);
		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			return 0;
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			return 0;
		}
		if (head != page)
			get_huge_page_tail(page);

		pages[*nr] = page;
		(*nr)++;
	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}
Example #14
0
void copy_mm_to_paca(struct mm_struct *mm)
{
#ifdef CONFIG_PPC_BOOK3S
	mm_context_t *context = &mm->context;

	get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
	VM_BUG_ON(!mm->context.addr_limit);
	get_paca()->addr_limit = mm->context.addr_limit;
	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
	memcpy(&get_paca()->mm_ctx_high_slices_psize,
	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
#else /* CONFIG_PPC_MM_SLICES */
	get_paca()->mm_ctx_user_psize = context->user_psize;
	get_paca()->mm_ctx_sllp = context->sllp;
#endif
#else /* CONFIG_PPC_BOOK3S */
	return;
#endif
}
Example #15
0
static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
{
	pfn_t pfn = *pfnp;
	gfn_t gfn = *ipap >> PAGE_SHIFT;

	if (PageTransCompound(pfn_to_page(pfn))) {
		unsigned long mask;
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page.  We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages, specifically
		 * before being able to call __split_huge_page_refcount().
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*ipap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}
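A short, self-contained illustration of the mask arithmetic above, assuming 4K base pages and 2M huge pages (so 512 PTEs per PMD): clearing the low bits of a tail-page pfn yields the head-page pfn, which is the adjustment the function applies to *pfnp and *ipap. The DEMO_ name is invented for the sketch.

#include <assert.h>

#define DEMO_PTRS_PER_PMD	512UL	/* assumption: 4K pages, 2M huge pages */

int main(void)
{
	unsigned long mask = DEMO_PTRS_PER_PMD - 1;
	unsigned long pfn = 0x12345;			/* some tail-page pfn */
	unsigned long head_pfn = pfn & ~mask;		/* back to the head page */

	assert((head_pfn & mask) == 0);
	assert(head_pfn <= pfn && pfn - head_pfn < DEMO_PTRS_PER_PMD);
	return 0;
}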
Example #16
0
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (!pgd)
		return -ENOMEM;

	/* stage-2 pgd must be aligned to its size */
	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));

	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;

	return 0;
}
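The VM_BUG_ON above is the usual power-of-two alignment check: an address is size-aligned iff its low log2(size) bits are zero. A minimal userspace sketch of the same test, using aligned_alloc() only for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static int is_aligned(const void *p, size_t size)
{
	return ((uintptr_t)p & (size - 1)) == 0;	/* size must be a power of two */
}

int main(void)
{
	void *p = aligned_alloc(4096, 4096);	/* page-sized, page-aligned */

	assert(p && is_aligned(p, 4096));
	free(p);
	return 0;
}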
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
							unsigned long addr,
							unsigned long len,
							unsigned long pgoff,
							unsigned long flags,
							unsigned long offset)
{
	struct mm_struct *mm = current->mm;
	unsigned long task_size = TASK_SIZE;
	struct vm_unmapped_area_info info;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	info.flags = 0;
	info.length = len;
	info.low_limit = mm->mmap_base;
	info.high_limit = min(task_size, VA_EXCLUDE_START);
	info.align_mask = PAGE_MASK & ~HPAGE_MASK;
	info.align_offset = 0;
	info.threadstack_offset = offset;
	addr = vm_unmapped_area(&info);

	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
		VM_BUG_ON(addr != -ENOMEM);
		info.low_limit = VA_EXCLUDE_END;

#ifdef CONFIG_PAX_RANDMMAP
		if (mm->pax_flags & MF_PAX_RANDMMAP)
			info.low_limit += mm->delta_mmap;
#endif

		info.high_limit = task_size;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}
Example #18
0
/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error;
	struct address_space *address_space;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	get_page(page);
	SetPageSwapCache(page);
	set_page_private(page, entry.val);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	error = radix_tree_insert(&address_space->page_tree,
				  entry.val, page);
	if (likely(!error)) {
		address_space->nrpages++;
		__inc_zone_page_state(page, NR_FILE_PAGES);
		INC_CACHE_INFO(add_total);
	}
	spin_unlock_irq(&address_space->tree_lock);

	if (unlikely(error)) {
		/*
		 * Only the context which has set the SWAP_HAS_CACHE flag
		 * would call add_to_swap_cache().
		 * So add_to_swap_cache() doesn't return -EEXIST.
		 */
		VM_BUG_ON(error == -EEXIST);
		set_page_private(page, 0UL);
		ClearPageSwapCache(page);
		put_page(page);
	}

	return error;
}
Example #19
0
/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	pte_t *ptep;

	result = _PAGE_PRESENT|_PAGE_USER;
	if (write)
		result |= _PAGE_RW;
	mask = result | _PAGE_SPECIAL;

	ptep = pte_offset_kernel(&pmd, addr);
	do {
		pte_t pte = READ_ONCE(*ptep);
		struct page *page;
		/*
		 * Similar to the PMD case, NUMA hinting must take slow path
		 */
		if (pte_numa(pte))
			return 0;

		if ((pte_val(pte) & mask) != result)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		if (!page_cache_get_speculative(page))
			return 0;
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(page);
			return 0;
		}
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}
/**
 * dma_release_from_contiguous() - release allocated pages
 * @dev:   Pointer to device for which the pages were allocated.
 * @pages: Allocated pages.
 * @count: Number of allocated pages.
 *
 * This function releases memory allocated by dma_alloc_from_contiguous().
 * It returns false when provided pages do not belong to contiguous area and
 * true otherwise.
 */
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
				 int count)
{
	struct cma *cma = dev_get_cma_area(dev);
	unsigned long pfn;

	if (!cma || !pages)
		return false;

	pr_debug("%s(page %p)\n", __func__, (void *)pages);

	pfn = page_to_pfn(pages);

	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
		return false;

	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);

	free_contig_range(pfn, count);
	clear_cma_bitmap(cma, pfn, count);

	return true;
}
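A hedged sketch of the containment check above in plain C: the released range must lie entirely inside the region, and writing the upper-bound comparison as a subtraction keeps it overflow-safe. struct region and the helper name are illustrative, not the CMA API.

#include <stdbool.h>

struct region {
	unsigned long base;	/* first unit covered by the region */
	unsigned long count;	/* number of units in the region */
};

static bool range_in_region(const struct region *r,
			    unsigned long start, unsigned long n)
{
	return start >= r->base && n <= r->count &&
	       start - r->base <= r->count - n;	/* start + n <= base + count */
}

int main(void)
{
	struct region cma = { .base = 1000, .count = 64 };

	return (range_in_region(&cma, 1000, 64) &&
		!range_in_region(&cma, 1060, 8)) ? 0 : 1;
}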
Example #21
0
/* used by __split_huge_page_refcount() */
void lru_add_page_tail(struct page *page, struct page *page_tail,
		       struct lruvec *lruvec, struct list_head *list)
{
	const int file = 0;

	VM_BUG_ON_PAGE(!PageHead(page), page);
	VM_BUG_ON_PAGE(PageCompound(page_tail), page);
	VM_BUG_ON_PAGE(PageLRU(page_tail), page);
	VM_BUG_ON(NR_CPUS != 1 &&
		  !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock));

	if (!list)
		SetPageLRU(page_tail);

	if (likely(PageLRU(page)))
		list_add_tail(&page_tail->lru, &page->lru);
	else if (list) {
		/* page reclaim is reclaiming a huge page */
		get_page(page_tail);
		list_add_tail(&page_tail->lru, list);
	} else {
		struct list_head *list_head;
		/*
		 * Head page has not yet been counted, as an hpage,
		 * so we must account for each subpage individually.
		 *
		 * Use the standard add function to put page_tail on the list,
		 * but then correct its position so they all end up in order.
		 */
		add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
		list_head = page_tail->lru.prev;
		list_move_tail(&page_tail->lru, list_head);
	}

	if (!PageUnevictable(page))
		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
}
Example #22
0
/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void __pagevec_lru_add(struct pagevec *pvec)
{
	int i;
	struct zone *zone = NULL;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		VM_BUG_ON(PageLRU(page));
		SetPageLRU(page);
		add_page_to_inactive_list(zone, page);
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}
/*
 * Hacked from kernel function __get_user_pages in mm/memory.c
 *
 * Handle buffers allocated by other kernel space driver and mmaped into user
 * space, function Ignore the VM_PFNMAP and VM_IO flag in VMA structure
 *
 * Get physical pages from user space virtual address and update into page list
 */
static int __get_pfnmap_pages(struct task_struct *tsk, struct mm_struct *mm,
			      unsigned long start, int nr_pages,
			      unsigned int gup_flags, struct page **pages,
			      struct vm_area_struct **vmas)
{
	int i, ret;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_vma(mm, start);
		if (!vma) {
			dev_err(atomisp_dev, "find_vma failed\n");
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			/*
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			*/
			continue;
		}

		do {
			struct page *page;
			unsigned long pfn;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				dev_err(atomisp_dev,
					"fatal_signal_pending in %s\n",
					__func__);
				return i ? i : -ERESTARTSYS;
			}

			ret = follow_pfn(vma, start, &pfn);
			if (ret) {
				dev_err(atomisp_dev, "follow_pfn() failed\n");
				return i ? : -EFAULT;
			}

			page = pfn_to_page(pfn);
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
				get_page(page);
				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
/*
 * "Get" data from cleancache associated with the poolid/inode/index
 * that were specified when the data was put to cleancache and, if
 * successful, use it to fill the specified page with data and return 0.
 * The page frame is left unchanged and -1 is returned if the get fails.
 * Page must be locked by caller.
 */
int __cleancache_get_page(struct page *page)
{
	int ret = -1;
	int pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	VM_BUG_ON(!PageLocked(page));
	pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (pool_id < 0)
		goto out;

	if (cleancache_get_key(page->mapping->host, &key) < 0)
		goto out;

	ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
	if (ret == 0)
		cleancache_succ_gets++;
	else
		cleancache_failed_gets++;
out:
	return ret;
}
EXPORT_SYMBOL(__cleancache_get_page);

/*
 * "Put" data from a page to cleancache and associate it with the
 * (previously-obtained per-filesystem) poolid and the page's
 * inode and page index.  Page must be locked.  Note that a put_page
 * always "succeeds", though a subsequent get_page may succeed or fail.
 */
void __cleancache_put_page(struct page *page)
{
	int pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	VM_BUG_ON(!PageLocked(page));
	pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (pool_id >= 0 &&
	      cleancache_get_key(page->mapping->host, &key) >= 0) {
		(*cleancache_ops.put_page)(pool_id, key, page->index, page);
		cleancache_puts++;
	}
}
EXPORT_SYMBOL(__cleancache_put_page);

/*
 * Invalidate any data from cleancache associated with the poolid and the
 * page's inode and page index so that a subsequent "get" will fail.
 */
void __cleancache_invalidate_page(struct address_space *mapping,
					struct page *page)
{
	/* careful... page->mapping is NULL sometimes when this is called */
	int pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (pool_id >= 0) {
		VM_BUG_ON(!PageLocked(page));
		if (cleancache_get_key(mapping->host, &key) >= 0) {
			(*cleancache_ops.invalidate_page)(pool_id, key, page->index);
			cleancache_flushes++;
		}
	}
}
EXPORT_SYMBOL(__cleancache_invalidate_page);

/*
 * Invalidate all data from cleancache associated with the poolid and the
 * mapping's inode so that all subsequent gets to this poolid/inode
 * will fail.
 */
void __cleancache_invalidate_inode(struct address_space *mapping)
{
	int pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
		(*cleancache_ops.invalidate_inode)(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_invalidate_inode);

/*
 * Called by any cleancache-enabled filesystem at time of unmount;
 * note that pool_id is surrendered and may be returned by a subsequent
 * cleancache_init_fs or cleancache_init_shared_fs.
 */
void __cleancache_invalidate_fs(struct super_block *sb)
{
	if (sb->cleancache_poolid >= 0) {
		int old_poolid = sb->cleancache_poolid;
		sb->cleancache_poolid = -1;
		(*cleancache_ops.invalidate_fs)(old_poolid);
	}
}
EXPORT_SYMBOL(__cleancache_invalidate_fs);

#ifdef CONFIG_SYSFS

/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */

#define CLEANCACHE_SYSFS_RO(_name) \
	static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", cleancache_##_name); \
	} \
	static struct kobj_attribute cleancache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = cleancache_##_name##_show, \
	}

CLEANCACHE_SYSFS_RO(succ_gets);
CLEANCACHE_SYSFS_RO(failed_gets);
CLEANCACHE_SYSFS_RO(puts);
CLEANCACHE_SYSFS_RO(flushes);

static struct attribute *cleancache_attrs[] = {
	&cleancache_succ_gets_attr.attr,
	&cleancache_failed_gets_attr.attr,
	&cleancache_puts_attr.attr,
	&cleancache_flushes_attr.attr,
	NULL,
};

static struct attribute_group cleancache_attr_group = {
	.attrs = cleancache_attrs,
	.name = "cleancache",
};

#endif /* CONFIG_SYSFS */

static int __init init_cleancache(void)
{
#ifdef CONFIG_SYSFS
	int err;

	err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
#endif /* CONFIG_SYSFS */
	return 0;
}
module_init(init_cleancache)
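The CLEANCACHE_SYSFS_RO() macro above stamps out one show routine and one attribute per counter name via token pasting. Below is a userspace analogue of that pattern, with all demo_* names invented for the sketch:

#include <stdio.h>

static unsigned long demo_hits, demo_misses;

#define DEMO_COUNTER_SHOW(_name)					\
	static int demo_##_name##_show(char *buf, size_t len)		\
	{								\
		return snprintf(buf, len, "%lu\n", demo_##_name);	\
	}

DEMO_COUNTER_SHOW(hits)
DEMO_COUNTER_SHOW(misses)

int main(void)
{
	char buf[32];

	demo_hits = 3;
	demo_hits_show(buf, sizeof(buf));
	fputs(buf, stdout);			/* prints "3" */
	demo_misses_show(buf, sizeof(buf));
	fputs(buf, stdout);			/* prints "0" */
	return 0;
}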
Example #25
0
static void put_compound_page(struct page *page)
{
	struct page *page_head;

	if (likely(!PageTail(page))) {
		if (put_page_testzero(page)) {
			/*
			 * By the time all refcounts have been released
			 * split_huge_page cannot run anymore from under us.
			 */
			if (PageHead(page))
				__put_compound_page(page);
			else
				__put_single_page(page);
		}
		return;
	}

	/* __split_huge_page_refcount can run under us */
	page_head = compound_trans_head(page);

	/*
	 * THP can not break up slab pages so avoid taking
	 * compound_lock() and skip the tail page refcounting (in
	 * _mapcount) too. Slab performs non-atomic bit ops on
	 * page->flags for better performance. In particular
	 * slab_unlock() in slub used to be a hot path. It is still
	 * hot on arches that do not support
	 * this_cpu_cmpxchg_double().
	 *
	 * If "page" is part of a slab or hugetlbfs page it cannot be
	 * split and the head page cannot change from under us. And
	 * if "page" is part of a THP page under splitting, if the
	 * head page pointed by the THP tail isn't a THP head anymore,
	 * we'll find PageTail clear after smp_rmb() and we'll treat
	 * it as a single page.
	 */
	if (!__compound_tail_refcounted(page_head)) {
		/*
		 * If "page" is a THP tail, we must read the tail page
		 * flags after the head page flags. The
		 * split_huge_page side enforces write memory barriers
		 * between clearing PageTail and before the head page
		 * can be freed and reallocated.
		 */
		smp_rmb();
		if (likely(PageTail(page))) {
			/*
			 * __split_huge_page_refcount cannot race
			 * here.
			 */
			VM_BUG_ON(!PageHead(page_head));
			VM_BUG_ON(page_mapcount(page) != 0);
			if (put_page_testzero(page_head)) {
				/*
				 * If this is the tail of a slab
				 * compound page, the tail pin must
				 * not be the last reference held on
				 * the page, because the PG_slab
				 * cannot be cleared before all tail
				 * pins (which skips the _mapcount
				 * tail refcounting) have been
				 * released. For hugetlbfs the tail
				 * pin may be the last reference on
				 * the page instead, because
				 * PageHeadHuge will not go away until
				 * the compound page enters the buddy
				 * allocator.
				 */
				VM_BUG_ON(PageSlab(page_head));
				__put_compound_page(page_head);
			}
			return;
		} else
			/*
			 * __split_huge_page_refcount run before us,
			 * "page" was a THP tail. The split page_head
			 * has been freed and reallocated as slab or
			 * hugetlbfs page of smaller order (only
			 * possible if reallocated as slab on x86).
			 */
			goto out_put_single;
	}

	if (likely(page != page_head && get_page_unless_zero(page_head))) {
		unsigned long flags;

		/*
		 * page_head wasn't a dangling pointer but it may not
		 * be a head page anymore by the time we obtain the
		 * lock. That is ok as long as it can't be freed from
		 * under us.
		 */
		flags = compound_lock_irqsave(page_head);
		if (unlikely(!PageTail(page))) {
			/* __split_huge_page_refcount run before us */
			compound_unlock_irqrestore(page_head, flags);
			if (put_page_testzero(page_head)) {
				/*
				 * The head page may have been freed
				 * and reallocated as a compound page
				 * of smaller order and then freed
				 * again.  All we know is that it
				 * cannot have become: a THP page, a
				 * compound page of higher order, a
				 * tail page.  That is because we
				 * still hold the refcount of the
				 * split THP tail and page_head was
				 * the THP head before the split.
				 */
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
out_put_single:
			if (put_page_testzero(page))
				__put_single_page(page);
			return;
		}
		VM_BUG_ON(page_head != page->first_page);
		/*
		 * We can release the refcount taken by
		 * get_page_unless_zero() now that
		 * __split_huge_page_refcount() is blocked on the
		 * compound_lock.
		 */
		if (put_page_testzero(page_head))
			VM_BUG_ON(1);
		/* __split_huge_page_refcount will wait now */
		VM_BUG_ON(page_mapcount(page) <= 0);
		atomic_dec(&page->_mapcount);
		VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
		VM_BUG_ON(atomic_read(&page->_count) != 0);
		compound_unlock_irqrestore(page_head, flags);

		if (put_page_testzero(page_head)) {
			if (PageHead(page_head))
				__put_compound_page(page_head);
			else
				__put_single_page(page_head);
		}
	} else {
		/* page_head is a dangling pointer */
		VM_BUG_ON(PageTail(page));
		goto out_put_single;
	}
}
Example #26
0
/**
 * lru_cache_add - add a page to a page list
 * @page: the page to be added to the LRU.
 */
void lru_cache_add(struct page *page)
{
	VM_BUG_ON(PageActive(page) && PageUnevictable(page));
	VM_BUG_ON(PageLRU(page));
	__lru_cache_add(page);
}
Example #27
0
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	int do_color_align, last_mmap;
	struct vm_unmapped_area_info info;

#ifdef CONFIG_64BIT
	/* This should only ever run for 32-bit processes.  */
	BUG_ON(!test_thread_flag(TIF_32BIT));
#endif

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	do_color_align = 0;
	if (filp || (flags & MAP_SHARED))
		do_color_align = 1;
	last_mmap = GET_LAST_MMAP(filp);

	if (flags & MAP_FIXED) {
		if ((flags & MAP_SHARED) && last_mmap &&
		    (addr - shared_align_offset(last_mmap, pgoff))
			& (SHM_COLOUR - 1))
			return -EINVAL;
		goto found_addr;
	}

	/* requesting a specific address */
	if (addr) {
		if (do_color_align && last_mmap)
			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
		else
			addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			goto found_addr;
	}

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
	info.align_offset = shared_align_offset(last_mmap, pgoff);
	addr = vm_unmapped_area(&info);
	if (!(addr & ~PAGE_MASK))
		goto found_addr;
	VM_BUG_ON(addr != -ENOMEM);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);

found_addr:
	if (do_color_align && !last_mmap && !(addr & ~PAGE_MASK))
		SET_LAST_MMAP(filp, addr - (pgoff << PAGE_SHIFT));

	return addr;
}
/*
 * "Get" data from cleancache associated with the poolid/inode/index
 * that were specified when the data was put to cleancache and, if
 * successful, use it to fill the specified page with data and return 0.
 * The page frame is left unchanged and -1 is returned if the get fails.
 * Page must be locked by caller.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
int __cleancache_get_page(struct page *page)
{
	int ret = -1;
	int pool_id;
	int fake_pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_failed_gets++;
		goto out;
	}

	VM_BUG_ON(!PageLocked(page));
	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (fake_pool_id < 0)
		goto out;
	pool_id = get_poolid_from_fake(fake_pool_id);

	if (cleancache_get_key(page->mapping->host, &key) < 0)
		goto out;

	if (pool_id >= 0)
		ret = cleancache_ops->get_page(pool_id,
				key, page->index, page);
	if (ret == 0)
		cleancache_succ_gets++;
	else
		cleancache_failed_gets++;
out:
	return ret;
}
EXPORT_SYMBOL(__cleancache_get_page);

/*
 * "Put" data from a page to cleancache and associate it with the
 * (previously-obtained per-filesystem) poolid and the page's
 * inode and page index.  Page must be locked.  Note that a put_page
 * always "succeeds", though a subsequent get_page may succeed or fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_put_page(struct page *page)
{
	int pool_id;
	int fake_pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_puts++;
		return;
	}

	VM_BUG_ON(!PageLocked(page));
	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (fake_pool_id < 0)
		return;

	pool_id = get_poolid_from_fake(fake_pool_id);

	if (pool_id >= 0 &&
		cleancache_get_key(page->mapping->host, &key) >= 0) {
		cleancache_ops->put_page(pool_id, key, page->index, page);
		cleancache_puts++;
	}
}
EXPORT_SYMBOL(__cleancache_put_page);

/*
 * Invalidate any data from cleancache associated with the poolid and the
 * page's inode and page index so that a subsequent "get" will fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_invalidate_page(struct address_space *mapping,
					struct page *page)
{
	/* careful... page->mapping is NULL sometimes when this is called */
	int pool_id;
	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (fake_pool_id >= 0) {
		pool_id = get_poolid_from_fake(fake_pool_id);
		if (pool_id < 0)
			return;

		VM_BUG_ON(!PageLocked(page));
		if (cleancache_get_key(mapping->host, &key) >= 0) {
			cleancache_ops->invalidate_page(pool_id,
					key, page->index);
			cleancache_invalidates++;
		}
	}
}
EXPORT_SYMBOL(__cleancache_invalidate_page);

/*
 * Invalidate all data from cleancache associated with the poolid and the
 * mapping's inode so that all subsequent gets to this poolid/inode
 * will fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_invalidate_inode(struct address_space *mapping)
{
	int pool_id;
	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (fake_pool_id < 0)
		return;

	pool_id = get_poolid_from_fake(fake_pool_id);

	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
		cleancache_ops->invalidate_inode(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_invalidate_inode);

/*
 * Called by any cleancache-enabled filesystem at time of unmount;
 * note that pool_id is surrendered and may be returned by a subsequent
 * cleancache_init_fs or cleancache_init_shared_fs.
 */
void __cleancache_invalidate_fs(struct super_block *sb)
{
	int index;
	int fake_pool_id = sb->cleancache_poolid;
	int old_poolid = fake_pool_id;

	mutex_lock(&poolid_mutex);
	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) {
		index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET;
		old_poolid = shared_fs_poolid_map[index];
		shared_fs_poolid_map[index] = FS_UNKNOWN;
		uuids[index] = NULL;
	} else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) {
		index = fake_pool_id - FAKE_FS_POOLID_OFFSET;
		old_poolid = fs_poolid_map[index];
		fs_poolid_map[index] = FS_UNKNOWN;
	}
	sb->cleancache_poolid = -1;
	if (cleancache_ops)
		cleancache_ops->invalidate_fs(old_poolid);
	mutex_unlock(&poolid_mutex);
}
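A minimal userspace sketch of the indirection pattern the file above relies on: a backend registers an ops table of function pointers, and the frontend calls through it only when a backend is present, treating "no backend" like a miss. All demo_* names are invented for this sketch.

#include <stdio.h>

struct demo_ops {
	int (*get_page)(int pool_id, unsigned long index, void *page);
};

static struct demo_ops *demo_backend;	/* NULL until a backend registers */

static int demo_get_page(int pool_id, unsigned long index, void *page)
{
	if (!demo_backend)
		return -1;	/* no backend: behave like a cache miss */
	return demo_backend->get_page(pool_id, index, page);
}

static int always_miss(int pool_id, unsigned long index, void *page)
{
	(void)pool_id; (void)index; (void)page;
	return -1;
}

static struct demo_ops miss_ops = { .get_page = always_miss };

int main(void)
{
	char page[16];

	printf("%d\n", demo_get_page(0, 0, page));	/* -1: nothing registered */
	demo_backend = &miss_ops;
	printf("%d\n", demo_get_page(0, 0, page));	/* -1: backend reports a miss */
	return 0;
}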
Example #29
0
/*
 * A commonly used alloc and free fn.
 */
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
	struct kmem_cache *mem = pool_data;
	VM_BUG_ON(mem->ctor);
	return kmem_cache_alloc(mem, gfp_mask);
}
Example #30
0
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long task_size = STACK_TOP32;
	unsigned long addr = addr0;
	int do_color_align;
	struct vm_unmapped_area_info info;

	/* This should only ever run for 32-bit processes.  */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	if (flags & MAP_FIXED) {
		/* We do not accept a shared mapping if it would violate
		 * cache aliasing constraints.
		 */
		if ((flags & MAP_SHARED) &&
		    ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
			return -EINVAL;
		return addr;
	}

	if (unlikely(len > task_size))
		return -ENOMEM;

	do_color_align = 0;
	if (filp || (flags & MAP_SHARED))
		do_color_align = 1;

	/* requesting a specific address */
	if (addr) {
		if (do_color_align)
			addr = COLOR_ALIGN(addr, pgoff);
		else
			addr = PAGE_ALIGN(addr);

		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
	info.align_offset = pgoff << PAGE_SHIFT;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = STACK_TOP32;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}