Example 1
/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 *
 * This is called with the MM semaphore held and the page table
 * spinlock held. Exit with the spinlock released.
 */
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
	unsigned long address, int write_access, pte_t *page_table)
{
	struct page * new_page;
	pte_t entry;

	if (!vma->vm_ops || !vma->vm_ops->nopage)
		return do_anonymous_page(mm, vma, page_table, write_access, address);
	spin_unlock(&mm->page_table_lock);

	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);

	if (new_page == NULL)	/* no page was available -- SIGBUS */
		return 0;
	if (new_page == NOPAGE_OOM)
		return -1;

	/*
	 * Should we do an early C-O-W break?
	 */
	if (write_access && !(vma->vm_flags & VM_SHARED)) {
		struct page * page = alloc_page(GFP_HIGHUSER);
		if (!page) {
			page_cache_release(new_page);
			return -1;
		}
		copy_highpage(page, new_page);
		page_cache_release(new_page);
		lru_cache_add(page);
		new_page = page;
	}

	spin_lock(&mm->page_table_lock);
	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
	 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	/* Only go through if we didn't race with anybody else... */
	if (pte_none(*page_table)) {
		++mm->rss;
		flush_page_to_ram(new_page);
		flush_icache_page(vma, new_page);
		entry = mk_pte(new_page, vma->vm_page_prot);
		if (write_access)
			entry = pte_mkwrite(pte_mkdirty(entry));
		set_pte(page_table, entry);
	} else {
		/* One of our sibling threads was faster, back out. */
		page_cache_release(new_page);
		spin_unlock(&mm->page_table_lock);
		return 1;
	}

	/* no need to invalidate: a not-present page shouldn't be cached */
	update_mmu_cache(vma, address, entry);
	spin_unlock(&mm->page_table_lock);
	return 2;	/* Major fault */
}
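
The early-COW branch above is observable from user space: if the very first touch of a private file mapping is a write, the kernel immediately hands the process its own writable, dirty copy, so the next write to the same page takes no fault at all. A minimal, hypothetical demo follows (the file path and fault-counter helper are illustrative assumptions, and getrusage() counts are approximate since unrelated code also faults):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>

/* Total page faults taken by this process so far (approximate). */
static long faults(void)
{
	struct rusage ru;
	getrusage(RUSAGE_SELF, &ru);
	return ru.ru_minflt + ru.ru_majflt;
}

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);	/* any readable file */
	if (fd < 0)
		return 1;
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	long before = faults();
	p[0] = 'x';	/* first touch is a write: early COW break,
			   the private copy is mapped writable and dirty */
	long after_write = faults();
	p[1] = 'y';	/* same page again: no second fault needed */
	printf("first write: %ld fault(s), second write: %ld\n",
	       after_write - before, faults() - after_write);

	munmap(p, 4096);
	close(fd);
	return 0;
}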
Example 2
/*
 * We hold the mm semaphore and the page_table_lock on entry and
 * should release the page table lock on exit.
 */
static int do_swap_page(struct mm_struct * mm,
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, pte_t orig_pte, int write_access)
{
	struct page *page;
	swp_entry_t entry = pte_to_swp_entry(orig_pte);
	pte_t pte;
	int ret = 1;

	spin_unlock(&mm->page_table_lock);
	page = lookup_swap_cache(entry);
	if (!page) {
		swapin_readahead(entry);
		page = read_swap_cache_async(entry);
		if (!page) {
			/*
			 * Back out if somebody else faulted in this pte while
			 * we released the page table lock.
			 */
			int retval;
			spin_lock(&mm->page_table_lock);
			retval = pte_same(*page_table, orig_pte) ? -1 : 1;
			spin_unlock(&mm->page_table_lock);
			return retval;
		}

		/* Had to read the page from swap area: Major fault */
		ret = 2;
	}

	lock_page(page);

	/*
	 * Back out if somebody else faulted in this pte while we
	 * released the page table lock.
	 */
	spin_lock(&mm->page_table_lock);
	if (!pte_same(*page_table, orig_pte)) {
		spin_unlock(&mm->page_table_lock);
		unlock_page(page);
		page_cache_release(page);
		return 1;
	}

	/* The page isn't present yet, go ahead with the fault. */
	swap_free(entry);
	if (vm_swap_full())
		remove_exclusive_swap_page(page);

	mm->rss++;
	pte = mk_pte(page, vma->vm_page_prot);
	if (write_access && can_share_swap_page(page))
		pte = pte_mkdirty(pte_mkwrite(pte));
	unlock_page(page);

	flush_page_to_ram(page);
	flush_icache_page(vma, page);
	set_pte(page_table, pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, address, pte);
	spin_unlock(&mm->page_table_lock);
	return ret;
}
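
The pte_same() re-checks above follow a general pattern: drop the lock around an operation that may sleep (here, swap I/O), then retake the lock and revalidate the protected state before committing, backing out if another thread resolved the fault first. A stripped-down pthread sketch of the same idea, with invented names, purely for illustration:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;			/* protected by lock */

static void *expensive_alloc(void)	/* stands in for the swap-in I/O */
{
	return malloc(128);
}

void *get_slot(void)
{
	pthread_mutex_lock(&lock);
	if (!slot) {
		pthread_mutex_unlock(&lock);	/* can't sleep under the lock */
		void *fresh = expensive_alloc();
		pthread_mutex_lock(&lock);
		if (slot)			/* somebody raced with us: */
			free(fresh);		/* back out, use their result */
		else
			slot = fresh;		/* we won: commit */
	}
	void *ret = slot;
	pthread_mutex_unlock(&lock);
	return ret;
}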
Example 3
/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 */
void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * page_table;
	pte_t entry;
	unsigned long page;

	pgd = pgd_offset(tsk->mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd)
		goto no_memory;
	page_table = pte_alloc(pmd, address);
	if (!page_table)
		goto no_memory;
	entry = *page_table;
	if (pte_present(entry))
		goto is_present;
	if (!pte_none(entry))
		goto swap_page;
	address &= PAGE_MASK;
	if (!vma->vm_ops || !vma->vm_ops->nopage)
		goto anonymous_page;
	/*
	 * The third argument is "no_share", which tells the low-level code
	 * to copy, not share the page even if sharing is possible.  It's
	 * essentially an early COW detection.
	 */
	page = vma->vm_ops->nopage(vma, address,
		(vma->vm_flags & VM_SHARED) ? 0 : write_access);
	if (!page)
		goto sigbus;
	++tsk->maj_flt;
	++vma->vm_mm->rss;
	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
	 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	flush_page_to_ram(page);
	entry = mk_pte(page, vma->vm_page_prot);
	if (write_access) {
		entry = pte_mkwrite(pte_mkdirty(entry));
	} else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED))
		entry = pte_wrprotect(entry);
	put_page(page_table, entry);
	/* no need to invalidate: a not-present page shouldn't be cached */
	return;

anonymous_page:
	entry = pte_wrprotect(mk_pte(ZERO_PAGE, vma->vm_page_prot));
	if (write_access) {
		unsigned long page = __get_free_page(GFP_KERNEL);
		if (!page)
			goto sigbus;
		memset((void *) page, 0, PAGE_SIZE);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		vma->vm_mm->rss++;
		tsk->min_flt++;
		flush_page_to_ram(page);
	}
	put_page(page_table, entry);
	return;

sigbus:
	force_sig(SIGBUS, current);
	put_page(page_table, BAD_PAGE);
	/* no need to invalidate, wasn't present */
	return;

swap_page:
	do_swap_page(tsk, vma, address, page_table, entry, write_access);
	return;

no_memory:
	oom(tsk);
is_present:
	return;
}
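
The anonymous_page label implements the zero-page optimization that Linux still performs today: a read fault on untouched anonymous memory maps the shared ZERO_PAGE write-protected, and a real page is allocated and zeroed only on the first write. A small user-space demo of the visible effect (illustrative only; the page size is obtained via sysconf):

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 64 * (size_t)page;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	long sum = 0;
	for (size_t i = 0; i < len; i += page)
		sum += p[i];	/* read faults all map the shared zero page,
				   so no real memory is consumed yet */
	printf("sum = %ld (always 0)\n", sum);

	p[0] = 1;	/* first write: a private zeroed page is allocated */
	munmap(p, len);
	return 0;
}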
Example 4
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                            pmd_t *dst_pmd,
                            struct vm_area_struct *dst_vma,
                            unsigned long dst_addr,
                            unsigned long src_addr,
                            struct page **pagep)
{
    struct mem_cgroup *memcg;
    pte_t _dst_pte, *dst_pte;
    spinlock_t *ptl;
    void *page_kaddr;
    int ret;
    struct page *page;

    if (!*pagep) {
        ret = -ENOMEM;
        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
        if (!page)
            goto out;

        page_kaddr = kmap_atomic(page);
        ret = copy_from_user(page_kaddr,
                             (const void __user *) src_addr,
                             PAGE_SIZE);
        kunmap_atomic(page_kaddr);

        /* fallback to copy_from_user outside mmap_sem */
        if (unlikely(ret)) {
            ret = -EFAULT;
            *pagep = page;
            /* don't free the page */
            goto out;
        }
    } else {
        page = *pagep;
        *pagep = NULL;
    }

    /*
     * The memory barrier inside __SetPageUptodate makes sure that
     * preceding stores to the page contents become visible before
     * the set_pte_at() write.
     */
    __SetPageUptodate(page);

    ret = -ENOMEM;
    if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg))
        goto out_release;

    _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
    if (dst_vma->vm_flags & VM_WRITE)
        _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

    ret = -EEXIST;
    dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
    if (!pte_none(*dst_pte))
        goto out_release_uncharge_unlock;

    inc_mm_counter(dst_mm, MM_ANONPAGES);
    page_add_new_anon_rmap(page, dst_vma, dst_addr);
    mem_cgroup_commit_charge(page, memcg, false);
    lru_cache_add_active_or_unevictable(page, dst_vma);

    set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

    /* No need to invalidate - it was non-present before */
    update_mmu_cache(dst_vma, dst_addr, dst_pte);

    pte_unmap_unlock(dst_pte, ptl);
    ret = 0;
out:
    return ret;
out_release_uncharge_unlock:
    pte_unmap_unlock(dst_pte, ptl);
    mem_cgroup_cancel_charge(page, memcg);
out_release:
    page_cache_release(page);
    goto out;
}
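
mcopy_atomic_pte() is the backend of the userfaultfd UFFDIO_COPY ioctl: it copies the caller's source page and atomically installs the new pte, returning -EEXIST if the slot was filled in the meantime. A minimal user-space sketch of the ioctl it serves (Linux-specific; error handling is trimmed, and unprivileged use may be restricted by the vm.unprivileged_userfaultfd sysctl):

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0)
		return 1;

	struct uffdio_api api = { .api = UFFD_API };
	if (ioctl(uffd, UFFDIO_API, &api))
		return 1;

	/* Anonymous region whose missing-page faults userfaultfd owns. */
	char *dst = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED)
		return 1;
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)dst, .len = page },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg))
		return 1;

	/* Source page in ordinary memory; the kernel copies it and
	 * installs the pte in one step (mcopy_atomic_pte above). */
	char src[page];
	memset(src, 0, sizeof src);
	strcpy(src, "hello from UFFDIO_COPY");
	struct uffdio_copy copy = {
		.dst = (unsigned long)dst,
		.src = (unsigned long)src,
		.len = page,
	};
	if (ioctl(uffd, UFFDIO_COPY, &copy))
		return 1;

	printf("%s\n", dst);	/* the page is present now, no fault taken */
	return 0;
}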
Example 5
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 */
void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	new_page = __get_free_page(GFP_KERNEL);
	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		goto end_wp_page;
	if (pgd_bad(*page_dir))
		goto bad_wp_pagedir;
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		goto end_wp_page;
	if (pmd_bad(*page_middle))
		goto bad_wp_pagemiddle;
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= high_memory)
		goto bad_wp_page;
	tsk->min_flt++;
	/*
	 * Do we need to copy?
	 */
	if (mem_map[MAP_NR(old_page)].count != 1) {
		if (new_page) {
			if (PageReserved(mem_map + MAP_NR(old_page)))
				++vma->vm_mm->rss;
			copy_page(old_page,new_page);
			flush_page_to_ram(old_page);
			flush_page_to_ram(new_page);
			flush_cache_page(vma, address);
			set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
			free_page(old_page);
			flush_tlb_page(vma, address);
			return;
		}
		flush_cache_page(vma, address);
		set_pte(page_table, BAD_PAGE);
		flush_tlb_page(vma, address);
		free_page(old_page);
		oom(tsk);
		return;
	}
	flush_cache_page(vma, address);
	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
	flush_tlb_page(vma, address);
	if (new_page)
		free_page(new_page);
	return;
bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagemiddle:
	printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagedir:
	printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
	send_sig(SIGKILL, tsk, 1);
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}
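
do_wp_page() is also what makes copy-on-write after fork() work: parent and child share every private page write-protected, and whichever process writes first gets its own copy while the other keeps the original. A short, hypothetical demonstration:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "parent data");

	if (fork() == 0) {
		/* The child's first write faults: do_wp_page() sees the
		 * shared count and gives the child its own copy. */
		strcpy(p, "child data");
		_exit(0);
	}
	wait(NULL);
	/* The parent's mapping is untouched by the child's write. */
	printf("%s\n", p);	/* prints "parent data" */
	return 0;
}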