Example #1
/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares (but this is /really/
 * inefficient, ugh).
 *
 * We return 1 after having slept, which makes the process start over
 * from the beginning for this process..
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		unsigned long page_nr = MAP_NR(pte_page(pte));
		if (page_nr >= MAP_NR(high_memory))
			return 0;
		if (!in_swap_cache(page_nr))
			return 0;
		if (SWP_TYPE(in_swap_cache(page_nr)) != type)
			return 0;
		delete_from_swap_cache(page_nr);
		set_pte(dir, pte_mkdirty(pte));
		return 0;
	}
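	/* Not present: this is a swap entry.  Ignore it unless it refers
	 * to the swap device we are trying to turn off. */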
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	read_swap_page(pte_val(pte), (char *) page);
#if 0 /* Is this really needed here, hasn't it been solved elsewhere? */
	flush_page_to_ram(page);
#endif
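	/* read_swap_page() may have slept; if the pte changed underneath
	 * us, throw the page away and let the caller start over. */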
	if (pte_val(*dir) != pte_val(pte)) {
		free_page(page);
		return 1;
	}
	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	flush_tlb_page(vma, address);
	++vma->vm_mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
Example #2
/* this routine handles present pages, when users try to write
   to a shared page.
   */
void do_wp_page(struct vm_area_struct *vma, unsigned long address, int write_access)
{
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *page_table,pte;
    unsigned long old_page, new_page;

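    /* Speculatively allocate a replacement page before walking the page
       tables; every path that does not need it frees it again. */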
    new_page = get_free_page(GFP_KERNEL);

    pgd = pgd_offset(vma->vm_task, address);
    if(pgd_none(*pgd))
        goto end_wp_page;
    if(pgd_bad(*pgd))
        goto bad_wp_page;
    pmd = pmd_offset(pgd,address);
    if(pmd_none(*pmd))
        goto end_wp_page;
    if(pmd_bad(*pmd))
        goto bad_wp_page;
    page_table = pte_offset(pmd,address);
    pte = *page_table;
    if(!pte_present(pte))
        goto end_wp_page;
    if(pte_write(pte))
        goto  end_wp_page;
    old_page = pte_page(pte);
    if(old_page >= main_memory_end)
        goto bad_wp_page;

    (vma->vm_task->mm->min_flt)++;

    if(mem_map[MAP_NR(old_page)].flags & PAGE_PRESENT)
    {
        if(new_page)
        {
            if(mem_map[MAP_NR(old_page)].flags & MAP_PAGE_RESERVED)
                ++(vma->vm_task->mm->rss);
            copy_page(old_page, new_page);
            *page_table = pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)));
            free_page(old_page);
            return;
        }
        pte_val(*page_table) &= PAGE_BAD;
        free_page(old_page);
        oom();
        return;
    }
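    /* No copy needed: make the existing mapping writable and dirty,
       and release the unused replacement page. */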
    *page_table = pte_mkdirty(pte_mkwrite(pte));
    if(new_page)
        free_page(new_page);
    return;
bad_wp_page:
    printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
    goto end_wp_page;
end_wp_page:
    if(new_page)
        free_page(new_page);
    return;
}
Example #3
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pte_t pte;

	if (pte_cont(*ptep)) {
		int ncontig, i;
		size_t pgsize;
		pte_t *cpte;
		bool is_dirty = false;

		cpte = huge_pte_offset(mm, addr);
		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
		/* save the 1st pte to return */
		pte = ptep_get_and_clear(mm, addr, cpte);
		for (i = 1; i < ncontig; ++i) {
			/*
			 * If HW_AFDBM is enabled, then the HW could
			 * turn on the dirty bit for any of the pages
			 * in the set, so check them all.
			 */
			++cpte;
			if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
				is_dirty = true;
		}
		if (is_dirty)
			return pte_mkdirty(pte);
		else
			return pte;
	} else {
		return ptep_get_and_clear(mm, addr, ptep);
	}
}
Example #4
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}
Example #5
/*
 * page not present ... go through shm_pages
 */
static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
    pte_t pte;
    struct shmid_kernel *shp;
    unsigned int id, idx;

    id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
    idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;

#ifdef DEBUG_SHM
    if (id > max_shmid) {
        printk ("shm_nopage: id=%d too big. proc mem corrupted\n", id);
        return 0;
    }
#endif
    shp = shm_segs[id];

#ifdef DEBUG_SHM
    if (shp == IPC_UNUSED || shp == IPC_NOID) {
        printk ("shm_nopage: id=%d invalid. Race.\n", id);
        return 0;
    }
#endif
    /* This can occur on a remap */

    if (idx >= shp->shm_npages) {
        return 0;
    }

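    /* get_free_page() and the swap read below may sleep, so the slot is
       re-read after each potentially blocking call. */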
    pte = __pte(shp->shm_pages[idx]);
    if (!pte_present(pte)) {
        unsigned long page = get_free_page(GFP_USER);
        if (!page)
            return -1;
        pte = __pte(shp->shm_pages[idx]);
        if (pte_present(pte)) {
            free_page (page); /* doesn't sleep */
            goto done;
        }
        if (!pte_none(pte)) {
            rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
            pte = __pte(shp->shm_pages[idx]);
            if (pte_present(pte))  {
                free_page (page); /* doesn't sleep */
                goto done;
            }
            swap_free(pte_val(pte));
            shm_swp--;
        }
        shm_rss++;
        pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
        shp->shm_pages[idx] = pte_val(pte);
    } else
        --current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
    current->min_flt++;
    atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
    return pte_page(pte);
}
Example #6
/*
 * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
 */
static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
		pte_t *page_table)
{
	flush_page_to_ram(new_page);
	flush_cache_page(vma, address);
	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
Example #7
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	struct vm_area_struct vma = { .vm_mm = mm };
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty bit for any page in the set, so check
		 * them all.  All hugetlb entries are already young.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);
	}

	if (valid)
		flush_tlb_range(&vma, saddr, addr);
	return orig_pte;
}
Example #8
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i, changed = 0;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	if (!pte_same(orig_pte, pte))
		changed = 1;

	/* Make sure we don't lose the dirty state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return changed;
}
Example #9
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We hold the mm semaphore and the page_table_lock on entry and exit
 * with the page_table_lock released.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;

	if (!TryLockPage(old_page)) {
		int reuse = can_share_swap_page(old_page);
		unlock_page(old_page);
		if (reuse) {
#ifndef CONFIG_SUPERH
			/* Not needed for VIPT cache */
			flush_cache_page(vma, address);
#endif
			establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
			spin_unlock(&mm->page_table_lock);
			return 1;	/* Minor fault */
		}
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	page_cache_get(old_page);
	spin_unlock(&mm->page_table_lock);

	new_page = alloc_page(GFP_HIGHUSER);
	if (!new_page)
		goto no_mem;
	copy_cow_page(old_page,new_page,address);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	spin_lock(&mm->page_table_lock);
	if (pte_same(*page_table, pte)) {
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, new_page, address, page_table);
		lru_cache_add(new_page);

		/* Free the old page.. */
		new_page = old_page;
	}
	spin_unlock(&mm->page_table_lock);
	page_cache_release(new_page);
	page_cache_release(old_page);
	return 1;	/* Minor fault */

bad_wp_page:
	spin_unlock(&mm->page_table_lock);
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
no_mem:
	page_cache_release(old_page);
	return -1;
}
Example #10
File: memory.c Project: davidbau/davej
/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * Note the "page_table_lock". It is to protect against kswapd removing
 * pages from under us. Note that kswapd only ever _removes_ pages, never
 * adds them. As such, once we have noticed that the page is not present,
 * we can drop the lock early.
 *
 * The adding of pages is protected by the MM semaphore (which we hold),
 * so we don't need to worry about a page suddenly being added into
 * our VM.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
	struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	pte_t entry;

	/*
	 * We need the page table lock to synchronize with kswapd
	 * and the SMP-safe atomic PTE updates.
	 */
	spin_lock(&mm->page_table_lock);
	entry = *pte;
	if (!pte_present(entry)) {
		/*
		 * If it truly wasn't present, we know that kswapd
		 * and the PTE updates will not touch it later. So
		 * drop the lock.
		 */
		spin_unlock(&mm->page_table_lock);
		if (pte_none(entry))
			return do_no_page(mm, vma, address, write_access, pte);
		return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
	}

	if (write_access) {
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address, pte, entry);

		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	establish_pte(vma, address, pte, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;
}
Example #11
/*
 * This routine is used to map in a page into an address space: needed by
 * execve() for the initial stack and environment pages.
 */
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	if (page >= high_memory)
		printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
	if (mem_map[MAP_NR(page)].count != 1)
		printk("mem_map disagrees with %08lx at %08lx\n",page,address);
	pgd = pgd_offset(tsk->mm,address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	pte = pte_alloc(pmd, address);
	if (!pte) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	if (!pte_none(*pte)) {
		printk("put_dirty_page: page already exists\n");
		free_page(page);
		return 0;
	}
	flush_page_to_ram(page);
	set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
/* no need for invalidate */
	return page;
}
Example #12
static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte, int cow)
{
	pte_t pte = *old_pte;
	unsigned long page_nr;

	if (pte_none(pte))
		return;
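	/* Swapped-out entry: take an extra reference on the swap slot and
	 * copy the entry as-is. */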
	if (!pte_present(pte)) {
		swap_duplicate(pte_val(pte));
		set_pte(new_pte, pte);
		return;
	}
	page_nr = MAP_NR(pte_page(pte));
	if (page_nr >= MAP_NR(high_memory) || PageReserved(mem_map+page_nr)) {
		set_pte(new_pte, pte);
		return;
	}
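	/* Normal page: write-protect both copies for COW when requested,
	 * drop any swap-cache alias (which makes the pte dirty), and share
	 * the physical page between the two mms. */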
	if (cow)
		pte = pte_wrprotect(pte);
	if (delete_from_swap_cache(page_nr))
		pte = pte_mkdirty(pte);
	set_pte(new_pte, pte_mkold(pte));
	set_pte(old_pte, pte);
	mem_map[page_nr].count++;
}
Example #13
/* Remap IO memory, the same way as remap_pfn_range(), but use
 * the obio memory space.
 *
 * They use a pgprot that sets PAGE_IO and does not check the
 * mem_map table as this is independent of normal memory.
 */
static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
				      unsigned long address,
				      unsigned long size,
				      unsigned long offset, pgprot_t prot,
				      int space)
{
	unsigned long end;

	/* clear hack bit that was used as a write_combine side-effect flag */
	offset &= ~0x1UL;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t entry;
		unsigned long curend = address + PAGE_SIZE;
		
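		/* Default to a PAGE_SIZE I/O pte; upgraded below to a 4MB,
		 * 512K or 64K mapping when address, offset and remaining
		 * size are suitably aligned. */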
		entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
		if (!(address & 0xffff)) {
			if (PAGE_SIZE < (4 * 1024 * 1024) &&
			    !(address & 0x3fffff) &&
			    !(offset & 0x3ffffe) &&
			    end >= address + 0x400000) {
				entry = mk_pte_io(offset, prot, space,
						  4 * 1024 * 1024);
				curend = address + 0x400000;
				offset += 0x400000;
			} else if (PAGE_SIZE < (512 * 1024) &&
				   !(address & 0x7ffff) &&
				   !(offset & 0x7fffe) &&
				   end >= address + 0x80000) {
				entry = mk_pte_io(offset, prot, space,
						  512 * 1024);
				curend = address + 0x80000;
				offset += 0x80000;
			} else if (PAGE_SIZE < (64 * 1024) &&
				   !(offset & 0xfffe) &&
				   end >= address + 0x10000) {
				entry = mk_pte_io(offset, prot, space,
						  64 * 1024);
				curend = address + 0x10000;
				offset += 0x10000;
			} else
				offset += PAGE_SIZE;
		} else
			offset += PAGE_SIZE;

		if (pte_write(entry))
			entry = pte_mkdirty(entry);
		do {
			BUG_ON(!pte_none(*pte));
			set_pte_at(mm, address, pte, entry);
			address += PAGE_SIZE;
			pte_val(entry) += PAGE_SIZE;
			pte++;
		} while (address < curend);
	} while (address < end);
}
Example #14
pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];
	unsigned long page = (unsigned long)empty_bad_page;

	clear_page(page);
	return pte_mkdirty(mk_pte(page, PAGE_SHARED));
}
Example #15
/*
 * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
 */
static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
		pte_t *page_table)
{
	flush_page_to_ram(new_page);
#ifndef CONFIG_SUPERH
	/* Not needed for VIPT cache (need better API for caches) */
	flush_cache_page(vma, address);
#endif
	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
Example #16
pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];

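	/* Zero the page with "rep stosl" (PAGE_SIZE/4 longwords of 0),
	 * then hand back a dirty, shared pte pointing at it. */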
	__asm__ __volatile__("cld ; rep ; stosl":
		:"a" (0),
		 "D" ((long) empty_bad_page),
		 "c" (PAGE_SIZE/4)
		:"di","cx");
	return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
}
Example #17
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
                           unsigned long page, unsigned long entry)
{
    pte_t pte;

    pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
    shp->shm_pages[idx] = pte_val(pte);
    atomic_inc(&mem_map[MAP_NR(page)].count);
    shm_rss++;

    swap_free(entry);
    shm_swp--;
}
Example #18
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pmd_t *pgmiddle;
	pte_t *pgtable;
	unsigned long page;
		
repeat:
	pgdir = pgd_offset(vma->vm_mm, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgmiddle = pmd_offset(pgdir,addr);
	if (pmd_none(*pgmiddle)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pmd_bad(*pgmiddle)) {
		printk("ptrace: bad page directory %08lx\n",
		       pmd_val(*pgmiddle));
		pmd_clear(pgmiddle);
		return;
	}
	pgtable = pte_offset(pgmiddle, addr);
	if (!pte_present(*pgtable)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(tsk, vma, addr, 2);
		goto repeat;
	}
/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		*(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
		flush_page_to_ram (page);
	}
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	flush_tlb_all();
}
Example #19
/*
 * The swap entry has been read in advance, and we return 1 to indicate
 * that the page has been used or is no longer needed.
 *
 * Always set the resulting pte to be nowrite (the same as COW pages
 * after one process has exited).  We don't know just how many PTEs will
 * share this swap entry, so be cautious and let do_wp_page work out
 * what to do if a write is requested later.
 */
static inline void unswap_pte(struct vm_area_struct * vma, unsigned long
			      address, pte_t *dir, unsigned long entry,
			      unsigned long page /*, int isswap */)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return;
	if (pte_present(pte)) {
		/* If this entry is swap-cached, then page must already
                   hold the right address for any copies in physical
                   memory */
		if (pte_page(pte) != page)
			return;
		if (0 /* isswap */)
			mem_map[MAP_NR(pte_page(pte))].offset = page;
		else
			/* We will be removing the swap cache in a moment, so... */
			set_pte(dir, pte_mkdirty(pte));
		return;
	}
	if (pte_val(pte) != entry)
		return;

	if (0 /* isswap */) {
		DPRINTK( "unswap_pte: replacing entry %08lx by %08lx", entry, page );
		set_pte(dir, __pte(page));
	}
	else {
		DPRINTK( "unswap_pte: replacing entry %08lx by new page %08lx",
				 entry, page );
		set_pte(dir, pte_mkdirty(mk_pte(page,vma->vm_page_prot)));
		atomic_inc(&mem_map[MAP_NR(page)].count);
		++vma->vm_mm->rss;
	}
	swap_free(entry);
}
Example #20
void __iomem *
ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
{
	pte_t pte = __pte(flags);

	/* writeable implies dirty for kernel addresses */
	if (pte_write(pte))
		pte = pte_mkdirty(pte);

	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
	pte = pte_exprotect(pte);
	pte = pte_mkprivileged(pte);

	return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
}
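A minimal usage sketch (not from the page above), assuming the powerpc-style ioremap_prot() shown in this example; DEV_PHYS_BASE and DEV_WIN_SIZE are made-up placeholders:

#include <linux/io.h>
#include <asm/pgtable.h>

#define DEV_PHYS_BASE	0xf0000000UL	/* assumed device register base */
#define DEV_WIN_SIZE	0x1000UL	/* assumed window size */

static void __iomem *map_device_window(void)
{
	/* Pass raw pte flag bits; as above, ioremap_prot() marks writable
	 * kernel mappings dirty and strips the user/exec bits itself. */
	return ioremap_prot(DEV_PHYS_BASE, DEV_WIN_SIZE,
			    pgprot_val(pgprot_noncached(PAGE_KERNEL)));
}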
Example #21
/*
 * The above separate functions for the no-page and wp-page
 * cases will go away (they mostly do the same thing anyway),
 * and we'll instead use only a general "handle_mm_fault()".
 *
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	set_pte(pte, pte_mkyoung(*pte));
	flush_tlb_page(vma, address);
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		set_pte(pte, pte_mkdirty(*pte));
		flush_tlb_page(vma, address);
		return;
	}
	do_wp_page(current, vma, address, write_access);
}
Example #22
File: memory.c Project: davidbau/davej
/*
 * This only needs the MM semaphore
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
	struct page *page = NULL;
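	/* Read faults get a read-only mapping of the zero page; a write
	 * fault allocates a private zeroed page below. */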
	pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
	if (write_access) {
		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			return -1;
		clear_user_highpage(page, addr);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		mm->rss++;
		flush_page_to_ram(page);
	}
	set_pte(page_table, entry);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	return 1;	/* Minor fault */
}
Example #23
/*
 *  the function for the no-page and wp_page
 */
void handle_pte_fault(struct vm_area_struct *vma, unsigned long address, pte_t *pte,int write_access)
{
    // no page present
    if(!pte_present(*pte))
    {
        do_no_page(vma,address,write_access);
        return;
    }
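    // page is present: mark it accessed, then sort out dirty/write-protect handling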
    *pte = pte_mkyoung(*pte);
    if(!write_access)
        return;
    if(pte_write(*pte))
    {
        *pte = pte_mkdirty(*pte);
        return;
    }
    // write to a write-protected (shared/COW) page
    do_wp_page(vma, address, write_access);
}
Example #24
File: memory.c Project: davidbau/davej
/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 *
 * This is called with the MM semaphore held.
 */
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
	unsigned long address, int write_access, pte_t *page_table)
{
	struct page * new_page;
	pte_t entry;

	if (!vma->vm_ops || !vma->vm_ops->nopage)
		return do_anonymous_page(mm, vma, page_table, write_access, address);

	/*
	 * The third argument is "no_share", which tells the low-level code
	 * to copy, not share the page even if sharing is possible.  It's
	 * essentially an early COW detection.
	 */
	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
	if (new_page == NULL)	/* no page was available -- SIGBUS */
		return 0;
	if (new_page == NOPAGE_OOM)
		return -1;
	++mm->rss;
	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
	 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	flush_page_to_ram(new_page);
	flush_icache_page(vma, new_page);
	entry = mk_pte(new_page, vma->vm_page_prot);
	if (write_access) {
		entry = pte_mkwrite(pte_mkdirty(entry));
	} else if (page_count(new_page) > 1 &&
		   !(vma->vm_flags & VM_SHARED))
		entry = pte_wrprotect(entry);
	set_pte(page_table, entry);
	/* no need to invalidate: a not-present page shouldn't be cached */
	update_mmu_cache(vma, address, entry);
	return 2;	/* Major fault */
}
Example #25
/*
 * We are called with the MM semaphore and page_table_lock
 * spinlock held to protect against concurrent faults in
 * multithreaded programs. 
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
	pte_t entry;

	/* Read-only mapping of ZERO_PAGE. */
	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	/* ..except if it's a write access */
	if (write_access) {
		struct page *page;

		/* Allocate our own private page. */
		spin_unlock(&mm->page_table_lock);

		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			goto no_mem;
		clear_user_highpage(page, addr);

		spin_lock(&mm->page_table_lock);
		if (!pte_none(*page_table)) {
			page_cache_release(page);
			spin_unlock(&mm->page_table_lock);
			return 1;
		}
		mm->rss++;
		flush_page_to_ram(page);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		lru_cache_add(page);
		mark_page_accessed(page);
	}

	set_pte(page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;	/* Minor fault */

no_mem:
	return -1;
}
Example #26
File: memory.c Project: davidbau/davej
static int do_swap_page(struct mm_struct * mm,
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, swp_entry_t entry, int write_access)
{
	struct page *page = lookup_swap_cache(entry);
	pte_t pte;

	if (!page) {
		lock_kernel();
		swapin_readahead(entry);
		page = read_swap_cache(entry);
		unlock_kernel();
		if (!page)
			return -1;

		flush_page_to_ram(page);
		flush_icache_page(vma, page);
	}

	mm->rss++;

	pte = mk_pte(page, vma->vm_page_prot);

	/*
	 * Freeze the "shared"ness of the page, ie page_count + swap_count.
	 * Must lock page before transferring our swap count to already
	 * obtained page count.
	 */
	lock_page(page);
	swap_free(entry);
	if (write_access && !is_page_shared(page))
		pte = pte_mkwrite(pte_mkdirty(pte));
	UnlockPage(page);

	set_pte(page_table, pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, address, pte);
	return 1;	/* Minor fault */
}
Example #27
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = PAGE_DIR_OFFSET(vma->vm_task, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgtable = (pte_t *) (PAGE_PTR(addr) + pgd_page(*pgdir));
	if (!pte_present(*pgtable)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(vma, addr, 1);
		goto repeat;
	}
/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		page += addr & ~PAGE_MASK;
		*(unsigned long *) page = data;
	}
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	invalidate();
}
Example #28
static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}
Example #29
/*H:330
 * (i) Looking up a page table entry when the Guest faults.
 *
 * We saw this call in run_guest(): when we see a page fault in the Guest, we
 * come here.  That's because we only set up the shadow page tables lazily as
 * they're needed, so we get page faults all the time and quietly fix them up
 * and return to the Guest without it knowing.
 *
 * If we fixed up the fault (ie. we mapped the address), this routine returns
 * true.  Otherwise, it was a real fault and we need to tell the Guest.
 */
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
{
	pgd_t gpgd;
	pgd_t *spgd;
	unsigned long gpte_ptr;
	pte_t gpte;
	pte_t *spte;

	/* Mid level for PAE. */
#ifdef CONFIG_X86_PAE
	pmd_t *spmd;
	pmd_t gpmd;
#endif

	/* First step: get the top-level Guest page table entry. */
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpgd = __pgd(CHECK_GPGD_MASK);
	} else {
		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
		/* Toplevel not present?  We can't map it in. */
		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
			return false;
	}

	/* Now look at the matching shadow entry. */
	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
	if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
		/* No shadow entry: allocate a new shadow PTE page. */
		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
		/*
		 * This is not really the Guest's fault, but killing it is
		 * simple for this corner case.
		 */
		if (!ptepage) {
			kill_guest(cpu, "out of memory allocating pte page");
			return false;
		}
		/* We check that the Guest pgd is OK. */
		check_gpgd(cpu, gpgd);
		/*
		 * And we copy the flags to the shadow PGD entry.  The page
		 * number in the shadow PGD is the page we just allocated.
		 */
		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
	}

#ifdef CONFIG_X86_PAE
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpmd = __pmd(_PAGE_TABLE);
	} else {
		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
		/* Middle level not present?  We can't map it in. */
		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			return false;
	}

	/* Now look at the matching shadow entry. */
	spmd = spmd_addr(cpu, *spgd, vaddr);

	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
		/* No shadow entry: allocate a new shadow PTE page. */
		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);

		/*
		 * This is not really the Guest's fault, but killing it is
		 * simple for this corner case.
		 */
		if (!ptepage) {
			kill_guest(cpu, "out of memory allocating pte page");
			return false;
		}

		/* We check that the Guest pmd is OK. */
		check_gpmd(cpu, gpmd);

		/*
		 * And we copy the flags to the shadow PMD entry.  The page
		 * number in the shadow PMD is the page we just allocated.
		 */
		set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
	}

	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
#else
	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
#endif

	if (unlikely(cpu->linear_pages)) {
		/* Linear?  Make up a PTE which points to same page. */
		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
	} else {
		/* Read the actual PTE value. */
		gpte = lgread(cpu, gpte_ptr, pte_t);
	}

	/* If this page isn't in the Guest page tables, we can't page it in. */
	if (!(pte_flags(gpte) & _PAGE_PRESENT))
		return false;

	/*
	 * Check they're not trying to write to a page the Guest wants
	 * read-only (bit 2 of errcode == write).
	 */
	if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
		return false;

	/* User access to a kernel-only page? (bit 3 == user access) */
	if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
		return false;

	/*
	 * Check that the Guest PTE flags are OK, and the page number is below
	 * the pfn_limit (ie. not mapping the Launcher binary).
	 */
	check_gpte(cpu, gpte);

	/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
	gpte = pte_mkyoung(gpte);
	if (errcode & 2)
		gpte = pte_mkdirty(gpte);

	/* Get the pointer to the shadow PTE entry we're going to set. */
	spte = spte_addr(cpu, *spgd, vaddr);

	/*
	 * If there was a valid shadow PTE entry here before, we release it.
	 * This can happen with a write to a previously read-only entry.
	 */
	release_pte(*spte);

	/*
	 * If this is a write, we insist that the Guest page is writable (the
	 * final arg to gpte_to_spte()).
	 */
	if (pte_dirty(gpte))
		*spte = gpte_to_spte(cpu, gpte, 1);
	else
		/*
		 * If this is a read, don't set the "writable" bit in the page
		 * table entry, even if the Guest says it's writable.  That way
		 * we will come back here when a write does actually occur, so
		 * we can update the Guest's _PAGE_DIRTY flag.
		 */
		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

	/*
	 * Finally, we write the Guest PTE entry back: we've set the
	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
	 */
	if (likely(!cpu->linear_pages))
		lgwrite(cpu, gpte_ptr, pte_t, gpte);

	/*
	 * The fault is fixed, the page table is populated, the mapping
	 * manipulated, the result returned and the code complete.  A small
	 * delay and a trace of alliteration are the only indications the Guest
	 * has that a page fault occurred at all.
	 */
	return true;
}
Example #30
File: memory.c Project: davidbau/davej
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We enter with the page table read-lock held, and need to exit without
 * it.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;
	
	/*
	 * We can avoid the copy if:
	 * - we're the only user (count == 1)
	 * - the only other user is the swap cache,
	 *   and the only swap cache user is itself,
	 *   in which case we can just continue to
	 *   use the same swap cache (it will be
	 *   marked dirty).
	 */
	switch (page_count(old_page)) {
	case 2:
		/*
		 * Lock the page so that no one can look it up from
		 * the swap cache, grab a reference and start using it.
		 * Can not do lock_page, holding page_table_lock.
		 */
		if (!PageSwapCache(old_page) || TryLockPage(old_page))
			break;
		if (is_page_shared(old_page)) {
			UnlockPage(old_page);
			break;
		}
		UnlockPage(old_page);
		/* FallThrough */
	case 1:
		flush_cache_page(vma, address);
		establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
		spin_unlock(&mm->page_table_lock);
		return 1;	/* Minor fault */
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	spin_unlock(&mm->page_table_lock);
	new_page = page_cache_alloc();
	if (!new_page)
		return -1;
	spin_lock(&mm->page_table_lock);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	if (pte_same(*page_table, pte)) {
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, old_page, new_page, address, page_table);

		/* Free the old page.. */
		new_page = old_page;
	}
	spin_unlock(&mm->page_table_lock);
	page_cache_release(new_page);
	return 1;	/* Minor fault */

bad_wp_page:
	spin_unlock(&mm->page_table_lock);
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
}