Example #1
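/*
 * Duplicate a single PTE from parent to child at fork time: swap
 * entries get their reference count bumped, reserved or out-of-range
 * pages are copied verbatim, and normal pages are write-protected in
 * both parent and child (when "cow" is set) so the first write forces
 * a copy-on-write fault.  The shared page's use count is bumped last.
 */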
static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte, int cow)
{
	pte_t pte = *old_pte;
	unsigned long page_nr;

	if (pte_none(pte))
		return;
	if (!pte_present(pte)) {
		swap_duplicate(pte_val(pte));
		set_pte(new_pte, pte);
		return;
	}
	page_nr = MAP_NR(pte_page(pte));
	if (page_nr >= MAP_NR(high_memory) || PageReserved(mem_map+page_nr)) {
		set_pte(new_pte, pte);
		return;
	}
	if (cow)
		pte = pte_wrprotect(pte);
	if (delete_from_swap_cache(page_nr))
		pte = pte_mkdirty(pte);
	set_pte(new_pte, pte_mkold(pte));
	set_pte(old_pte, pte);
	mem_map[page_nr].count++;
}
Example #2
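/*
 * Write-protect a huge PTE.  Non-contiguous entries take the generic
 * ptep_set_wrprotect() path; for a contiguous range the whole block is
 * cleared (with a TLB flush), the write bit is dropped once, and every
 * constituent entry is rewritten with the recomputed permissions.
 */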
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
Example #3
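/*
 * Resolve a kernel virtual address to its PTE and clear the write bit
 * in place.  Note that the result of lookup_address() is dereferenced
 * without a NULL check, so the caller must pass a mapped address.
 */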
static void make_page_ro(unsigned long addr)
{
	unsigned int level;
	pte_t *pte = lookup_address(addr, &level);
	*pte = pte_wrprotect(*pte);
	flush_cache_all();
}
Example #4
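/*
 * Map [address, address + size) in the current mm to the shared
 * ZERO_PAGE, read-only, walking the page directory one PGD entry at a
 * time and delegating each chunk to zeromap_pmd_range().
 */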
int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = address;
	unsigned long end = address + size;
	pte_t zero_pte;

	zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
	dir = pgd_offset(current->mm, address);
	flush_cache_range(current->mm, beg, end);
	while (address < end) {
		pmd_t *pmd = pmd_alloc(dir, address);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
#ifdef CONFIG_BESTA
		if (!address)  break;  /*  unsigned overflow   */
#endif
		dir++;
	}
	flush_tlb_range(current->mm, beg, end);
	return error;
}
Example #5
void make_lowmem_page_readonly(void *va, unsigned int feature)
{
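	/*
	 * Nothing to do when the hypervisor already provides the named
	 * feature; otherwise look up the PTE for this low-memory address
	 * and install a write-protected copy via a hypercall.
	 */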
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_wrprotect(*pte), 0);
	BUG_ON(rc);
}
Example #6
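/*
 * Look up the PTE for a low-memory kernel address (BUG if it is
 * unmapped), clear its write bit, and ask the hypervisor to install
 * the read-only mapping, crashing on failure.
 */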
void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;

	pte = lookup_address(address);
	BUG_ON(pte == NULL);

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}
Example #7
static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
			 unsigned long kernel)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(mm, proc);
	pud = pud_alloc(mm, pgd, proc);
	if (!pud)
		goto out;

	pmd = pmd_alloc(mm, pud, proc);
	if (!pmd)
		goto out_pmd;

	pte = pte_alloc_map(mm, pmd, proc);
	if (!pte)
		goto out_pte;

	/* There's an interaction between the skas0 stub pages, stack
	 * randomization, and the BUG at the end of exit_mmap.  exit_mmap
	 * checks that the number of page tables freed is the same as had
	 * been allocated.  If the stack is on the last page table page,
	 * then the stack pte page will be freed, and if not, it won't.  To
	 * avoid having to know where the stack is, or if the process mapped
	 * something at the top of its address space for some other reason,
	 * we set TASK_SIZE to end at the start of the last page table.
	 * This keeps exit_mmap off the last page, but introduces a leak
	 * of that page.  So, we hang onto it here and free it in
	 * destroy_context_skas.
	 */

	mm->context.skas.last_page_table = pmd_page_kernel(*pmd);
#ifdef CONFIG_3_LEVEL_PGTABLES
	mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
#endif

	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
	*pte = pte_mkexec(*pte);
	*pte = pte_wrprotect(*pte);
	return(0);

 out_pmd:
	pud_free(pud);
 out_pte:
	pmd_free(pmd);
 out:
	return(-ENOMEM);
}
Example #8
void make_page_readonly(void *va, unsigned int feature)
{
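	/*
	 * Write-protect the PTE via a hypercall, falling back to a direct
	 * L1 entry update if that fails.  For addresses above high_memory,
	 * also fix up the page's lowmem alias (or flush stale kmaps when
	 * the page lives in highmem).
	 */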
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_wrprotect(*pte), 0);
	if (rc) /* fallback? */
		xen_l1_entry_update(pte, pte_wrprotect(*pte));
	if ((unsigned long)va >= (unsigned long)high_memory) {
		unsigned long pfn = pte_pfn(*pte);
#ifdef CONFIG_HIGHMEM
		if (pfn >= highstart_pfn)
			kmap_flush_unused(); /* flush stale writable kmaps */
		else
#endif
			make_lowmem_page_readonly(
				phys_to_virt(pfn << PAGE_SHIFT), feature); 
	}
}
Example #9
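/*
 * Variant that tolerates an unmapped address: if lookup_address()
 * finds no PTE the call simply returns; otherwise the write bit is
 * cleared and the hypervisor installs the read-only mapping.
 */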
void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}
Example #10
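/*
 * Point every PTE in [addr, end) at the shared kasan_zero_page with a
 * read-only kernel mapping, one page at a time.
 */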
static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
				unsigned long end)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	pte_t zero_pte;

	zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL);
	zero_pte = pte_wrprotect(zero_pte);

	while (addr + PAGE_SIZE <= end) {
		set_pte_at(&init_mm, addr, pte, zero_pte);
		addr += PAGE_SIZE;
		pte = pte_offset_kernel(pmd, addr);
	}
}
Example #11
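/*
 * Patch a (possibly read-protected) 16-byte target: if its mapping is
 * not writable, temporarily set the write bit, perform the store, then
 * restore write protection; otherwise write directly.
 */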
static void write_addr(u128 * target, u128 * inject)
{
    pte_t * ppt, pt;
    unsigned int level;

    if (pte_write(*lookup_address((unsigned long)target, &level)) == 0) {
        ppt = lookup_address((unsigned long)target, &level);
        pt = pte_mkwrite(*ppt);
        set_pte(ppt, pt);
        *target = *inject;
        ppt = lookup_address((unsigned long)target, &level);
        pt = pte_wrprotect(*ppt);
        set_pte(ppt, pt);
    } else {
        *target = *inject;
    }
}
Example #12
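/*
 * Fill one PTE table's worth of the range with read-only mappings of
 * ZERO_PAGE, clearing and "forgetting" whatever was mapped before.
 */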
static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
                                     unsigned long size, pgprot_t prot)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
		pte_t oldpage = ptep_get_and_clear(pte);
		set_pte(pte, zero_pte);
		forget_pte(oldpage);
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
}
Example #13
/*
 * This only needs the MM semaphore
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
	struct page *page = NULL;
	pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
	if (write_access) {
		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			return -1;
		clear_user_highpage(page, addr);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		mm->rss++;
		flush_page_to_ram(page);
	}
	set_pte(page_table, entry);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	return 1;	/* Minor fault */
}
Example #14
/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 *
 * This is called with the MM semaphore held.
 */
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
	unsigned long address, int write_access, pte_t *page_table)
{
	struct page * new_page;
	pte_t entry;

	if (!vma->vm_ops || !vma->vm_ops->nopage)
		return do_anonymous_page(mm, vma, page_table, write_access, address);

	/*
	 * The third argument is "no_share", which tells the low-level code
	 * to copy, not share the page even if sharing is possible.  It's
	 * essentially an early COW detection.
	 */
	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
	if (new_page == NULL)	/* no page was available -- SIGBUS */
		return 0;
	if (new_page == NOPAGE_OOM)
		return -1;
	++mm->rss;
	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
	 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	flush_page_to_ram(new_page);
	flush_icache_page(vma, new_page);
	entry = mk_pte(new_page, vma->vm_page_prot);
	if (write_access) {
		entry = pte_mkwrite(pte_mkdirty(entry));
	} else if (page_count(new_page) > 1 &&
		   !(vma->vm_flags & VM_SHARED))
		entry = pte_wrprotect(entry);
	set_pte(page_table, entry);
	/* no need to invalidate: a not-present page shouldn't be cached */
	update_mmu_cache(vma, address, entry);
	return 2;	/* Major fault */
}
Example #15
/*
 * We are called with the MM semaphore and page_table_lock
 * spinlock held to protect against concurrent faults in
 * multithreaded programs. 
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
	pte_t entry;

	/* Read-only mapping of ZERO_PAGE. */
	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	/* ..except if it's a write access */
	if (write_access) {
		struct page *page;

		/* Allocate our own private page. */
		spin_unlock(&mm->page_table_lock);

		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			goto no_mem;
		clear_user_highpage(page, addr);

		spin_lock(&mm->page_table_lock);
		if (!pte_none(*page_table)) {
			page_cache_release(page);
			spin_unlock(&mm->page_table_lock);
			return 1;
		}
		mm->rss++;
		flush_page_to_ram(page);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		lru_cache_add(page);
		mark_page_accessed(page);
	}

	set_pte(page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;	/* Minor fault */

no_mem:
	return -1;
}
Example #16
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/vm/soft-dirty.txt for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		ptent = pte_wrprotect(ptent);
		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
	}

	set_pte_at(vma->vm_mm, addr, pte, ptent);
}
Example #17
// init_mm.page_table_lock must be held before calling!
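// Walk the kernel page tables for addr and, if a present PTE is found,
// set or clear its write bit according to "rw".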
static void pram_page_writeable(unsigned long addr, int rw)
{
	pgd_t *pgdp;
	pmd_t *pmdp;
	pte_t *ptep;
	
	pgdp = pgd_offset_k(addr);
	if (!pgd_none(*pgdp)) {
		pmdp = pmd_offset(pgdp, addr);
		if (!pmd_none(*pmdp)) {
			pte_t pte;
			ptep = pte_offset(pmdp, addr);
			pte = *ptep;
			if (pte_present(pte)) {
				pte = rw ? pte_mkwrite(pte) :
					pte_wrprotect(pte);
				set_pte(ptep, pte);
			}
		}
	}
}
Example #18
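/*
 * Variant of zeromap_pte_range() with a low-latency lock break on
 * mm->page_table_lock before filling the range with read-only
 * ZERO_PAGE mappings.
 */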
static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
				     unsigned long address, unsigned long size,
				     pgprot_t prot)
{
	unsigned long end;

	debug_lock_break(1);
	break_spin_lock(&mm->page_table_lock);

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
		pte_t oldpage = ptep_get_and_clear(pte);
		set_pte(pte, zero_pte);
		forget_pte(oldpage);
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
}
Example #19
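/*
 * Bring a swapped-out page back in.  Without a vma-specific swapin
 * operation the generic swap_in() path is used; otherwise the page is
 * fetched, discarded if the PTE changed in the meantime, and mapped
 * write-protected when it is still shared in a private mapping.
 */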
static inline void do_swap_page(struct task_struct * tsk, 
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, pte_t entry, int write_access)
{
	pte_t page;

	if (!vma->vm_ops || !vma->vm_ops->swapin) {
		swap_in(tsk, vma, page_table, pte_val(entry), write_access);
		flush_page_to_ram(pte_page(*page_table));
		return;
	}
	page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
	if (pte_val(*page_table) != pte_val(entry)) {
		free_page(pte_page(page));
		return;
	}
	if (mem_map[MAP_NR(pte_page(page))].count > 1 && !(vma->vm_flags & VM_SHARED))
		page = pte_wrprotect(page);
	++vma->vm_mm->rss;
	++tsk->maj_flt;
	flush_page_to_ram(pte_page(page));
	set_pte(page_table, page);
	return;
}
Example #20
/*H:330
 * (i) Looking up a page table entry when the Guest faults.
 *
 * We saw this call in run_guest(): when we see a page fault in the Guest, we
 * come here.  That's because we only set up the shadow page tables lazily as
 * they're needed, so we get page faults all the time and quietly fix them up
 * and return to the Guest without it knowing.
 *
 * If we fixed up the fault (ie. we mapped the address), this routine returns
 * true.  Otherwise, it was a real fault and we need to tell the Guest.
 */
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
{
	pgd_t gpgd;
	pgd_t *spgd;
	unsigned long gpte_ptr;
	pte_t gpte;
	pte_t *spte;

	/* Mid level for PAE. */
#ifdef CONFIG_X86_PAE
	pmd_t *spmd;
	pmd_t gpmd;
#endif

	/* First step: get the top-level Guest page table entry. */
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpgd = __pgd(CHECK_GPGD_MASK);
	} else {
		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
		/* Toplevel not present?  We can't map it in. */
		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
			return false;
	}

	/* Now look at the matching shadow entry. */
	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
	if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
		/* No shadow entry: allocate a new shadow PTE page. */
		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
		/*
		 * This is not really the Guest's fault, but killing it is
		 * simple for this corner case.
		 */
		if (!ptepage) {
			kill_guest(cpu, "out of memory allocating pte page");
			return false;
		}
		/* We check that the Guest pgd is OK. */
		check_gpgd(cpu, gpgd);
		/*
		 * And we copy the flags to the shadow PGD entry.  The page
		 * number in the shadow PGD is the page we just allocated.
		 */
		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
	}

#ifdef CONFIG_X86_PAE
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpmd = __pmd(_PAGE_TABLE);
	} else {
		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
		/* Middle level not present?  We can't map it in. */
		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			return false;
	}

	/* Now look at the matching shadow entry. */
	spmd = spmd_addr(cpu, *spgd, vaddr);

	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
		/* No shadow entry: allocate a new shadow PTE page. */
		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);

		/*
		 * This is not really the Guest's fault, but killing it is
		 * simple for this corner case.
		 */
		if (!ptepage) {
			kill_guest(cpu, "out of memory allocating pte page");
			return false;
		}

		/* We check that the Guest pmd is OK. */
		check_gpmd(cpu, gpmd);

		/*
		 * And we copy the flags to the shadow PMD entry.  The page
		 * number in the shadow PMD is the page we just allocated.
		 */
		set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
	}

	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
#else
	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
#endif

	if (unlikely(cpu->linear_pages)) {
		/* Linear?  Make up a PTE which points to same page. */
		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
	} else {
		/* Read the actual PTE value. */
		gpte = lgread(cpu, gpte_ptr, pte_t);
	}

	/* If this page isn't in the Guest page tables, we can't page it in. */
	if (!(pte_flags(gpte) & _PAGE_PRESENT))
		return false;

	/*
	 * Check they're not trying to write to a page the Guest wants
	 * read-only (bit 2 of errcode == write).
	 */
	if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
		return false;

	/* User access to a kernel-only page? (bit 3 == user access) */
	if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
		return false;

	/*
	 * Check that the Guest PTE flags are OK, and the page number is below
	 * the pfn_limit (ie. not mapping the Launcher binary).
	 */
	check_gpte(cpu, gpte);

	/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
	gpte = pte_mkyoung(gpte);
	if (errcode & 2)
		gpte = pte_mkdirty(gpte);

	/* Get the pointer to the shadow PTE entry we're going to set. */
	spte = spte_addr(cpu, *spgd, vaddr);

	/*
	 * If there was a valid shadow PTE entry here before, we release it.
	 * This can happen with a write to a previously read-only entry.
	 */
	release_pte(*spte);

	/*
	 * If this is a write, we insist that the Guest page is writable (the
	 * final arg to gpte_to_spte()).
	 */
	if (pte_dirty(gpte))
		*spte = gpte_to_spte(cpu, gpte, 1);
	else
		/*
		 * If this is a read, don't set the "writable" bit in the page
		 * table entry, even if the Guest says it's writable.  That way
		 * we will come back here when a write does actually occur, so
		 * we can update the Guest's _PAGE_DIRTY flag.
		 */
		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

	/*
	 * Finally, we write the Guest PTE entry back: we've set the
	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
	 */
	if (likely(!cpu->linear_pages))
		lgwrite(cpu, gpte_ptr, pte_t, gpte);

	/*
	 * The fault is fixed, the page table is populated, the mapping
	 * manipulated, the result returned and the code complete.  A small
	 * delay and a trace of alliteration are the only indications the Guest
	 * has that a page fault occurred at all.
	 */
	return true;
}
Example #21
/*H:330
 * (i) Looking up a page table entry when the Guest faults.
 *
 * We saw this call in run_guest(): when we see a page fault in the Guest, we
 * come here.  That's because we only set up the shadow page tables lazily as
 * they're needed, so we get page faults all the time and quietly fix them up
 * and return to the Guest without it knowing.
 *
 * If we fixed up the fault (ie. we mapped the address), this routine returns
 * true.  Otherwise, it was a real fault and we need to tell the Guest.
 *
 * There's a corner case: they're trying to access memory between
 * pfn_limit and device_limit, which is I/O memory.  In this case, we
 * return false and set @iomem to the physical address, so that the
 * Launcher can handle the instruction manually.
 */
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode,
		 unsigned long *iomem)
{
	unsigned long gpte_ptr;
	pte_t gpte;
	pte_t *spte;
	pmd_t gpmd;
	pgd_t gpgd;

	*iomem = 0;

	/* We never demand page the Switcher, so trying is a mistake. */
	if (vaddr >= switcher_addr)
		return false;

	/* First step: get the top-level Guest page table entry. */
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpgd = __pgd(CHECK_GPGD_MASK);
	} else {
		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
		/* Toplevel not present?  We can't map it in. */
		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
			return false;

		/* 
		 * This kills the Guest if it has weird flags or tries to
		 * refer to a "physical" address outside the bounds.
		 */
		if (!check_gpgd(cpu, gpgd))
			return false;
	}

	/* This "mid-level" entry is only used for non-linear, PAE mode. */
	gpmd = __pmd(_PAGE_TABLE);

#ifdef CONFIG_X86_PAE
	if (likely(!cpu->linear_pages)) {
		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
		/* Middle level not present?  We can't map it in. */
		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			return false;

		/* 
		 * This kills the Guest if it has weird flags or tries to
		 * refer to a "physical" address outside the bounds.
		 */
		if (!check_gpmd(cpu, gpmd))
			return false;
	}

	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
#else
	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
#endif

	if (unlikely(cpu->linear_pages)) {
		/* Linear?  Make up a PTE which points to same page. */
		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
	} else {
		/* Read the actual PTE value. */
		gpte = lgread(cpu, gpte_ptr, pte_t);
	}

	/* If this page isn't in the Guest page tables, we can't page it in. */
	if (!(pte_flags(gpte) & _PAGE_PRESENT))
		return false;

	/*
	 * Check they're not trying to write to a page the Guest wants
	 * read-only (bit 2 of errcode == write).
	 */
	if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
		return false;

	/* User access to a kernel-only page? (bit 3 == user access) */
	if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
		return false;

	/* If they're accessing io memory, we expect a fault. */
	if (gpte_in_iomem(cpu, gpte)) {
		*iomem = (pte_pfn(gpte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
		return false;
	}

	/*
	 * Check that the Guest PTE flags are OK, and the page number is below
	 * the pfn_limit (ie. not mapping the Launcher binary).
	 */
	if (!check_gpte(cpu, gpte))
		return false;

	/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
	gpte = pte_mkyoung(gpte);
	if (errcode & 2)
		gpte = pte_mkdirty(gpte);

	/* Get the pointer to the shadow PTE entry we're going to set. */
	spte = find_spte(cpu, vaddr, true, pgd_flags(gpgd), pmd_flags(gpmd));
	if (!spte)
		return false;

	/*
	 * If there was a valid shadow PTE entry here before, we release it.
	 * This can happen with a write to a previously read-only entry.
	 */
	release_pte(*spte);

	/*
	 * If this is a write, we insist that the Guest page is writable (the
	 * final arg to gpte_to_spte()).
	 */
	if (pte_dirty(gpte))
		*spte = gpte_to_spte(cpu, gpte, 1);
	else
		/*
		 * If this is a read, don't set the "writable" bit in the page
		 * table entry, even if the Guest says it's writable.  That way
		 * we will come back here when a write does actually occur, so
		 * we can update the Guest's _PAGE_DIRTY flag.
		 */
		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

	/*
	 * Finally, we write the Guest PTE entry back: we've set the
	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
	 */
	if (likely(!cpu->linear_pages))
		lgwrite(cpu, gpte_ptr, pte_t, gpte);

	/*
	 * The fault is fixed, the page table is populated, the mapping
	 * manipulated, the result returned and the code complete.  A small
	 * delay and a trace of alliteration are the only indications the Guest
	 * has that a page fault occurred at all.
	 */
	return true;
}
Example #22
/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 */
void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * page_table;
	pte_t entry;
	unsigned long page;

	pgd = pgd_offset(tsk->mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd)
		goto no_memory;
	page_table = pte_alloc(pmd, address);
	if (!page_table)
		goto no_memory;
	entry = *page_table;
	if (pte_present(entry))
		goto is_present;
	if (!pte_none(entry))
		goto swap_page;
	address &= PAGE_MASK;
	if (!vma->vm_ops || !vma->vm_ops->nopage)
		goto anonymous_page;
	/*
	 * The third argument is "no_share", which tells the low-level code
	 * to copy, not share the page even if sharing is possible.  It's
	 * essentially an early COW detection 
	 */
	page = vma->vm_ops->nopage(vma, address, 
		(vma->vm_flags & VM_SHARED)?0:write_access);
	if (!page)
		goto sigbus;
	++tsk->maj_flt;
	++vma->vm_mm->rss;
	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	flush_page_to_ram(page);
	entry = mk_pte(page, vma->vm_page_prot);
	if (write_access) {
		entry = pte_mkwrite(pte_mkdirty(entry));
	} else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED))
		entry = pte_wrprotect(entry);
	put_page(page_table, entry);
	/* no need to invalidate: a not-present page shouldn't be cached */
	return;

anonymous_page:
	entry = pte_wrprotect(mk_pte(ZERO_PAGE, vma->vm_page_prot));
	if (write_access) {
		unsigned long page = __get_free_page(GFP_KERNEL);
		if (!page)
			goto sigbus;
		memset((void *) page, 0, PAGE_SIZE);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		vma->vm_mm->rss++;
		tsk->min_flt++;
		flush_page_to_ram(page);
	}
	put_page(page_table, entry);
	return;

sigbus:
	force_sig(SIGBUS, current);
	put_page(page_table, BAD_PAGE);
	/* no need to invalidate, wasn't present */
	return;

swap_page:
	do_swap_page(tsk, vma, address, page_table, entry, write_access);
	return;

no_memory:
	oom(tsk);
is_present:
	return;
}
Example #23
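/*
 * Software TLB reload for ColdFire: walk the page tables for the
 * faulting address, reject missing or non-writable entries, mark the
 * PTE young (and dirty on a write), write-protect clean user pages so
 * the next write faults, then program the MMU registers with the
 * translation.
 */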
int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
{
	unsigned long flags, mmuar, mmutr;
	struct mm_struct *mm;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int asid;

	local_irq_save(flags);

	mmuar = (dtlb) ? mmu_read(MMUAR) :
		regs->pc + (extension_word * sizeof(long));

	mm = (!user_mode(regs) && KMAPAREA(mmuar)) ? &init_mm : current->mm;
	if (!mm) {
		local_irq_restore(flags);
		return -1;
	}

	pgd = pgd_offset(mm, mmuar);
	if (pgd_none(*pgd))  {
		local_irq_restore(flags);
		return -1;
	}

	pmd = pmd_offset(pgd, mmuar);
	if (pmd_none(*pmd)) {
		local_irq_restore(flags);
		return -1;
	}

	pte = (KMAPAREA(mmuar)) ? pte_offset_kernel(pmd, mmuar)
				: pte_offset_map(pmd, mmuar);
	if (pte_none(*pte) || !pte_present(*pte)) {
		local_irq_restore(flags);
		return -1;
	}

	if (write) {
		if (!pte_write(*pte)) {
			local_irq_restore(flags);
			return -1;
		}
		set_pte(pte, pte_mkdirty(*pte));
	}

	set_pte(pte, pte_mkyoung(*pte));
	asid = mm->context & 0xff;
	if (!pte_dirty(*pte) && !KMAPAREA(mmuar))
		set_pte(pte, pte_wrprotect(*pte));

	mmutr = (mmuar & PAGE_MASK) | (asid << MMUTR_IDN) | MMUTR_V;
	if ((mmuar < TASK_UNMAPPED_BASE) || (mmuar >= TASK_SIZE))
		mmutr |= (pte->pte & CF_PAGE_MMUTR_MASK) >> CF_PAGE_MMUTR_SHIFT;
	mmu_write(MMUTR, mmutr);

	mmu_write(MMUDR, (pte_val(*pte) & PAGE_MASK) |
		((pte->pte) & CF_PAGE_MMUDR_MASK) | MMUDR_SZ_8KB | MMUDR_X);

	if (dtlb)
		mmu_write(MMUOR, MMUOR_ACC | MMUOR_UAA);
	else
		mmu_write(MMUOR, MMUOR_ITLB | MMUOR_ACC | MMUOR_UAA);

	local_irq_restore(flags);
	return 0;
}