Exemple #1
0
/*H:481
 * This clears the Switcher mappings for cpu #i.
 */
static void remove_switcher_percpu_map(struct lg_cpu *cpu, unsigned int i)
{
	unsigned long base = switcher_addr + PAGE_SIZE + i * PAGE_SIZE*2;
	pte_t *pte;

	/* Clear the mappings for both pages. */
	pte = find_spte(cpu, base, false, 0, 0);
	release_pte(*pte);
	set_pte(pte, __pte(0));

	pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0);
	release_pte(*pte);
	set_pte(pte, __pte(0));
}
Exemple #2
0
static void release_pmd(pmd_t *spmd)
{
	/* If the entry's not present, there's nothing to release. */
	if (pmd_flags(*spmd) & _PAGE_PRESENT) {
		unsigned int i;
		pte_t *ptepage = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
		/* For each entry in the page, we might need to release it. */
		for (i = 0; i < PTRS_PER_PTE; i++)
			release_pte(ptepage[i]);
		/* Now we can free the page of PTEs */
		free_page((long)ptepage);
		/* And zero out the PMD entry so we never release it twice. */
		set_pmd(spmd, __pmd(0));
	}
}
Exemple #3
0
/*H:420
 * This is the routine which actually sets the page table entry for then
 * "idx"'th shadow page table.
 *
 * Normally, we can just throw out the old entry and replace it with 0: if they
 * use it demand_page() will put the new entry in.  We need to do this anyway:
 * The Guest expects _PAGE_ACCESSED to be set on its PTE the first time a page
 * is read from, and _PAGE_DIRTY when it's written to.
 *
 * But Avi Kivity pointed out that most Operating Systems (Linux included) set
 * these bits on PTEs immediately anyway.  This is done to save the CPU from
 * having to update them, but it helps us the same way: if they set
 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
 */
static void __guest_set_pte(struct lg_cpu *cpu, int idx,
		       unsigned long vaddr, pte_t gpte)
{
	/* Look up the matching shadow page directory entry. */
	pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
#ifdef CONFIG_X86_PAE
	pmd_t *spmd;
#endif

	/* If the top level isn't present, there's no entry to update. */
	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
#ifdef CONFIG_X86_PAE
		spmd = spmd_addr(cpu, *spgd, vaddr);
		if (pmd_flags(*spmd) & _PAGE_PRESENT) {
#endif
			/* Otherwise, start by releasing the existing entry. */
			pte_t *spte = spte_addr(cpu, *spgd, vaddr);
			release_pte(*spte);

			/*
			 * If they're setting this entry as dirty or accessed,
			 * we might as well put that entry they've given us in
			 * now.  This shaves 10% off a copy-on-write
			 * micro-benchmark.
			 */
			if ((pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED))
			    && !gpte_in_iomem(cpu, gpte)) {
				if (!check_gpte(cpu, gpte))
					return;
				set_pte(spte,
					gpte_to_spte(cpu, gpte,
						pte_flags(gpte) & _PAGE_DIRTY));
			} else {
				/*
				 * Otherwise kill it and we can demand_page()
				 * it in later.
				 */
				set_pte(spte, __pte(0));
			}
#ifdef CONFIG_X86_PAE
		}
#endif
	}
}
Exemple #4
0
/*H:450
 * If we chase down the release_pgd() code, the non-PAE version looks like
 * this.  The PAE version is almost identical, but instead of calling
 * release_pte it calls release_pmd(), which looks much like this.
 */
static void release_pgd(pgd_t *spgd)
{
	/* If the entry's not present, there's nothing to release. */
	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
		unsigned int i;
		/*
		 * Converting the pfn to find the actual PTE page is easy: turn
		 * the page number into a physical address, then convert to a
		 * virtual address (easy for kernel pages like this one).
		 */
		pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
		/* For each entry in the page, we might need to release it. */
		for (i = 0; i < PTRS_PER_PTE; i++)
			release_pte(ptepage[i]);
		/* Now we can free the page of PTEs */
		free_page((long)ptepage);
		/* And zero out the PGD entry so we never release it twice. */
		*spgd = __pgd(0);
	}
}
Exemple #5
0
/*H:330
 * (i) Looking up a page table entry when the Guest faults.
 *
 * We saw this call in run_guest(): when we see a page fault in the Guest, we
 * come here.  That's because we only set up the shadow page tables lazily as
 * they're needed, so we get page faults all the time and quietly fix them up
 * and return to the Guest without it knowing.
 *
 * If we fixed up the fault (ie. we mapped the address), this routine returns
 * true.  Otherwise, it was a real fault and we need to tell the Guest.
 */
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
{
	pgd_t gpgd;
	pgd_t *spgd;
	unsigned long gpte_ptr;
	pte_t gpte;
	pte_t *spte;

	/* Mid level for PAE. */
#ifdef CONFIG_X86_PAE
	pmd_t *spmd;
	pmd_t gpmd;
#endif

	/* First step: get the top-level Guest page table entry. */
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpgd = __pgd(CHECK_GPGD_MASK);
	} else {
		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
		/* Toplevel not present?  We can't map it in. */
		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
			return false;
	}

	/* Now look at the matching shadow entry. */
	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
	if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
		/* No shadow entry: allocate a new shadow PTE page. */
		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
		/*
		 * This is not really the Guest's fault, but killing it is
		 * simple for this corner case.
		 */
		if (!ptepage) {
			kill_guest(cpu, "out of memory allocating pte page");
			return false;
		}
		/* We check that the Guest pgd is OK. */
		check_gpgd(cpu, gpgd);
		/*
		 * And we copy the flags to the shadow PGD entry.  The page
		 * number in the shadow PGD is the page we just allocated.
		 */
		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
	}

#ifdef CONFIG_X86_PAE
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpmd = __pmd(_PAGE_TABLE);
	} else {
		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
		/* Middle level not present?  We can't map it in. */
		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			return false;
	}

	/* Now look at the matching shadow entry. */
	spmd = spmd_addr(cpu, *spgd, vaddr);

	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
		/* No shadow entry: allocate a new shadow PTE page. */
		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);

		/*
		 * This is not really the Guest's fault, but killing it is
		 * simple for this corner case.
		 */
		if (!ptepage) {
			kill_guest(cpu, "out of memory allocating pte page");
			return false;
		}

		/* We check that the Guest pmd is OK. */
		check_gpmd(cpu, gpmd);

		/*
		 * And we copy the flags to the shadow PMD entry.  The page
		 * number in the shadow PMD is the page we just allocated.
		 */
		set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
	}

	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
#else
	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
#endif

	if (unlikely(cpu->linear_pages)) {
		/* Linear?  Make up a PTE which points to same page. */
		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
	} else {
		/* Read the actual PTE value. */
		gpte = lgread(cpu, gpte_ptr, pte_t);
	}

	/* If this page isn't in the Guest page tables, we can't page it in. */
	if (!(pte_flags(gpte) & _PAGE_PRESENT))
		return false;

	/*
	 * Check they're not trying to write to a page the Guest wants
	 * read-only (bit 2 of errcode == write).
	 */
	if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
		return false;

	/* User access to a kernel-only page? (bit 3 == user access) */
	if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
		return false;

	/*
	 * Check that the Guest PTE flags are OK, and the page number is below
	 * the pfn_limit (ie. not mapping the Launcher binary).
	 */
	check_gpte(cpu, gpte);

	/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
	gpte = pte_mkyoung(gpte);
	if (errcode & 2)
		gpte = pte_mkdirty(gpte);

	/* Get the pointer to the shadow PTE entry we're going to set. */
	spte = spte_addr(cpu, *spgd, vaddr);

	/*
	 * If there was a valid shadow PTE entry here before, we release it.
	 * This can happen with a write to a previously read-only entry.
	 */
	release_pte(*spte);

	/*
	 * If this is a write, we insist that the Guest page is writable (the
	 * final arg to gpte_to_spte()).
	 */
	if (pte_dirty(gpte))
		*spte = gpte_to_spte(cpu, gpte, 1);
	else
		/*
		 * If this is a read, don't set the "writable" bit in the page
		 * table entry, even if the Guest says it's writable.  That way
		 * we will come back here when a write does actually occur, so
		 * we can update the Guest's _PAGE_DIRTY flag.
		 */
		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

	/*
	 * Finally, we write the Guest PTE entry back: we've set the
	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
	 */
	if (likely(!cpu->linear_pages))
		lgwrite(cpu, gpte_ptr, pte_t, gpte);

	/*
	 * The fault is fixed, the page table is populated, the mapping
	 * manipulated, the result returned and the code complete.  A small
	 * delay and a trace of alliteration are the only indications the Guest
	 * has that a page fault occurred at all.
	 */
	return true;
}
Exemple #6
0
/*H:330
 * (i) Looking up a page table entry when the Guest faults.
 *
 * We saw this call in run_guest(): when we see a page fault in the Guest, we
 * come here.  That's because we only set up the shadow page tables lazily as
 * they're needed, so we get page faults all the time and quietly fix them up
 * and return to the Guest without it knowing.
 *
 * If we fixed up the fault (ie. we mapped the address), this routine returns
 * true.  Otherwise, it was a real fault and we need to tell the Guest.
 *
 * There's a corner case: they're trying to access memory between
 * pfn_limit and device_limit, which is I/O memory.  In this case, we
 * return false and set @iomem to the physical address, so the the
 * Launcher can handle the instruction manually.
 */
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode,
		 unsigned long *iomem)
{
	unsigned long gpte_ptr;
	pte_t gpte;
	pte_t *spte;
	pmd_t gpmd;
	pgd_t gpgd;

	*iomem = 0;

	/* We never demand page the Switcher, so trying is a mistake. */
	if (vaddr >= switcher_addr)
		return false;

	/* First step: get the top-level Guest page table entry. */
	if (unlikely(cpu->linear_pages)) {
		/* Faking up a linear mapping. */
		gpgd = __pgd(CHECK_GPGD_MASK);
	} else {
		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
		/* Toplevel not present?  We can't map it in. */
		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
			return false;

		/* 
		 * This kills the Guest if it has weird flags or tries to
		 * refer to a "physical" address outside the bounds.
		 */
		if (!check_gpgd(cpu, gpgd))
			return false;
	}

	/* This "mid-level" entry is only used for non-linear, PAE mode. */
	gpmd = __pmd(_PAGE_TABLE);

#ifdef CONFIG_X86_PAE
	if (likely(!cpu->linear_pages)) {
		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
		/* Middle level not present?  We can't map it in. */
		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			return false;

		/* 
		 * This kills the Guest if it has weird flags or tries to
		 * refer to a "physical" address outside the bounds.
		 */
		if (!check_gpmd(cpu, gpmd))
			return false;
	}

	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
#else
	/*
	 * OK, now we look at the lower level in the Guest page table: keep its
	 * address, because we might update it later.
	 */
	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
#endif

	if (unlikely(cpu->linear_pages)) {
		/* Linear?  Make up a PTE which points to same page. */
		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
	} else {
		/* Read the actual PTE value. */
		gpte = lgread(cpu, gpte_ptr, pte_t);
	}

	/* If this page isn't in the Guest page tables, we can't page it in. */
	if (!(pte_flags(gpte) & _PAGE_PRESENT))
		return false;

	/*
	 * Check they're not trying to write to a page the Guest wants
	 * read-only (bit 2 of errcode == write).
	 */
	if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
		return false;

	/* User access to a kernel-only page? (bit 3 == user access) */
	if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
		return false;

	/* If they're accessing io memory, we expect a fault. */
	if (gpte_in_iomem(cpu, gpte)) {
		*iomem = (pte_pfn(gpte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
		return false;
	}

	/*
	 * Check that the Guest PTE flags are OK, and the page number is below
	 * the pfn_limit (ie. not mapping the Launcher binary).
	 */
	if (!check_gpte(cpu, gpte))
		return false;

	/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
	gpte = pte_mkyoung(gpte);
	if (errcode & 2)
		gpte = pte_mkdirty(gpte);

	/* Get the pointer to the shadow PTE entry we're going to set. */
	spte = find_spte(cpu, vaddr, true, pgd_flags(gpgd), pmd_flags(gpmd));
	if (!spte)
		return false;

	/*
	 * If there was a valid shadow PTE entry here before, we release it.
	 * This can happen with a write to a previously read-only entry.
	 */
	release_pte(*spte);

	/*
	 * If this is a write, we insist that the Guest page is writable (the
	 * final arg to gpte_to_spte()).
	 */
	if (pte_dirty(gpte))
		*spte = gpte_to_spte(cpu, gpte, 1);
	else
		/*
		 * If this is a read, don't set the "writable" bit in the page
		 * table entry, even if the Guest says it's writable.  That way
		 * we will come back here when a write does actually occur, so
		 * we can update the Guest's _PAGE_DIRTY flag.
		 */
		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

	/*
	 * Finally, we write the Guest PTE entry back: we've set the
	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
	 */
	if (likely(!cpu->linear_pages))
		lgwrite(cpu, gpte_ptr, pte_t, gpte);

	/*
	 * The fault is fixed, the page table is populated, the mapping
	 * manipulated, the result returned and the code complete.  A small
	 * delay and a trace of alliteration are the only indications the Guest
	 * has that a page fault occurred at all.
	 */
	return true;
}