/* A few legacy call sites set a PTE without telling us the top-level page
 * table or the virtual address involved.  With neither piece of information we
 * cannot describe the change to the Host, so we ask it to forget all of its
 * mappings instead.  Fortunately, this is very rare.
 *
 * The exception is early boot: the kernel builds the initial pagetables
 * through this path, and a hypercall per entry makes booting astonishingly
 * slow.  So the Host is told nothing until the first page table switch has
 * been done; current_cr3 still being zero is taken to mean it has not. */
static void lguest_set_pte(pte_t *ptep, pte_t pteval)
{
	/* The Guest's own entry is always written. */
	*ptep = pteval;

	/* Don't bother with a hypercall before initial setup. */
	if (!current_cr3)
		return;

	lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
}
/* OK, I lied.  Three "thread local storage" GDT entries do change on every
 * context switch (they are how glibc implements __thread variables), so there
 * is a hypercall dedicated to this case. */
static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
{
	/* Physical address of the thread's TLS entries, handed to the Host. */
	unsigned long tls_phys = __pa(&t->tls_array);

	/* One problem normal hardware doesn't have: the Host can't handle us
	 * removing entries we are currently using.  Clear the GS register
	 * first; if it is needed it will be reloaded anyway. */
	loadsegment(gs, 0);

	lazy_hcall(LHCALL_LOAD_TLS, tls_phys, cpu, 0);
}
/* The Guest needs to tell the Host what stack it expects traps to use. For * native hardware, this is part of the Task State Segment mentioned above in * lguest_load_tr_desc(), but to help hypervisors there's this special call. * * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data * segment), the privilege level (we're privilege level 1, the Host is 0 and * will not tolerate us trying to use that), the stack pointer, and the number * of pages in the stack. */ static void lguest_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0, THREAD_SIZE/PAGE_SIZE); }
/* Called when the kernel page tables have changed.  That is uncommon (unless
 * the Guest is using highmem, which makes the Guest extremely slow), so it is
 * worth keeping this separate from the userspace flush done by
 * lguest_flush_tlb_user(). */
static void lguest_flush_tlb_kernel(void)
{
	/* Second argument is 1 here, whereas the userspace-only flush
	 * passes 0. */
	lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
}
/* Called after the Guest has removed a large number of entries.  It tells the
 * Host that any page table entry for userspace (ie. for virtual addresses
 * below PAGE_OFFSET) might have changed. */
static void lguest_flush_tlb_user(void)
{
	/* Second argument is 0 here, whereas the kernel flush passes 1. */
	lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
}
/* Unfortunately for Lguest, the pv_mmu_ops for page tables were modelled on
 * native page table operations.  Native hardware lets you set a new page table
 * entry whenever you like, but removing one requires a TLB flush (the TLB
 * being a small cache of page table entries kept by the CPU).
 *
 * As a result lguest_set_pte_at() and lguest_set_pmd() are only called when a
 * valid entry is written, never when one is removed (ie. marked not present).
 * Removal arrives here instead: we tell the Host to set the entry for this
 * address to 0 (ie. the present bit is zero). */
static void lguest_flush_tlb_single(unsigned long addr)
{
	/* The page table the Host has recorded as current for us. */
	unsigned long cur_pgdir = lguest_data.pgdir;

	/* Simply zero it: if it was not meant to be, it will fault back in. */
	lazy_hcall(LHCALL_SET_PTE, cur_pgdir, addr, 0);
}
/* A few legacy call sites set a PTE without telling us the top-level page
 * table or the virtual address involved.  With neither piece of information we
 * cannot describe the change to the Host, so we ask it to forget all of its
 * mappings instead.  Fortunately, this is very rare.
 *
 * The exception is early boot: the kernel builds the initial pagetables
 * through this path, which makes booting astonishingly slow: 1.83 seconds!
 * So the Host is told nothing until the first page table switch has been done
 * (tracked by cr3_changed), which brings boot back to 0.25 seconds. */
static void lguest_set_pte(pte_t *ptep, pte_t pteval)
{
	/* The Guest's own entry is always written. */
	*ptep = pteval;

	/* No page table switch yet: skip the hypercall. */
	if (!cr3_changed)
		return;

	lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
}
/* The Guest calls this to set a top-level entry.  As with the pte variants,
 * we write the entry ourselves and then tell the Host which top-level page
 * was changed and the index of the entry within it. */
static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	unsigned long entry_phys, page_phys, index;

	*pmdp = pmdval;

	/* Split the entry's physical address into the page that holds it and
	 * the byte offset inside that page. */
	entry_phys = __pa(pmdp);
	page_phys = entry_phys & PAGE_MASK;

	/* Dividing the byte offset by 4 turns it into an entry index, ie.
	 * this relies on each top-level entry being 4 bytes wide. */
	index = (entry_phys & (PAGE_SIZE - 1)) / 4;

	lazy_hcall(LHCALL_SET_PMD, page_phys, index, 0);
}
/* The Guest calls this to set a second-level entry (pte), ie. to map a page
 * into a process' address space.  The entry is written first, then the Host
 * is told the toplevel and the address it corresponds to.  Because the Guest
 * uses one pagetable per process, the Host must be told which one is being
 * changed: that is mm->pgd. */
static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	unsigned long toplevel;

	*ptep = pteval;

	/* Physical address of this process' top-level page table. */
	toplevel = __pa(mm->pgd);

	lazy_hcall(LHCALL_SET_PTE, toplevel, addr, pteval.pte_low);
}
/* cr3 is the current toplevel pagetable page, and the principle is the same
 * as for cr0: keep a local copy and tell the Host when it changes.  The one
 * difference is that the local copy lives in lguest_data, because the Host
 * needs to set it upon our initial hypercall. */
static void lguest_write_cr3(unsigned long cr3)
{
	/* Update the copy kept in lguest_data. */
	lguest_data.pgdir = cr3;

	/* Tell the Host about the new top-level page table. */
	lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);

	/* Record that a page table switch has now taken place. */
	cr3_changed = true;
}
/* Intel provided a special instruction to clear the TS bit, for people too
 * cool to do it with write_cr0().  This "clts" instruction is faster, because
 * all the vowels have been optimized out. */
static void lguest_clts(void)
{
	/* Tell the Host, passing 0 for the TS value. */
	lazy_hcall(LHCALL_TS, 0, 0, 0);

	/* Mirror the change in our local copy of cr0. */
	current_cr0 = current_cr0 & ~X86_CR0_TS;
}
/* Handle a write to cr0.  Only the TS bit of the new value is passed on to
 * the Host; the complete value is remembered in our local copy, current_cr0. */
static void lguest_write_cr0(unsigned long val)
{
	/* Isolate the TS bit: it is the only part sent with the hypercall. */
	unsigned long ts_bit = val & X86_CR0_TS;

	lazy_hcall(LHCALL_TS, ts_bit, 0, 0);
	current_cr0 = val;
}
/* Report a second-level entry (pte) to the Host.  Unlike lguest_set_pte_at(),
 * this does not write the entry: it reads the value already stored at ptep and
 * tells the Host the toplevel and address it corresponds to.  Because the
 * Guest uses one pagetable per process, the Host must be told which one is
 * affected: that is mm->pgd. */
static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	/* Physical address of this process' top-level page table. */
	unsigned long toplevel = __pa(mm->pgd);

	lazy_hcall(LHCALL_SET_PTE, toplevel, addr, ptep->pte_low);
}
/* cr3 is the current toplevel pagetable page, and the principle is the same
 * as for cr0: keep a local copy, and tell the Host when it changes. */
static void lguest_write_cr3(unsigned long cr3)
{
	/* Tell the Host about the new top-level page table... */
	lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);

	/* ...then remember it in our local copy. */
	current_cr3 = cr3;
}