/*
 * Last of all, we look at what happens first of all.  The very first time the
 * Guest makes a hypercall, we end up here to set things up:
 */
static void initialize(struct lg_cpu *cpu)
{
	/*
	 * You can't do anything until you're initialized.  The Guest knows the
	 * rules, so we're unforgiving here.
	 */
	if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
		kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
		return;
	}

	/* Arch-specific setup: checks and maps "struct lguest_data". */
	if (lguest_arch_init_hypercalls(cpu))
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

	/*
	 * The Guest tells us where we're not to deliver interrupts by putting
	 * the range of addresses into "struct lguest_data".
	 */
	if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
	    || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

	/*
	 * We write the current time into the Guest's data page once so it can
	 * set its clock.
	 */
	write_timestamp(cpu);

	/* page_tables.c will also do some setup. */
	page_table_guest_data_init(cpu);

	/*
	 * This is the one case where the above accesses might have been the
	 * first write to a Guest page.  This may have caused a copy-on-write
	 * fault, but the old page might be (read-only) in the Guest
	 * pagetable.
	 */
	guest_pagetable_clear_all(cpu);
}
/*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
void page_table_guest_data_init(struct lg_cpu *cpu)
{
	/*
	 * We tell the Guest that it can't use the virtual addresses
	 * used by the Switcher.  This trick is equivalent to 4GB -
	 * switcher_addr.
	 */
	u32 top = ~switcher_addr + 1;

	/* We get the kernel address: above this is all kernel memory. */
	if (get_user(cpu->lg->kernel_address,
		     &cpu->lg->lguest_data->kernel_address)
		/*
		 * We tell the Guest that it can't use the top virtual
		 * addresses (used by the Switcher).
		 */
	    || put_user(top, &cpu->lg->lguest_data->reserve_mem)) {
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
		return;
	}

	/*
	 * In flush_user_mappings() we loop from 0 to
	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
	 * Switcher mappings, so check that now.
	 */
	if (cpu->lg->kernel_address >= switcher_addr)
		kill_guest(cpu, "bad kernel address %#lx",
			   cpu->lg->kernel_address);
}
/* We walk down the guest page tables to get a guest-physical address */
unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
{
	pgd_t gpgd;
	pte_t gpte;
#ifdef CONFIG_X86_PAE
	pmd_t gpmd;
#endif

	/* Still not set up?  Just map 1:1. */
	if (unlikely(cpu->linear_pages))
		return vaddr;

	/* First step: get the top-level Guest page table entry. */
	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
	/* Toplevel not present?  We can't map it in. */
	if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) {
		kill_guest(cpu, "Bad address %#lx", vaddr);
		return -1UL;
	}

#ifdef CONFIG_X86_PAE
	/* Under PAE there's a middle level: fetch and check the PMD. */
	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
	/*
	 * NOTE(review): a non-present PMD kills the Guest but we still fall
	 * through and read the PTE below -- presumably reads on a dying
	 * Guest are harmless; confirm against lgread()/kill_guest().
	 */
	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
		kill_guest(cpu, "Bad address %#lx", vaddr);
	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
#else
	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
#endif
	if (!(pte_flags(gpte) & _PAGE_PRESENT))
		kill_guest(cpu, "Bad address %#lx", vaddr);

	/* Combine the page frame number with the offset within the page. */
	return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
}
/*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
void page_table_guest_data_init(struct lg_cpu *cpu)
{
	/* We get the kernel address: above this is all kernel memory. */
	if (get_user(cpu->lg->kernel_address,
		     &cpu->lg->lguest_data->kernel_address)
		/*
		 * We tell the Guest that it can't use the top 2 or 4 MB
		 * of virtual addresses used by the Switcher.
		 */
	    || put_user(RESERVE_MEM * 1024 * 1024,
			&cpu->lg->lguest_data->reserve_mem)) {
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
		return;
	}

	/*
	 * In flush_user_mappings() we loop from 0 to
	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
	 * Switcher mappings, so check that now.
	 */
#ifdef CONFIG_X86_PAE
	if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
	    pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
#else
	if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
#endif
		kill_guest(cpu, "bad kernel address %#lx",
			   cpu->lg->kernel_address);
}
/*H:124
 * Asynchronous hypercalls are easy: we just look in the array in the
 * Guest's "struct lguest_data" to see if any new ones are marked "ready".
 *
 * We are careful to do these in order: obviously we respect the order the
 * Guest put them in the ring, but we also promise the Guest that they will
 * happen before any normal hypercall (which is why we check this before
 * checking for a normal hcall).
 */
static void do_async_hcalls(struct lg_cpu *cpu)
{
	unsigned int i;
	u8 st[LHCALL_RING_SIZE];

	/* For simplicity, we copy the entire call status array in at once. */
	if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status,
			   sizeof(st)))
		return;

	/* We process "struct lguest_data"s hcalls[] ring once. */
	for (i = 0; i < ARRAY_SIZE(st); i++) {
		struct hcall_args args;
		/*
		 * We remember where we were up to from last time.  This makes
		 * sure that the hypercalls are done in the order the Guest
		 * places them in the ring.
		 */
		unsigned int n = cpu->next_hcall;

		/* 0xFF means there's no call here (yet). */
		if (st[n] == 0xFF)
			break;

		/*
		 * OK, we have hypercall.  Increment the "next_hcall" cursor,
		 * and wrap back to 0 if we reach the end.
		 */
		if (++cpu->next_hcall == LHCALL_RING_SIZE)
			cpu->next_hcall = 0;

		/*
		 * Copy the hypercall arguments into a local copy of the
		 * hcall_args struct.
		 */
		if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
				   sizeof(struct hcall_args))) {
			kill_guest(cpu, "Fetching async hypercalls");
			break;
		}

		/* Do the hypercall, same as a normal one. */
		do_hcall(cpu, &args);

		/* Mark the hypercall done. */
		if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
			kill_guest(cpu, "Writing result for async hypercall");
			break;
		}

		/*
		 * Stop doing hypercalls if they want to notify the Launcher:
		 * it needs to service this first.
		 */
		if (cpu->pending_notify)
			break;
	}
}
/*
 * So, when pin_stack_pages() asks us to pin a page, we check if it's already
 * in the page tables, and if not, we call demand_page() with error code 2
 * (meaning "write").
 */
void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
{
	unsigned long iomem;

	/* Already mapped writable?  Nothing to do. */
	if (page_writable(cpu, vaddr))
		return;

	/* Try to fault it in as a write (error code 2). */
	if (demand_page(cpu, vaddr, 2, &iomem))
		return;

	/* Neither worked: the Guest gave us a bad stack. */
	kill_guest(cpu, "bad stack page %#lx", vaddr);
}
/*
 * This actually makes the Guest start executing the given interrupt/trap
 * handler.
 *
 * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
 * interrupt or trap.  It's split into two parts for traditional reasons: gcc
 * on i386 used to be frightened by 64 bit numbers.
 */
static void guest_run_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi)
{
	/*
	 * If we're already in the kernel, we don't change stacks.  Otherwise
	 * switch to the kernel stack segment the Guest registered via
	 * guest_set_stack().
	 *
	 * BUGFIX: this used to assign cpu->esp1 (the saved stack *pointer*)
	 * to the ss segment register; the saved stack *segment* is cpu->ss1.
	 */
	if ((cpu->regs->ss&0x3) != GUEST_PL)
		cpu->regs->ss = cpu->ss1;

	/*
	 * Set the code segment and the address to execute.
	 */
	cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
	cpu->regs->eip = idt_address(lo, hi);

	/*
	 * Trapping always clears these flags:
	 * TF: Trap flag
	 * VM: Virtual 8086 mode
	 * RF: Resume
	 * NT: Nested task.
	 */
	cpu->regs->eflags &=
		~(X86_EFLAGS_TF|X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT);

	/*
	 * There are two kinds of interrupt handlers: 0xE is an "interrupt
	 * gate" which expects interrupts to be disabled on entry.
	 */
	if (idt_type(lo, hi) == 0xE)
		if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
			kill_guest(cpu, "Disabling interrupts");
}
/*
 * Direct traps also mean that we need to know whenever the Guest wants to use
 * a different kernel stack, so we can change the IDT entries to use that
 * stack.  The IDT entries expect a virtual address, so unlike most addresses
 * the Guest gives us, the "esp" (stack pointer) value here is virtual, not
 * physical.
 *
 * In Linux each process has its own kernel stack, so this happens a lot: we
 * change stacks on each context switch.
 */
void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
{
	/*
	 * You are not allowed have a stack segment with privilege level 0:
	 * bad Guest!
	 *
	 * BUGFIX: "seg" is a u32, so print it with %u rather than %i (which
	 * would show huge values as negative).
	 */
	if ((seg & 0x3) != GUEST_PL)
		kill_guest(cpu, "bad stack segment %u", seg);
	/* We only expect one or two stack pages. */
	if (pages > 2)
		kill_guest(cpu, "bad stack pages %u", pages);
	/*
	 * Save where the stack is, and how many pages.  (kill_guest() above
	 * only marks the Guest for death, so recording the values anyway
	 * matches the original behaviour.)
	 */
	cpu->ss1 = seg;
	cpu->esp1 = esp;
	cpu->lg->stack_pages = pages;
	/* Make sure the new stack pages are mapped. */
	pin_stack_pages(cpu);
}
/* This is the write (copy into Guest) version. */
void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
	       unsigned bytes)
{
	/* Only touch Launcher memory if the Guest range checks out... */
	if (lguest_address_ok(cpu->lg, addr, bytes)) {
		/* ...and the copy itself must succeed completely. */
		if (copy_to_user(cpu->lg->mem_base + addr, b, bytes) == 0)
			return;
	}
	kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
}
/*H:340
 * Converting a Guest page table entry to a shadow (ie. real) page table
 * entry can be a little tricky.  The flags are (almost) the same, but the
 * Guest PTE contains a virtual page number: the CPU needs the real page
 * number.
 */
static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write)
{
	unsigned long pfn, base, flags;

	/*
	 * The Guest sets the global flag, because it thinks that it is using
	 * PGE.  We only told it to use PGE so it would tell us whether it was
	 * flushing a kernel mapping or a userspace mapping.  We don't actually
	 * use the global bit, so throw it away.
	 */
	flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);

	/* The Guest's pages are offset inside the Launcher. */
	base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE;

	/*
	 * We need a temporary "unsigned long" variable to hold the answer from
	 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
	 * fit in spte.pfn.  get_pfn() finds the real physical number of the
	 * page, given the virtual number.
	 */
	pfn = get_pfn(base + pte_pfn(gpte), write);
	if (pfn == -1UL) {
		kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte));
		/*
		 * When we destroy the Guest, we'll go through the shadow page
		 * tables and release_pte() them.  Make sure we don't think
		 * this one is valid!
		 */
		flags = 0;
	}

	/* Now we assemble our shadow PTE from the page number and flags. */
	return pfn_pte(pfn, __pgprot(flags));
}
/*H:410
 * Updating a PTE entry is a little trickier.
 *
 * We keep track of several different page tables (the Guest uses one for each
 * process, so it makes sense to cache at least a few).  Each of these have
 * identical kernel parts: ie. every mapping above PAGE_OFFSET is the same for
 * all processes.  So when the page table above that address changes, we update
 * all the page tables, not just the current one.  This is rare.
 *
 * The benefit is that when we have to track a new page table, we can keep all
 * the kernel mappings.  This speeds up context switch immensely.
 */
void guest_set_pte(struct lg_cpu *cpu,
		   unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
{
	/* We don't let you remap the Switcher; we need it to get back! */
	if (vaddr >= switcher_addr) {
		kill_guest(cpu, "attempt to set pte into Switcher pages");
		return;
	}

	/*
	 * Kernel mappings must be changed on all top levels.  Slow, but doesn't
	 * happen often.
	 */
	if (vaddr >= cpu->lg->kernel_address) {
		unsigned int i;
		for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
			if (cpu->lg->pgdirs[i].pgdir)
				__guest_set_pte(cpu, i, vaddr, gpte);
	} else {
		/* Is this page table one we have a shadow for? */
		int pgdir = find_pgdir(cpu->lg, gpgdir);
		if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs))
			/* If so, do the update. */
			__guest_set_pte(cpu, pgdir, vaddr, gpte);
	}
}
/*
 * This routine supplies the Guest with time: it's used for wallclock time at
 * initial boot and as a rough time source if the TSC isn't available.
 */
void write_timestamp(struct lguest *lg)
{
	struct timespec now;

	/* Read the Host's wallclock time... */
	ktime_get_real_ts(&now);
	/* ...and copy it into the Guest's "struct lguest_data" page. */
	if (copy_to_user(&lg->lguest_data->time,
			 &now, sizeof(struct timespec)))
		kill_guest(lg, "Writing timestamp");
}
/*
 * This is the version we normally use: kills the Guest if it uses a
 * bad address.
 *
 * Returns the guest-physical address for vaddr; on failure the Guest has
 * already been killed and we return 0.
 */
unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
{
	/*
	 * BUGFIX: initialize paddr.  __guest_pa() presumably only stores a
	 * result on success, so the old code returned an uninitialized
	 * value (undefined behaviour) on the failure path.
	 */
	unsigned long paddr = 0;

	if (!__guest_pa(cpu, vaddr, &paddr))
		kill_guest(cpu, "Bad address %#lx", vaddr);
	return paddr;
}
/*
 * We also throw away everything when a Guest tells us it's changed a kernel
 * mapping.  Since kernel mappings are in every page table, it's easiest to
 * throw them all away.  This traps the Guest in amber for a while as
 * everything faults back in, but it's rare.
 */
void guest_pagetable_clear_all(struct lg_cpu *cpu)
{
	/* Drop every shadow page table entry. */
	release_all_pagetables(cpu->lg);
	/* We need the Guest kernel stack mapped again. */
	pin_stack_pages(cpu);
	/* And we need Switcher allocated. */
	if (!allocate_switcher_mapping(cpu))
		kill_guest(cpu, "Cannot populate switcher mapping");
}
/*
 * Sanity-check a Guest PMD entry: only _PAGE_TABLE flags are allowed, and
 * the page frame number must lie inside the Guest's physical memory.
 * Kills the Guest and returns false on a bad entry.
 */
static bool check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
{
	bool flags_ok = (pmd_flags(gpmd) & ~_PAGE_TABLE) == 0;
	bool pfn_ok = pmd_pfn(gpmd) < cpu->lg->pfn_limit;

	if (flags_ok && pfn_ok)
		return true;

	kill_guest(cpu, "bad page middle directory entry");
	return false;
}
/*
 * Sanity-check a Guest PGD entry: only flags within CHECK_GPGD_MASK are
 * allowed, and the page frame number must lie inside the Guest's physical
 * memory.  Kills the Guest and returns false on a bad entry.
 */
static bool check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
{
	bool flags_ok = (pgd_flags(gpgd) & ~CHECK_GPGD_MASK) == 0;
	bool pfn_ok = pgd_pfn(gpgd) < cpu->lg->pfn_limit;

	if (flags_ok && pfn_ok)
		return true;

	kill_guest(cpu, "bad page directory entry");
	return false;
}
/*
 * Sanity-check a Guest PTE: large-page (PSE) entries are not allowed, and
 * the page frame number must lie inside the Guest's physical memory.
 * Kills the Guest and returns false on a bad entry.
 */
static bool check_gpte(struct lg_cpu *cpu, pte_t gpte)
{
	bool no_pse = (pte_flags(gpte) & _PAGE_PSE) == 0;
	bool pfn_ok = pte_pfn(gpte) < cpu->lg->pfn_limit;

	if (no_pse && pfn_ok)
		return true;

	kill_guest(cpu, "bad page table entry");
	return false;
}
/*
 * This routine copies memory from the Guest.  Here we can see how useful the
 * kill_lguest() routine we met in the Launcher can be: we return a random
 * value (all zeroes) instead of needing to return an error.
 */
void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
{
	/* Happy path: range is valid and the whole copy succeeds. */
	if (lguest_address_ok(cpu->lg, addr, bytes) &&
	    copy_from_user(b, cpu->lg->mem_base + addr, bytes) == 0)
		return;

	/* copy_from_user should do this, but as we rely on it... */
	memset(b, 0, bytes);
	kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
}
/*
 * The very first hypercall the Guest makes must be LHCALL_LGUEST_INIT:
 * we finish setting the Guest up here.
 */
static void initialize(struct lg_cpu *cpu)
{
	/* Nothing is allowed before initialization. */
	if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
		kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
		return;
	}

	/* Arch-specific setup: checks and maps "struct lguest_data". */
	if (lguest_arch_init_hypercalls(cpu))
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

	/*
	 * The Guest tells us the range of addresses where we must not
	 * deliver interrupts, via its "struct lguest_data".
	 */
	if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
	    || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

	/* Write the current time into the Guest's data page once. */
	write_timestamp(cpu);

	/* page_tables.c does its own part of the setup. */
	page_table_guest_data_init(cpu);

	/*
	 * The accesses above may have been the first write to a Guest page,
	 * triggering copy-on-write while an old (read-only) page may still
	 * be in the Guest pagetable; flush everything so it faults back in.
	 */
	guest_pagetable_clear_all(cpu);
}
/*
 * Process the Guest's ring of asynchronous hypercalls from its
 * "struct lguest_data", in the order the Guest queued them.
 */
static void do_async_hcalls(struct lg_cpu *cpu)
{
	unsigned int i;
	u8 st[LHCALL_RING_SIZE];

	/* Copy the whole status array in at once; bail if we can't read it. */
	if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status,
			   sizeof(st)))
		return;

	/* Walk the ring at most once. */
	for (i = 0; i < ARRAY_SIZE(st); i++) {
		struct hcall_args args;
		/*
		 * "next_hcall" is our cursor from last time, keeping the
		 * calls in the order the Guest placed them.
		 */
		unsigned int n = cpu->next_hcall;

		/* 0xFF marks an empty slot: nothing (more) to do. */
		if (st[n] == 0xFF)
			break;

		/* Advance the cursor, wrapping at the end of the ring. */
		if (++cpu->next_hcall == LHCALL_RING_SIZE)
			cpu->next_hcall = 0;

		/* Fetch the hypercall arguments from the Guest. */
		if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
				   sizeof(struct hcall_args))) {
			kill_guest(cpu, "Fetching async hypercalls");
			break;
		}

		/* Do the hypercall, same as a normal one. */
		do_hcall(cpu, &args);

		/* Mark the slot done (0xFF) so the Guest can reuse it. */
		if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
			kill_guest(cpu, "Writing result for async hypercall");
			break;
		}

		/*
		 * Stop if the Guest wants to notify the Launcher: that must
		 * be serviced first.
		 */
		if (cpu->pending_notify)
			break;
	}
}
/*
 * Handle a trap that stopped the Guest: either deal with it ourselves,
 * reflect it into the Guest, or kill the Guest.
 */
void lguest_arch_handle_trap(struct lg_cpu *cpu)
{
	switch (cpu->regs->trapnum) {
	case 13: /* General Protection Fault. */
		/*
		 * An error code of 0 means it may be an instruction we can
		 * emulate on the Guest's behalf; if so, we're done.
		 */
		if (cpu->regs->errcode == 0) {
			if (emulate_insn(cpu))
				return;
		}
		break;
	case 14: /* Page fault. */
		/* If the shadow page tables just needed filling in, done. */
		if (demand_page(cpu, cpu->arch.last_pagefault,
				cpu->regs->errcode))
			return;

		/*
		 * Otherwise the Guest must handle it: pass the faulting
		 * address via the virtual cr2 field in its lguest_data
		 * (which may not be mapped yet, hence the check).
		 */
		if (cpu->lg->lguest_data &&
		    put_user(cpu->arch.last_pagefault,
			     &cpu->lg->lguest_data->cr2))
			kill_guest(cpu, "Writing cr2");
		break;
	case 7: /* Device not available (FPU). */
		/*
		 * Nothing to deliver unless cpu->ts is set -- presumably
		 * mirroring the Guest's TS flag; confirm against the code
		 * that maintains cpu->ts.
		 */
		if (!cpu->ts)
			return;
		break;
	case 32 ... 255:
		/*
		 * A hardware interrupt: the Host has already handled it, so
		 * just give other tasks a chance to run before resuming.
		 */
		cond_resched();
		return;
	case LGUEST_TRAP_ENTRY:
		/* A hypercall: the register frame holds its arguments. */
		cpu->hcall = (struct hcall_args *)cpu->regs;
		return;
	}

	/* Anything else gets reflected into the Guest, or kills it. */
	if (!deliver_trap(cpu, cpu->regs->trapnum))
		kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)",
			   cpu->regs->trapnum, cpu->regs->eip,
			   cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault
						    : cpu->regs->errcode);
}
/*H:320
 * The page table code is curly enough to need helper functions to keep it
 * clear and clean.  The kernel itself provides many of them; one advantage
 * of insisting that the Guest and Host use the same CONFIG_PAE setting.
 *
 * There are two functions which return pointers to the shadow (aka "real")
 * page tables.
 *
 * spgd_addr() takes the virtual address and returns a pointer to the top-level
 * page directory entry (PGD) for that address.  Since we keep track of several
 * page tables, the "i" argument tells us which one we're interested in (it's
 * usually the current one).
 */
static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
{
	unsigned int index = pgd_index(vaddr);

#ifndef CONFIG_X86_PAE
	/* We kill any Guest trying to touch the Switcher addresses. */
	if (index >= SWITCHER_PGD_INDEX) {
		kill_guest(cpu, "attempt to access switcher pages");
		/* Fall back to entry 0 so we still return a valid pointer. */
		index = 0;
	}
#endif
	/* Return a pointer index'th pgd entry for the i'th page table. */
	return &cpu->lg->pgdirs[i].pgdir[index];
}
/*H:400
 * (iii) Setting up a page table entry when the Guest tells us one has changed.
 *
 * Just like we did in interrupts_and_traps.c, it makes sense for us to deal
 * with the other side of page tables while we're here: what happens when the
 * Guest asks for a page table to be updated?
 *
 * We already saw that demand_page() will fill in the shadow page tables when
 * needed, so we can simply remove shadow page table entries whenever the Guest
 * tells us they've changed.  When the Guest tries to use the new entry it will
 * fault and demand_page() will fix it up.
 *
 * So with that in mind here's our code to update a (top-level) PGD entry:
 */
void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
{
	int pgdir;

	/*
	 * BUGFIX: the bound check was "idx > PTRS_PER_PGD", which let
	 * idx == PTRS_PER_PGD through and indexed one entry past the end of
	 * the shadow pgdir in release_pgd() below.  Valid indices are
	 * 0 .. PTRS_PER_PGD-1, so reject with ">=".
	 */
	if (idx >= PTRS_PER_PGD) {
		kill_guest(&lg->cpus[0], "Attempt to set pgd %u/%u",
			   idx, PTRS_PER_PGD);
		return;
	}

	/* If they're talking about a page table we have a shadow for... */
	pgdir = find_pgdir(lg, gpgdir);
	if (pgdir < ARRAY_SIZE(lg->pgdirs)) {
		/* ... throw it away. */
		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
		/* That might have been the Switcher mapping, remap it. */
		if (!allocate_switcher_mapping(&lg->cpus[0])) {
			kill_guest(&lg->cpus[0],
				   "Cannot populate switcher mapping");
		}
		lg->pgdirs[pgdir].last_host_cpu = -1;
	}
}
/*H:230
 * While we're here, dealing with delivering traps and interrupts to the
 * Guest, we might as well complete the picture: how the Guest tells us where
 * it wants them to go.  This would be simple, except making traps fast
 * requires some tricks.
 *
 * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
 * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here.
 */
void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
{
	/*
	 * Guest never handles: NMI, doublefault, spurious interrupt or
	 * hypercall.  We ignore when it tries to set them.
	 */
	if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY)
		return;

	/*
	 * Mark the IDT as changed: next time the Guest runs we'll know we have
	 * to copy this again.
	 */
	cpu->changed |= CHANGED_IDT;

	/* Check that the Guest doesn't try to step outside the bounds. */
	if (num >= ARRAY_SIZE(cpu->arch.idt))
		kill_guest(cpu, "Setting idt entry %u", num);
	else
		set_trap(cpu, &cpu->arch.idt[num], num, lo, hi);
}
/*
 * This routine then takes the PGD entry given above, which contains the
 * address of the PMD page.  It then returns a pointer to the PMD entry for the
 * given address.
 */
static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
{
	unsigned int index = pmd_index(vaddr);
	pmd_t *page;

	/* We kill any Guest trying to touch the Switcher addresses. */
	if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
	    index >= SWITCHER_PMD_INDEX) {
		kill_guest(cpu, "attempt to access switcher pages");
		/* Fall back to entry 0 so we still return a valid pointer. */
		index = 0;
	}

	/* You should never call this if the PGD entry wasn't valid */
	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));

	/* Turn the PGD's page frame number into a kernel virtual address. */
	page = __va(pgd_pfn(spgd) << PAGE_SHIFT);

	return &page[index];
}
/*H:126 i386-specific hypercall initialization: */
int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
{
	u32 tsc_speed;

	/*
	 * The pointer to the Guest's "struct lguest_data" is the only
	 * argument.  We check that address now.
	 */
	if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1,
			       sizeof(*cpu->lg->lguest_data)))
		return -EFAULT;

	/*
	 * Having checked it, we simply set lg->lguest_data to point straight
	 * into the Launcher's memory at the right place and then use
	 * copy_to_user/from_user from now on, instead of lgread/write.  I put
	 * this in to show that I'm not immune to writing stupid
	 * optimizations.
	 */
	cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1;

	/*
	 * We insist that the Time Stamp Counter exist and doesn't change with
	 * cpu frequency.  Some devious chip manufacturers decided that TSC
	 * changes could be handled in software.  I decided that time going
	 * backwards might be good for benchmarks, but it's bad for users.
	 *
	 * We also insist that the TSC be stable: the kernel detects unreliable
	 * TSCs for its own purposes, and we use that here.
	 */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
		tsc_speed = tsc_khz;
	else
		tsc_speed = 0;
	if (put_user(tsc_speed, &cpu->lg->lguest_data->tsc_khz))
		return -EFAULT;

	/* The interrupt code might not like the system call vector. */
	if (!check_syscall_vector(cpu->lg))
		kill_guest(cpu, "bad syscall vector");

	return 0;
}
/*H:620
 * This is where the Guest asks us to load a new GDT entry
 * (LHCALL_LOAD_GDT_ENTRY).  We tweak the entry and copy it in.
 */
void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
{
	/*
	 * We assume the Guest has the same number of GDT entries as the
	 * Host, otherwise we'd have to dynamically allocate the Guest GDT.
	 *
	 * BUGFIX: "num" is a u32, so print it with %u rather than %i (which
	 * would show huge values as negative).
	 */
	if (num >= ARRAY_SIZE(cpu->arch.gdt)) {
		kill_guest(cpu, "too many gdt entries %u", num);
		return;
	}

	/* Set it up, then fix it. */
	cpu->arch.gdt[num].a = lo;
	cpu->arch.gdt[num].b = hi;
	fixup_gdt_table(cpu, num, num+1);

	/*
	 * Mark that the GDT changed so the core knows it has to copy it again,
	 * even if the Guest is run on the same CPU.
	 */
	cpu->changed |= CHANGED_GDT;
}
/*H:235
 * This is the routine which actually checks the Guest's IDT entry and
 * transfers it into the entry in "struct lguest":
 */
static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
		     unsigned int num, u32 lo, u32 hi)
{
	u8 type = idt_type(lo, hi);

	/* We zero-out a not-present entry */
	if (!idt_present(lo, hi)) {
		trap->a = trap->b = 0;
		return;
	}

	/*
	 * We only support interrupt and trap gates.  (kill_guest() only
	 * marks the Guest for death, so we still fill the entry in below,
	 * matching the rest of this file's error handling.)
	 */
	if (type != 0xE && type != 0xF)
		kill_guest(cpu, "bad IDT type %i", type);

	/*
	 * We only copy the handler address, present bit, privilege level and
	 * type.  The privilege level controls where the trap can be triggered
	 * manually with an "int" instruction.  This is usually GUEST_PL,
	 * except for system calls which userspace can use.
	 */
	trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF);
	trap->b = (hi&0xFFFFEF00);
}
/*H:430
 * (iv) Switching page tables
 *
 * Now we've seen all the page table setting and manipulation, let's see
 * what happens when the Guest changes page tables (ie. changes the top-level
 * pgdir).  This occurs on almost every context switch.
 */
void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
{
	int newpgdir, repin = 0;

	/*
	 * The very first time they call this, we're actually running without
	 * any page tables; we've been making it up.  Throw them away now.
	 */
	if (unlikely(cpu->linear_pages)) {
		release_all_pagetables(cpu->lg);
		cpu->linear_pages = false;
		/* Force allocation of a new pgdir. */
		newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
	} else {
		/* Look to see if we have this one already. */
		newpgdir = find_pgdir(cpu->lg, pgtable);
	}

	/*
	 * If not, we allocate or mug an existing one: if it's a fresh one,
	 * repin gets set to 1.
	 */
	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
		newpgdir = new_pgdir(cpu, pgtable, &repin);
	/* Change the current pgd index to the new one. */
	cpu->cpu_pgd = newpgdir;
	/*
	 * If it was completely blank, we map in the Guest kernel stack and
	 * the Switcher.
	 */
	if (repin)
		pin_stack_pages(cpu);

	if (!cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped) {
		if (!allocate_switcher_mapping(cpu))
			kill_guest(cpu, "Cannot populate switcher mapping");
	}
}
/*
 * i386-specific hypercall initialization: validate and map the Guest's
 * "struct lguest_data", tell it the TSC speed, and check the syscall vector.
 * Returns 0 on success or -EFAULT on a bad Guest pointer.
 */
int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
{
	u32 tsc_speed;

	/*
	 * The pointer to the Guest's "struct lguest_data" is the hypercall's
	 * only argument; reject it if it's outside Guest memory.
	 */
	if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1,
			       sizeof(*cpu->lg->lguest_data)))
		return -EFAULT;

	/*
	 * Point lguest_data straight into the Launcher's memory so we can
	 * use copy_to_user/from_user on it from now on.
	 */
	cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1;

	/*
	 * Only advertise a TSC speed if the TSC is constant-rate and the
	 * kernel considers it stable; otherwise report 0.
	 */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
		tsc_speed = tsc_khz;
	else
		tsc_speed = 0;
	if (put_user(tsc_speed, &cpu->lg->lguest_data->tsc_khz))
		return -EFAULT;

	/* The interrupt code might not like the system call vector. */
	if (!check_syscall_vector(cpu->lg))
		kill_guest(cpu, "bad syscall vector");

	return 0;
}