/*H:040
 * This is the i386-specific code to set up and run the Guest.  Interrupts
 * are disabled: we own the CPU.
 */
void lguest_arch_run_guest(struct lg_cpu *cpu)
{
	/*
	 * Remember the awfully-named TS bit?  If the Guest has asked to set it
	 * we set it now, so we can trap and pass that trap to the Guest if it
	 * uses the FPU.
	 */
	if (cpu->ts && user_has_fpu())
		stts();

	/*
	 * SYSENTER is an optimized way of doing system calls.  We can't allow
	 * it because it always jumps to privilege level 0.  A normal Guest
	 * won't try it because we don't advertise it in CPUID, but a malicious
	 * Guest (or malicious Guest userspace program) could, so we tell the
	 * CPU to disable it before running the Guest.
	 */
	if (boot_cpu_has(X86_FEATURE_SEP))
		wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);

	/*
	 * Now we actually run the Guest.  It will return when something
	 * interesting happens, and we can examine its registers to see what it
	 * was doing.
	 */
	run_guest_once(cpu, lguest_pages(raw_smp_processor_id()));

	/*
	 * Note that the "regs" structure contains two extra entries which are
	 * not really registers: a trap number which says what interrupt or
	 * trap made the switcher code come back, and an error code which some
	 * traps set.
	 */

	/* Restore SYSENTER if it's supposed to be on. */
	if (boot_cpu_has(X86_FEATURE_SEP))
		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);

	/* Clear the host TS bit if it was set above. */
	if (cpu->ts && user_has_fpu())
		clts();

	/*
	 * If the Guest page faulted, then the cr2 register will tell us the
	 * bad virtual address.  We have to grab this now, because once we
	 * re-enable interrupts an interrupt could fault and thus overwrite
	 * cr2, or we could even move off to a different CPU.
	 */
	if (cpu->regs->trapnum == 14)
		cpu->arch.last_pagefault = read_cr2();
	/*
	 * Similarly, if we took a trap because the Guest used the FPU,
	 * we have to restore the FPU it expects to see.
	 * math_state_restore() may sleep and we may even move off to
	 * a different CPU.  So all the critical stuff should be done
	 * before this.
	 */
	else if (cpu->regs->trapnum == 7 && !user_has_fpu())
		math_state_restore();
}
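/*
 * A minimal, standalone sketch (not lguest code) of the idea the comments
 * above describe: the Switcher hands back a trap number and error code
 * alongside the saved registers, and the Host dispatches on the trap number.
 * The struct demo_regs and demo_dispatch() names below are invented purely
 * for illustration; the real register block is lguest's struct lguest_regs
 * and the real dispatch lives in lguest_arch_handle_trap().  This compiles
 * and runs as an ordinary userspace program.
 */
#include <stdio.h>

/* Hypothetical stand-in for the saved registers plus the two extra entries. */
struct demo_regs {
	unsigned long eax, ebx, ecx, edx;	/* ...ordinary registers... */
	unsigned long trapnum;			/* which trap/interrupt fired */
	unsigned long errcode;			/* error code, if the trap pushes one */
};

static void demo_dispatch(const struct demo_regs *regs, unsigned long cr2)
{
	switch (regs->trapnum) {
	case 14:	/* Page fault: cr2 held the faulting virtual address. */
		printf("page fault at %#lx (errcode %#lx)\n", cr2, regs->errcode);
		break;
	case 7:		/* Device not available: the Guest touched the FPU. */
		printf("FPU used: restore the FPU state the Guest expects\n");
		break;
	default:	/* Anything else gets reflected back into the Guest. */
		printf("reflect trap %lu back to the Guest\n", regs->trapnum);
	}
}

int main(void)
{
	struct demo_regs regs = { .trapnum = 14, .errcode = 2 };

	demo_dispatch(&regs, 0xdeadb000);	/* pretend cr2 said this */
	return 0;
}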
/*H:020
 * Now the Switcher is mapped and everything else is ready, we need to do
 * some more i386-specific initialization.
 */
void __init lguest_arch_host_init(void)
{
	int i;

	/*
	 * Most of the x86/switcher_32.S doesn't care that it's been moved; on
	 * Intel, jumps are relative, and it doesn't access any references to
	 * external code or data.
	 *
	 * The only exception is the interrupt handlers in switcher.S: their
	 * addresses are placed in a table (default_idt_entries), so we need to
	 * update the table with the new addresses.  switcher_offset() is a
	 * convenience function which returns the distance between the
	 * compiled-in switcher code and the high-mapped copy we just made.
	 */
	for (i = 0; i < IDT_ENTRIES; i++)
		default_idt_entries[i] += switcher_offset();

	/*
	 * Set up the Switcher's per-cpu areas.
	 *
	 * Each CPU gets two pages of its own within the high-mapped region
	 * (aka. "struct lguest_pages").  Much of this can be initialized now,
	 * but some depends on what Guest we are running (which is set up in
	 * copy_in_guest_info()).
	 */
	for_each_possible_cpu(i) {
		/* lguest_pages() returns this CPU's two pages. */
		struct lguest_pages *pages = lguest_pages(i);
		/* This is a convenience pointer to make the code neater. */
		struct lguest_ro_state *state = &pages->state;

		/*
		 * The Global Descriptor Table: the Host has a different one
		 * for each CPU.  We keep a descriptor for the GDT which says
		 * where it is and how big it is (the size is actually the last
		 * byte, not the size, hence the "-1").
		 */
		state->host_gdt_desc.size = GDT_SIZE-1;
		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);

		/*
		 * All CPUs on the Host use the same Interrupt Descriptor
		 * Table, so we just use store_idt(), which gets this CPU's IDT
		 * descriptor.
		 */
		store_idt(&state->host_idt_desc);

		/*
		 * The descriptors for the Guest's GDT and IDT can be filled
		 * out now, too.  We copy the GDT & IDT into ->guest_gdt and
		 * ->guest_idt before actually running the Guest.
		 */
		state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
		state->guest_idt_desc.address = (long)&state->guest_idt;
		state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
		state->guest_gdt_desc.address = (long)&state->guest_gdt;

		/*
		 * We know where we want the stack to be when the Guest enters
		 * the Switcher: in pages->regs.  The stack grows upwards, so
		 * we start it at the end of that structure.
		 */
		state->guest_tss.sp0 = (long)(&pages->regs + 1);

		/*
		 * And this is the GDT entry to use for the stack: we keep a
		 * couple of special LGUEST entries.
		 */
		state->guest_tss.ss0 = LGUEST_DS;

		/*
		 * x86 can have a fine-grained bitmap which indicates what I/O
		 * ports the process can use.  We set it to the end of our
		 * structure, meaning "none".
		 */
		state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);

		/*
		 * Some GDT entries are the same across all Guests, so we can
		 * set them up now.
		 */
		setup_default_gdt_entries(state);
		/* Most IDT entries are the same for all Guests, too. */
		setup_default_idt_entries(state, default_idt_entries);

		/*
		 * The Host needs to be able to use the LGUEST segments on this
		 * CPU, too, so put them in the Host GDT.
		 */
		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
	}

	/*
	 * In the Switcher, we want the %cs segment register to use the
	 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
	 * it will be undisturbed when we switch.  To change %cs and jump we
	 * need this structure to feed to Intel's "lcall" instruction.
	 */
	lguest_entry.offset = (long)switch_to_guest + switcher_offset();
	lguest_entry.segment = LGUEST_CS;

	/*
	 * Finally, we need to turn off "Page Global Enable".  PGE is an
	 * optimization where page table entries are specially marked to show
	 * they never change.  The Host kernel marks all the kernel pages this
	 * way because it's always present, even when userspace is running.
	 *
	 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
	 * switch to the Guest kernel.  If you don't disable this on all CPUs,
	 * you'll get really weird bugs that you'll chase for two days.
	 *
	 * I used to turn PGE off every time we switched to the Guest and back
	 * on when we return, but that slowed the Switcher down noticeably.
	 */

	/*
	 * We don't need the complexity of CPUs coming and going while we're
	 * doing this.
	 */
	get_online_cpus();
	if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */
		/* Remember that this was originally set (for cleanup). */
		cpu_had_pge = 1;
		/*
		 * adjust_pge is a helper function which sets or unsets the PGE
		 * bit on its CPU, depending on the argument (0 == unset).
		 */
		on_each_cpu(adjust_pge, (void *)0, 1);
		/* Turn off the feature in the global feature set. */
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
	}
	put_online_cpus();
}
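/*
 * A standalone sketch (not lguest code) of the far-pointer layout that the
 * indirect "lcall *mem" form expects: a 32-bit offset at the lower address,
 * followed immediately by a 16-bit code-segment selector.  On i386 the
 * lguest_entry filled in above (an unsigned long offset and an unsigned short
 * segment) gives exactly this 4+2 byte shape, which is why setting .offset
 * and .segment is all run_guest_once() needs before it far-calls into the
 * Switcher.  The name demo_far_ptr is invented for illustration; this builds
 * and runs as an ordinary userspace program.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct demo_far_ptr {
	uint32_t offset;	/* %eip to jump to */
	uint16_t segment;	/* %cs selector to load */
} __attribute__((packed));

int main(void)
{
	/* lcall reads 6 contiguous bytes: 4 of offset, then 2 of selector. */
	printf("offset at byte %zu, segment at byte %zu, %zu bytes total\n",
	       offsetof(struct demo_far_ptr, offset),
	       offsetof(struct demo_far_ptr, segment),
	       sizeof(struct demo_far_ptr));
	return 0;
}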