Beispiel #1
0
/*H:130 Now we've examined the hypercall code; our Guest can make requests.
 * Our Guest is usually so well behaved; it never tries to do things it isn't
 * allowed to, and uses hypercalls instead.  Unfortunately, Linux's paravirtual
 * infrastructure isn't quite complete, because it doesn't contain replacements
 * for the Intel I/O instructions.  As a result, the Guest sometimes fumbles
 * across one during the boot process as it probes for various things which are
 * usually attached to a PC.
 *
 * When the Guest uses one of these instructions, we get a trap (General
 * Protection Fault) and come here.  We see if it's one of those troublesome
 * instructions and skip over it.  We return true if we did. */
static int emulate_insn(struct lg_cpu *cpu)
{
	u8 insn;
	unsigned int insnlen = 0, in = 0, shift = 0;
	/* The eip contains the *virtual* address of the Guest's instruction:
	 * guest_pa just subtracts the Guest's page_offset. */
	unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);

	/* This must be the Guest kernel trying to do something, not userspace!
	 * The bottom two bits of the CS segment register are the privilege
	 * level. */
	if ((cpu->regs->cs & 3) != GUEST_PL)
		return 0;

	/* Decoding x86 instructions is icky. */
	insn = lgread(cpu, physaddr, u8);

	/* 0x66 is an "operand prefix".  It means it's using the upper 16 bits
	   of the eax register. */
	if (insn == 0x66) {
		shift = 16;
		/* The instruction is 1 byte so far, read the next byte. */
		insnlen = 1;
		insn = lgread(cpu, physaddr + insnlen, u8);
	}

	/* We can ignore the lower bit for the moment and decode the 4 opcodes
	 * we need to emulate. */
	switch (insn & 0xFE) {
	case 0xE4: /* in     <next byte>,%al */
		insnlen += 2;
		in = 1;
		break;
	case 0xEC: /* in     (%dx),%al */
		insnlen += 1;
		in = 1;
		break;
	case 0xE6: /* out    %al,<next byte> */
		insnlen += 2;
		break;
	case 0xEE: /* out    %al,(%dx) */
		insnlen += 1;
		break;
	default:
		/* OK, we don't know what this is, can't emulate. */
		return 0;
	}

	/* If it was an "IN" instruction, they expect the result to be read
	 * into %eax, so we change %eax.  We always return all-ones, which
	 * traditionally means "there's nothing there". */
	if (in) {
		/* Lower bit tells is whether it's a 16 or 32 bit access */
		if (insn & 0x1)
			cpu->regs->eax = 0xFFFFFFFF;
		else
			cpu->regs->eax |= (0xFFFF << shift);
	}
	/* Finally, we've "done" the instruction, so move past it. */
	cpu->regs->eip += insnlen;
	/* Success! */
	return 1;
}
/*H:210
 * The set_guest_interrupt() routine actually delivers the interrupt or
 * trap.  The mechanics of delivering traps and interrupts to the Guest are the
 * same, except some traps have an "error code" which gets pushed onto the
 * stack as well: the caller tells us if this is one.
 *
 * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
 * interrupt or trap.  It's split into two parts for traditional reasons: gcc
 * on i386 used to be frightened by 64 bit numbers.
 *
 * We set up the stack just like the CPU does for a real interrupt, so it's
 * identical for the Guest (and the standard "iret" instruction will undo
 * it).
 */
static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
				bool has_err)
{
	unsigned long gstack, origstack;
	u32 eflags, ss, irq_enable;
	unsigned long virtstack;

	/*
	 * There are two cases for interrupts: one where the Guest is already
	 * in the kernel, and a more complex one where the Guest is in
	 * userspace.  We check the privilege level to find out.
	 */
	if ((cpu->regs->ss&0x3) != GUEST_PL) {
		/*
		 * The Guest told us their kernel stack with the SET_STACK
		 * hypercall: both the virtual address and the segment.
		 */
		virtstack = cpu->esp1;
		ss = cpu->ss1;

		origstack = gstack = guest_pa(cpu, virtstack);
		/*
		 * We push the old stack segment and pointer onto the new
		 * stack: when the Guest does an "iret" back from the interrupt
		 * handler the CPU will notice they're dropping privilege
		 * levels and expect these here.
		 */
		push_guest_stack(cpu, &gstack, cpu->regs->ss);
		push_guest_stack(cpu, &gstack, cpu->regs->esp);
	} else {
		/* We're staying on the same Guest (kernel) stack. */
		virtstack = cpu->regs->esp;
		ss = cpu->regs->ss;

		origstack = gstack = guest_pa(cpu, virtstack);
	}

	/*
	 * Remember that we never let the Guest actually disable interrupts, so
	 * the "Interrupt Flag" bit is always set.  We copy that bit from the
	 * Guest's "irq_enabled" field into the eflags word: we saw the Guest
	 * copy it back in "lguest_iret".
	 */
	eflags = cpu->regs->eflags;
	if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0
	    && !(irq_enable & X86_EFLAGS_IF))
		eflags &= ~X86_EFLAGS_IF;

	/*
	 * An interrupt is expected to push three things on the stack: the old
	 * "eflags" word, the old code segment, and the old instruction
	 * pointer.
	 */
	push_guest_stack(cpu, &gstack, eflags);
	push_guest_stack(cpu, &gstack, cpu->regs->cs);
	push_guest_stack(cpu, &gstack, cpu->regs->eip);

	/* For the six traps which supply an error code, we push that, too. */
	if (has_err)
		push_guest_stack(cpu, &gstack, cpu->regs->errcode);

	/*
	 * Now we've pushed all the old state, we change the stack, the code
	 * segment and the address to execute.
	 */
	cpu->regs->ss = ss;
	cpu->regs->esp = virtstack + (gstack - origstack);
	cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
	cpu->regs->eip = idt_address(lo, hi);

	/*
	 * There are two kinds of interrupt handlers: 0xE is an "interrupt
	 * gate" which expects interrupts to be disabled on entry.
	 */
	if (idt_type(lo, hi) == 0xE)
		if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
			kill_guest(cpu, "Disabling interrupts");
}
Beispiel #3
0
/*H:130
 * Now we've examined the hypercall code; our Guest can make requests.
 * Our Guest is usually so well behaved; it never tries to do things it isn't
 * allowed to, and uses hypercalls instead.  Unfortunately, Linux's paravirtual
 * infrastructure isn't quite complete, because it doesn't contain replacements
 * for the Intel I/O instructions.  As a result, the Guest sometimes fumbles
 * across one during the boot process as it probes for various things which are
 * usually attached to a PC.
 *
 * When the Guest uses one of these instructions, we get a trap (General
 * Protection Fault) and come here.  We see if it's one of those troublesome
 * instructions and skip over it.  We return true if we did.
 */
static int emulate_insn(struct lg_cpu *cpu)
{
	u8 insn;
	unsigned int insnlen = 0, in = 0, small_operand = 0;
	/*
	 * The eip contains the *virtual* address of the Guest's instruction:
	 * walk the Guest's page tables to find the "physical" address.
	 */
	unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);

	/*
	 * This must be the Guest kernel trying to do something, not userspace!
	 * The bottom two bits of the CS segment register are the privilege
	 * level.
	 */
	if ((cpu->regs->cs & 3) != GUEST_PL)
		return 0;

	/* Decoding x86 instructions is icky. */
	insn = lgread(cpu, physaddr, u8);

	/*
	 * Around 2.6.33, the kernel started using an emulation for the
	 * cmpxchg8b instruction in early boot on many configurations.  This
	 * code isn't paravirtualized, and it tries to disable interrupts.
	 * Ignore it, which will Mostly Work.
	 */
	if (insn == 0xfa) {
		/* "cli", or Clear Interrupt Enable instruction.  Skip it. */
		cpu->regs->eip++;
		return 1;
	}

	/*
	 * 0x66 is an "operand prefix".  It means a 16, not 32 bit in/out.
	 */
	if (insn == 0x66) {
		small_operand = 1;
		/* The instruction is 1 byte so far, read the next byte. */
		insnlen = 1;
		insn = lgread(cpu, physaddr + insnlen, u8);
	}

	/*
	 * We can ignore the lower bit for the moment and decode the 4 opcodes
	 * we need to emulate.
	 */
	switch (insn & 0xFE) {
	case 0xE4: /* in     <next byte>,%al */
		insnlen += 2;
		in = 1;
		break;
	case 0xEC: /* in     (%dx),%al */
		insnlen += 1;
		in = 1;
		break;
	case 0xE6: /* out    %al,<next byte> */
		insnlen += 2;
		break;
	case 0xEE: /* out    %al,(%dx) */
		insnlen += 1;
		break;
	default:
		/* OK, we don't know what this is, can't emulate. */
		return 0;
	}

	/*
	 * If it was an "IN" instruction, they expect the result to be read
	 * into %eax, so we change %eax.  We always return all-ones, which
	 * traditionally means "there's nothing there".
	 */
	if (in) {
		/* Lower bit tells means it's a 32/16 bit access */
		if (insn & 0x1) {
			if (small_operand)
				cpu->regs->eax |= 0xFFFF;
			else
				cpu->regs->eax = 0xFFFFFFFF;
		} else
			cpu->regs->eax |= 0xFF;
	}
	/* Finally, we've "done" the instruction, so move past it. */
	cpu->regs->eip += insnlen;
	/* Success! */
	return 1;
}