Example #1
/*
 * Read the segment descriptor at the GDT/LDT slot referenced by the
 * selector 'sel' into 'desc', or write 'desc' back to that slot,
 * depending on 'doread'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, bool doread)
{
	struct iovec iov[2];
	uint64_t base;
	uint32_t limit, access;
	int error, reg;

	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
	assert(error == 0);
	assert(limit >= SEL_LIMIT(sel));

	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
	if (error == 0) {
		if (doread)
			vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
		else
			vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
	}
	return (error);
}
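
Example #2 below calls desc_table_write(), which is not included in this excerpt. It is presumably a thin wrapper around desc_table_rw(), together with a matching read variant. A minimal sketch, assuming exactly that:

static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc)
{
	return (desc_table_rw(ctx, vcpu, paging, sel, desc, true));
}

static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc)
{
	return (desc_table_rw(ctx, vcpu, paging, sel, desc, false));
}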
Example #2
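The function below uses three helpers that are not part of this excerpt: CHKERR, GETREG and SETREG. The definitions below are plausible sketches, inferred from how they are used and from the 0/1/-1 return convention documented in Example #1; the real definitions may differ.

/*
 * Sketch (assumed, not from the excerpt): abort the emulation early when
 * a helper reports that an exception was already injected into the guest.
 */
#define	CHKERR(error)							\
	do {								\
		assert((error) == 0 || (error) == 1);			\
		if (error)						\
			return (VMEXIT_CONTINUE);			\
	} while (0)

/* Sketch: convenience wrappers around vm_get_register()/vm_set_register(). */
static uint64_t
GETREG(struct vmctx *ctx, int vcpu, int reg)
{
	uint64_t val;
	int error;

	error = vm_get_register(ctx, vcpu, reg, &val);
	assert(error == 0);
	return (val);
}

static void
SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	int error;

	error = vm_set_register(ctx, vcpu, reg, val);
	assert(error == 0);
}
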
int
vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	struct seg_desc nt;
	struct tss32 oldtss, newtss;
	struct vm_task_switch *task_switch;
	struct vm_guest_paging *paging, sup_paging;
	struct user_segment_descriptor nt_desc, ot_desc;
	struct iovec nt_iov[2], ot_iov[2];
	uint64_t cr0, ot_base;
	uint32_t eip, ot_lim, access;
	int error, ext, minlimit, nt_type, ot_type, vcpu;
	enum task_switch_reason reason;
	uint16_t nt_sel, ot_sel;

	task_switch = &vmexit->u.task_switch;
	nt_sel = task_switch->tsssel;
	ext = task_switch->ext;
	reason = task_switch->reason;
	paging = &task_switch->paging;
	vcpu = *pvcpu;

	assert(paging->cpu_mode == CPU_MODE_PROTECTED);

	/*
	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
	 * The following page table accesses are implicitly supervisor mode:
	 * - accesses to GDT or LDT to load segment descriptors
	 * - accesses to the task state segment during task switch
	 */
	sup_paging = *paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */

	/* Fetch the new TSS descriptor */
	error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc);
	CHKERR(error);

	nt = usd_to_seg_desc(&nt_desc);

	/* Verify the type of the new TSS */
	nt_type = SEG_DESC_TYPE(nt.access);
	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}

	/* TSS descriptor must have present bit set */
	if (!SEG_DESC_PRESENT(nt.access)) {
		sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext);
		goto done;
	}

	/*
	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
	 * 44 bytes for a 16-bit TSS.
	 */
	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
		minlimit = 104 - 1;
	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
		minlimit = 44 - 1;
	else
		minlimit = 0;

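	/*
	 * The type check above guarantees one of the first two cases, so
	 * the 'else' arm is unreachable; the assertion below documents it.
	 */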
	assert(minlimit > 0);
	if (nt.limit < minlimit) {
		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}

	/* TSS must be busy if task switch is due to IRET */
	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}

	/*
	 * TSS must be available (not busy) if task switch reason is
	 * CALL, JMP, exception or interrupt.
	 */
	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
		sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
		goto done;
	}

	/* Fetch the new TSS */
	error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov));
	CHKERR(error);
	vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);

	/* Get the old TSS selector from the guest's task register */
	ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
		/*
		 * This might happen if a task switch was attempted without
		 * ever loading the task register with LTR. In this case the
		 * TR would contain the values from power-on:
		 * (sel = 0, base = 0, limit = 0xffff).
		 */
		sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext);
		goto done;
	}

	/* Get the old TSS base and limit from the guest's task register */
	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
	    &access);
	assert(error == 0);
	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
	ot_type = SEG_DESC_TYPE(access);
	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);

	/* Fetch the old TSS descriptor */
	error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc);
	CHKERR(error);

	/* Get the old TSS */
	error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov));
	CHKERR(error);
	vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);

	/*
	 * Clear the busy bit in the old TSS descriptor if the task switch
	 * is due to an IRET or JMP instruction.
	 */
	if (reason == TSR_IRET || reason == TSR_JMP) {
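		/* Bit 1 of the segment type field is the TSS busy bit. */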
		ot_desc.sd_type &= ~0x2;
		error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
		    &ot_desc);
		CHKERR(error);
	}

	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
		fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
		return (VMEXIT_ABORT);
	}

	/* Save processor state in old TSS */
	eip = vmexit->rip + vmexit->inst_length;
	tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);

	/*
	 * If the task switch was triggered for any reason other than IRET
	 * then set the busy bit in the new TSS descriptor.
	 */
	if (reason != TSR_IRET) {
		nt_desc.sd_type |= 0x2;
		error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
		    &nt_desc);
		CHKERR(error);
	}

	/* Update task register to point at the new TSS */
	SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);

	/* Update the hidden descriptor state of the task register */
	nt = usd_to_seg_desc(&nt_desc);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);

	/* Set CR0.TS */
	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
	SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);

	/*
	 * We are now committed to the task switch. Any exceptions encountered
	 * after this point will be handled in the context of the new task and
	 * the saved instruction pointer will belong to the new task.
	 */
	vmexit->rip = newtss.tss_eip;
	vmexit->inst_length = 0;

	/* Load processor state from new TSS */
	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
	CHKERR(error);

	/*
	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
	 * caused an error code to be generated, this error code is copied
	 * to the stack of the new task.
	 */
	if (task_switch->errcode_valid) {
		assert(task_switch->ext);
		assert(task_switch->reason == TSR_IDT_GATE);
		error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
		    task_switch->errcode);
		CHKERR(error);
	}

	/*
	 * Treatment of virtual-NMI blocking if NMI is delivered through
	 * a task gate.
	 *
	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
	 * If the virtual NMIs VM-execution control is 1, VM entry injects
	 * an NMI, and delivery of the NMI causes a task switch that causes
	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
	 * commences.
	 *
	 * Thus, virtual-NMI blocking is in effect at the time of the task
	 * switch VM exit.
	 */

	/*
	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
	 *
	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
	 * in Intel SDM, Vol 3: if the "virtual NMIs" control is 1, IRET
	 * removes any virtual-NMI blocking.
	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
	 *
	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
	 * VM exit.
	 */

	/*
	 * If the task switch was triggered by an event delivered through
	 * the IDT then extinguish the pending event from the vcpu's
	 * exitintinfo.
	 */
	if (task_switch->reason == TSR_IDT_GATE) {
		error = vm_set_intinfo(ctx, vcpu, 0);
		assert(error == 0);
	}

	/*
	 * XXX should inject debug exception if 'T' bit is 1
	 */
done:
	return (VMEXIT_CONTINUE);
}
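
tss32_save() above is referenced but not included in the excerpt. The sketch below shows what it plausibly does, assuming struct tss32 follows the x86 hardware TSS layout (as in <machine/tss.h>); the real function may save additional fields:

static void
tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
    uint32_t eip, struct tss32 *tss, struct iovec *iov)
{
	/* General purpose registers */
	tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
	tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
	tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
	tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
	tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
	tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
	tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
	tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);

	/* Segment selectors */
	tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
	tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
	tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
	tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
	tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
	tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);

	/* eflags and the return eip computed by the caller */
	tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
	tss->tss_eip = eip;

	/* Write the updated old TSS back into guest memory */
	vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
}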
Example #3
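The per-port handler table consulted below is not part of the excerpt. Based on the fields the function dereferences (handler, flags, arg) and the IOPORT_F_IN/IOPORT_F_OUT checks, a plausible sketch of the table and of the inout_func_t handler type is:

typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
    int bytes, uint32_t *eax, void *arg);

static struct {
	const char	*name;		/* assumed: for diagnostics only */
	int		flags;		/* IOPORT_F_IN and/or IOPORT_F_OUT */
	inout_func_t	handler;	/* default_inout if unregistered */
	void		*arg;		/* opaque cookie passed to handler */
} inout_handlers[MAX_IOPORTS];
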
int
emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
{
	int addrsize, bytes, flags, in, port, prot, rep;
	uint32_t val;
	inout_func_t handler;
	void *arg;
	int error, retval;
	enum vm_reg_name idxreg;
	uint64_t gla, index, iterations, count;
	struct vm_inout_str *vis;
	struct iovec iov[2];

	bytes = vmexit->u.inout.bytes;
	in = vmexit->u.inout.in;
	port = vmexit->u.inout.port;

	assert(port < MAX_IOPORTS);
	assert(bytes == 1 || bytes == 2 || bytes == 4);

	handler = inout_handlers[port].handler;

	if (strict && handler == default_inout)
		return (-1);

	flags = inout_handlers[port].flags;
	arg = inout_handlers[port].arg;

	if (in) {
		if (!(flags & IOPORT_F_IN))
			return (-1);
	} else {
		if (!(flags & IOPORT_F_OUT))
			return (-1);
	}

	retval = 0;
	if (vmexit->u.inout.string) {
		vis = &vmexit->u.inout_str;
		rep = vis->inout.rep;
		addrsize = vis->addrsize;
		prot = in ? PROT_WRITE : PROT_READ;
		assert(addrsize == 2 || addrsize == 4 || addrsize == 8);

		/* Index register */
		idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
		index = vis->index & vie_size2mask(addrsize);

		/* Count register */
		count = vis->count & vie_size2mask(addrsize);

		/* Limit number of back-to-back in/out emulations to 16 */
		iterations = MIN(count, 16);
		while (iterations > 0) {
			if (vie_calculate_gla(vis->paging.cpu_mode,
			    vis->seg_name, &vis->seg_desc, index, bytes,
			    addrsize, prot, &gla)) {
				error = vm_inject_exception2(ctx, vcpu,
				    IDT_GP, 0);
				assert(error == 0);
				retval = INOUT_RESTART;
				break;
			}

			error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes,
			    prot, iov, nitems(iov));
			assert(error == 0 || error == 1 || error == -1);
			if (error) {
				retval = (error == 1) ? INOUT_RESTART :
				    INOUT_ERROR;
				break;
			}

			if (vie_alignment_check(vis->paging.cpl, bytes,
			    vis->cr0, vis->rflags, gla)) {
				error = vm_inject_exception2(ctx, vcpu,
				    IDT_AC, 0);
				assert(error == 0);
				/*
				 * Break instead of returning so the index
				 * and count registers are updated below for
				 * any iterations that completed.
				 */
				retval = INOUT_RESTART;
				break;
			}

			val = 0;
			if (!in)
				vm_copyin(ctx, vcpu, iov, &val, bytes);

			retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
			if (retval != 0)
				break;

			if (in)
				vm_copyout(ctx, vcpu, &val, iov, bytes);

			/* Update index */
			if (vis->rflags & PSL_D)
				index -= bytes;
			else
				index += bytes;

			count--;
			iterations--;
		}

		/* Update index register */
		error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
		assert(error == 0);

		/*
		 * Update count register only if the instruction had a repeat
		 * prefix.
		 */
		if (rep) {
			error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX,
			    count, addrsize);
			assert(error == 0);
		}

		/* Restart the instruction if more iterations remain */
		if (retval == INOUT_OK && count != 0)
			retval = INOUT_RESTART;
	} else {
		if (!in) {
			val = vmexit->u.inout.eax & vie_size2mask(bytes);
		}
		retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
		if (retval == 0 && in) {
			vmexit->u.inout.eax &= ~vie_size2mask(bytes);
			vmexit->u.inout.eax |= val & vie_size2mask(bytes);
		}
	}
	return (retval);
}
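
For illustration, here is a hypothetical handler (not in the original source) that conforms to the inout_func_t contract: returning 0 reports INOUT_OK, and for 'in' accesses the result is stored through the value pointer, which emulate_inout() then merges into the guest's %eax.

/* Hypothetical debug port: OUTs log a byte, INs read back zero. */
static int
dbgport_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *val, void *arg)
{
	if (in)
		*val = 0;			/* nothing to read back */
	else
		fputc(*val & 0xff, stderr);	/* low byte of the OUT */
	return (0);				/* handled: INOUT_OK */
}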