/*
 * Save the machine state of the outgoing task into its TSS image and copy
 * the image back to guest memory. An IRET-initiated task switch clears the
 * NT flag in the saved copy of EFLAGS.
 */
static void
tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
    uint32_t eip, struct tss32 *tss, struct iovec *iov)
{

	/* General purpose registers */
	tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
	tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
	tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
	tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
	tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
	tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
	tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
	tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);

	/* Segment selectors */
	tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
	tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
	tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
	tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
	tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
	tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);

	/* eflags and eip */
	tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
	if (task_switch->reason == TSR_IRET)
		tss->tss_eflags &= ~PSL_NT;
	tss->tss_eip = eip;

	/* Copy updated old TSS into guest memory */
	vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
}
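/*
 * GETREG()/SETREG() are used throughout this file but are not part of the
 * excerpt; presumably they are thin wrappers over the vmmapi register
 * accessors. A minimal sketch under that assumption (in the full file they
 * would precede their first use):
 */
static uint64_t
GETREG(struct vmctx *ctx, int vcpu, int reg)
{
	uint64_t val;
	int error;

	error = vm_get_register(ctx, vcpu, reg, &val);
	assert(error == 0);
	return (val);
}

static void
SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	int error;

	error = vm_set_register(ctx, vcpu, reg, val);
	assert(error == 0);
}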
/*
 * Read or write the segment descriptor 'desc' in the GDT/LDT slot referenced
 * by the selector 'sel'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, bool doread)
{
	struct iovec iov[2];
	uint64_t base;
	uint32_t limit, access;
	int error, reg;

	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
	assert(error == 0);
	assert(limit >= SEL_LIMIT(sel));

	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
	if (error == 0) {
		if (doread)
			vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
		else
			vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
	}
	return (error);
}
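/*
 * Direction-specific wrappers around desc_table_rw() that the rest of the
 * file would presumably use; a minimal sketch (names assumed, not taken
 * from this excerpt):
 */
static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc)
{
	return (desc_table_rw(ctx, vcpu, paging, sel, desc, true));
}

static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc)
{
	return (desc_table_rw(ctx, vcpu, paging, sel, desc, false));
}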
/*
 * Push an error code on the stack of the new task. This is needed if the
 * task switch was triggered by a hardware exception that causes an error
 * code to be saved (e.g. #PF).
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    int task_type, uint32_t errcode)
{
	struct iovec iov[2];
	struct seg_desc seg_desc;
	int stacksize, bytes, error;
	uint64_t gla, cr0, rflags;
	uint32_t esp;
	uint16_t stacksel;

	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
	rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
	stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);

	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
	    &seg_desc.limit, &seg_desc.access);
	assert(error == 0);

	/*
	 * Section "Error Code" in the Intel SDM vol 3: the error code is
	 * pushed on the stack as a doubleword or word (depending on the
	 * default interrupt, trap or task gate size).
	 */
	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
		bytes = 4;
	else
		bytes = 2;

	/*
	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
	 * stack-segment descriptor determines the size of the stack
	 * pointer outside of 64-bit mode.
	 */
	if (SEG_DESC_DEF32(seg_desc.access))
		stacksize = 4;
	else
		stacksize = 2;

	esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
	esp -= bytes;

	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
		sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
		return (1);
	}

	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
		vm_inject_ac(ctx, vcpu, 1);
		return (1);
	}

	error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
	    iov, nitems(iov));
	if (error)
		return (error);

	vm_copyout(ctx, vcpu, &errcode, iov, bytes);
	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
	return (0);
}
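/*
 * Call-site sketch (not in this excerpt): the task-switch exit handler
 * would push the error code onto the new task's stack only when one is
 * pending, e.g.:
 *
 *	if (task_switch->errcode_valid) {
 *		error = push_errcode(ctx, vcpu, &task_switch->paging,
 *		    nt_type, task_switch->errcode);
 *		if (error)
 *			return (error);
 *	}
 *
 * 'errcode_valid', 'errcode' and 'nt_type' (the descriptor type of the new
 * TSS) are assumed names, not taken from the code above.
 */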
/*
 * Update the vcpu registers to reflect the state of the new task.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov)
{
	struct seg_desc seg_desc, seg_desc2;
	uint64_t *pdpte, maxphyaddr, reserved;
	uint32_t eflags;
	int error, i;
	bool nested;

	nested = false;
	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
		tss->tss_link = ot_sel;
		nested = true;
	}

	eflags = tss->tss_eflags;
	if (nested)
		eflags |= PSL_NT;

	/* LDTR */
	SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);

	/* PDBR */
	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
			/*
			 * XXX Assuming 36-bit MAXPHYADDR.
			 */
			maxphyaddr = (1UL << 36) - 1;
			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
			for (i = 0; i < 4; i++) {
				/* Check reserved bits if the PDPTE is valid */
				if (!(pdpte[i] & 0x1))
					continue;
				/*
				 * Bits 2:1, 8:5 and bits above the processor's
				 * maximum physical address are reserved.
				 */
				reserved = ~maxphyaddr | 0x1E6;
				if (pdpte[i] & reserved) {
					vm_inject_gp(ctx, vcpu);
					return (1);
				}
			}
			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
		}
		SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
		ts->paging.cr3 = tss->tss_cr3;
	}

	/* eflags and eip */
	SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags);
	SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip);

	/* General purpose registers */
	SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
	SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
	SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
	SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
	SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
	SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
	SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi);

	/* Segment selectors */
	SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es);
	SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs);
	SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss);
	SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds);
	SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs);
	SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs);

	/*
	 * If this is a nested task then write out the new TSS to update
	 * the previous link field.
	 */
	if (nested)
		vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));

	/* Validate segment descriptors */
	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc);
	if (error)
		return (error);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);

	/*
	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
	 *
	 * The SS and CS attribute checks on VM-entry are inter-dependent so
	 * we need to make sure that both segments are valid before updating
	 * either of them. This ensures that the VMCS state can pass the
	 * VM-entry checks so the guest can handle any exception injected
	 * during task switch emulation.
	 */
	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc);
	if (error)
		return (error);

	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2);
	if (error)
		return (error);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;

	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc);
	if (error)
		return (error);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);

	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc);
	if (error)
		return (error);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);

	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc);
	if (error)
		return (error);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);

	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc);
	if (error)
		return (error);
	update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);

	return (0);
}
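/*
 * update_seg_desc() is called above but not shown in this excerpt; it
 * presumably pushes a validated descriptor back into the vCPU via
 * vm_set_desc(). A minimal sketch under that assumption (in the full file
 * it would precede tss32_restore()):
 */
static void
update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
{
	int error;

	error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
	assert(error == 0);
}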
/*
 * Emulate an I/O port access on behalf of the guest, including the
 * REP-prefixed string variants (INS/OUTS).
 */
int
emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
{
	int addrsize, bytes, flags, in, port, prot, rep;
	uint32_t val;
	inout_func_t handler;
	void *arg;
	int error, retval;
	enum vm_reg_name idxreg;
	uint64_t gla, index, iterations, count;
	struct vm_inout_str *vis;
	struct iovec iov[2];

	bytes = vmexit->u.inout.bytes;
	in = vmexit->u.inout.in;
	port = vmexit->u.inout.port;

	assert(port < MAX_IOPORTS);
	assert(bytes == 1 || bytes == 2 || bytes == 4);

	handler = inout_handlers[port].handler;
	if (strict && handler == default_inout)
		return (-1);

	flags = inout_handlers[port].flags;
	arg = inout_handlers[port].arg;

	if (in) {
		if (!(flags & IOPORT_F_IN))
			return (-1);
	} else {
		if (!(flags & IOPORT_F_OUT))
			return (-1);
	}

	retval = 0;
	if (vmexit->u.inout.string) {
		vis = &vmexit->u.inout_str;
		rep = vis->inout.rep;
		addrsize = vis->addrsize;
		prot = in ? PROT_WRITE : PROT_READ;
		assert(addrsize == 2 || addrsize == 4 || addrsize == 8);

		/* Index register */
		idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
		index = vis->index & vie_size2mask(addrsize);

		/* Count register */
		count = vis->count & vie_size2mask(addrsize);

		/* Limit number of back-to-back in/out emulations to 16 */
		iterations = MIN(count, 16);
		while (iterations > 0) {
			if (vie_calculate_gla(vis->paging.cpu_mode,
			    vis->seg_name, &vis->seg_desc, index, bytes,
			    addrsize, prot, &gla)) {
				error = vm_inject_exception2(ctx, vcpu,
				    IDT_GP, 0);
				assert(error == 0);
				retval = INOUT_RESTART;
				break;
			}

			error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes,
			    prot, iov, nitems(iov));
			assert(error == 0 || error == 1 || error == -1);
			if (error) {
				retval = (error == 1) ? INOUT_RESTART :
				    INOUT_ERROR;
				break;
			}

			if (vie_alignment_check(vis->paging.cpl, bytes,
			    vis->cr0, vis->rflags, gla)) {
				error = vm_inject_exception2(ctx, vcpu,
				    IDT_AC, 0);
				assert(error == 0);
				return (INOUT_RESTART);
			}

			val = 0;
			if (!in)
				vm_copyin(ctx, vcpu, iov, &val, bytes);

			retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
			if (retval != 0)
				break;

			if (in)
				vm_copyout(ctx, vcpu, &val, iov, bytes);

			/* Update index */
			if (vis->rflags & PSL_D)
				index -= bytes;
			else
				index += bytes;

			count--;
			iterations--;
		}

		/* Update index register */
		error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
		assert(error == 0);

		/*
		 * Update count register only if the instruction had a repeat
		 * prefix.
		 */
		if (rep) {
			error = vie_update_register(ctx, vcpu,
			    VM_REG_GUEST_RCX, count, addrsize);
			assert(error == 0);
		}

		/* Restart the instruction if more iterations remain */
		if (retval == INOUT_OK && count != 0)
			retval = INOUT_RESTART;
	} else {
		if (!in) {
			val = vmexit->u.inout.eax & vie_size2mask(bytes);
		}
		retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
		if (retval == 0 && in) {
			vmexit->u.inout.eax &= ~vie_size2mask(bytes);
			vmexit->u.inout.eax |= val & vie_size2mask(bytes);
		}
	}

	return (retval);
}
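/*
 * A hedged sketch of how a port ends up in inout_handlers[]: device models
 * supply an inout_func_t and register it per port. The struct inout_port
 * layout, the IOPORT_F_INOUT flag and register_inout() are assumed to match
 * bhyve's inout.h; the handler, port number and name below are made up for
 * illustration.
 */
static int
scratch_ioport_handler(struct vmctx *ctx, int vcpu, int in, int port,
    int bytes, uint32_t *eax, void *arg)
{
	/* Reads float the bus; writes are silently dropped. */
	if (in)
		*eax = 0xffffffff >> (32 - bytes * 8);
	return (0);	/* success */
}

static struct inout_port scratch_ioport = {
	.name = "scratch",
	.port = 0x220,		/* hypothetical port */
	.size = 1,
	.flags = IOPORT_F_INOUT,
	.handler = scratch_ioport_handler,
	.arg = NULL,
};
/* register_inout(&scratch_ioport) would then be called at init time. */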