/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event(struct vm *vm, int vcpuid, UNUSED bool lapic_intr)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];
	vcpu_lock(vcpu);
	if (vcpu->state == VCPU_RUNNING) {
		VCPU_INTERRUPT(vcpuid);
		/* FIXME */
		// if (hostcpu != curcpu) {
		// 	if (lapic_intr) {
		// 		vlapic_post_intr(vcpu->vlapic, hostcpu,
		// 		    vmm_ipinum);
		// 	} else {
		// 		ipi_cpu(hostcpu, vmm_ipinum);
		// 	}
		// } else {
		// 	/*
		// 	 * If the 'vcpu' is running on 'curcpu' then it must
		// 	 * be sending a notification to itself (e.g. SELF_IPI).
		// 	 * The pending event will be picked up when the vcpu
		// 	 * transitions back to guest context.
		// 	 */
		// }
	} else {
		if (vcpu->state == VCPU_SLEEPING)
			pthread_cond_signal(&vcpu->vcpu_sleep_cnd);
			//wakeup_one(vcpu);
	}
	vcpu_unlock(vcpu);
}
/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			if (lapic_intr) {
				vlapic_post_intr(vcpu->vlapic, hostcpu,
				    vmm_ipinum);
			} else {
				ipi_cpu(hostcpu, vmm_ipinum);
			}
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
	vcpu_unlock(vcpu);
}
void
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
	struct vcpu *vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu, vcpuid, lapic_intr);
	vcpu_unlock(vcpu);
}
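/*
 * 'vcpu_notify_event_locked' is not defined in this section. A minimal
 * sketch of the helper, assuming it simply factors out the locked body of
 * the pthread-based variant above (caller must hold the vcpu lock):
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu, int vcpuid, bool lapic_intr)
{
	(void) lapic_intr; /* unused here; see the FIXME in the variant above */

	if (vcpu->state == VCPU_RUNNING) {
		/* Force the running vcpu to trap back into the hypervisor. */
		VCPU_INTERRUPT(vcpuid);
	} else if (vcpu->state == VCPU_SLEEPING) {
		/* Wake the thread sleeping in vm_handle_hlt(). */
		pthread_cond_signal(&vcpu->vcpu_sleep_cnd);
	}
}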
/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vm_exit *vmexit;
	struct vcpu *vcpu;
	int t, timo, spindown;

	vcpu = &vm->vcpu[vcpuid];
	spindown = 0;

	vcpu_lock(vcpu);

	/*
	 * Do a final check for pending NMI or interrupts before
	 * really putting this thread to sleep.
	 *
	 * These interrupts could have happened any time after we
	 * returned from VMRUN() and before we grabbed the vcpu lock.
	 */
	if (!vm_nmi_pending(vm, vcpuid) &&
	    (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) {
		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		if (vlapic_enabled(vcpu->vlapic)) {
			/*
			 * XXX msleep_spin() is not interruptible so use the
			 * 'timo' to put an upper bound on the sleep time.
			 */
			timo = hz;
			msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
		} else {
			/*
			 * Spindown the vcpu if the apic is disabled and it
			 * had entered the halted state.
			 */
			spindown = 1;
		}
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}
	vcpu_unlock(vcpu);

	/*
	 * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it
	 * outside the confines of the vcpu spinlock.
	 */
	if (spindown) {
		*retu = true;
		vmexit = vm_exitinfo(vm, vcpuid);
		vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
		vm_deactivate_cpu(vm, vcpuid);
		VCPU_CTR0(vm, vcpuid, "spinning down cpu");
	}

	return (0);
}
enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		xhyve_abort("vcpu_get_state: invalid vcpuid %d\n", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	vcpu_unlock(vcpu);

	return (state);
}
int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
    bool from_idle)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		xhyve_abort("vcpu_set_state: invalid vcpuid %d\n", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}
enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}
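/*
 * Hypothetical usage sketch (not part of the original source): the
 * 'hostcpu' out-parameter is only meaningful while the state is
 * VCPU_RUNNING, so a caller would typically combine the two, e.g.:
 */
static bool
vcpu_running_on(struct vm *vm, int vcpuid, int host)
{
	int hostcpu;

	return (vcpu_get_state(vm, vcpuid, &hostcpu) == VCPU_RUNNING &&
	    hostcpu == host);
}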
void
vm_inject_gp(struct vm *vm, int vcpuid)
{
	struct vm_exception gpf = {
		.vector = IDT_GP,
		.error_code_valid = 1,
		.error_code = 0
	};

	vm_inject_fault(vm, vcpuid, &gpf);
}

void
vm_inject_ud(struct vm *vm, int vcpuid)
{
	struct vm_exception udf = {
		.vector = IDT_UD,
		.error_code_valid = 0
	};

	vm_inject_fault(vm, vcpuid, &udf);
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");

int
vm_inject_extint(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->extint_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_extint_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->extint_pending);
}

void
vm_extint_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->extint_pending == 0)
		panic("vm_extint_clear: inconsistent extint_pending state");

	vcpu->extint_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

struct vioapic *
vm_ioapic(struct vm *vm)
{
	return (vm->vioapic);
}

struct vhpet *
vm_hpet(struct vm *vm)
{
	return (vm->vhpet);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}

void *
vm_iommu_domain(struct vm *vm)
{
	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
    bool from_idle)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}
/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int vcpu_halted, vm_halted;
	const struct timespec ts = {.tv_sec = 1, .tv_nsec = 0}; /* 1 second */

	KASSERT(!CPU_ISSET(((unsigned) vcpuid), &vm->halted_cpus),
	    ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wakeup.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(((unsigned) vcpuid),
				    &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		//t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		pthread_mutex_lock(&vcpu->vcpu_sleep_mtx);
		vcpu_unlock(vcpu);
		pthread_cond_timedwait_relative_np(&vcpu->vcpu_sleep_cnd,
		    &vcpu->vcpu_sleep_mtx, &ts);
		vcpu_lock(vcpu);
		pthread_mutex_unlock(&vcpu->vcpu_sleep_mtx);
		//msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		//vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(((unsigned) vcpuid), &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa, cs_base;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	enum vm_cpu_mode cpu_mode;
	int cs_d, error, fault, length;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	cs_base = vme->u.inst_emul.cs_base;
	cs_d = vme->u.inst_emul.cs_d;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;
	cpu_mode = paging->cpu_mode;

	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#llx", gpa);

	/* Fetch, decode and emulate the faulting instruction */
	if (vie->num_valid == 0) {
		/*
		 * If the instruction length is not known then assume a
		 * maximum size instruction.
		 */
		length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
		    cs_base, length, vie, &fault);
	} else {
		/*
		 * The instruction bytes have already been copied into 'vie'
		 */
		error = fault = 0;
	}

	if (error || fault)
		return (error);

	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
		VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#llx",
		    vme->rip + cs_base);
		*retu = true; /* dump instruction bytes in userspace */
		return (0);
	}

	/*
	 * If the instruction length was not specified then update it now
	 * along with 'nextrip'.
	 */
	if (vme->inst_length == 0) {
		vme->inst_length = vie->num_processed;
		vcpu->nextrip += vie->num_processed;
	}

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE &&
	    gpa < DEFAULT_APIC_BASE + XHYVE_PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, mread,
	    mwrite, retu);

	return (error);
}

static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	int i, done;
	struct vcpu *vcpu;
	const struct timespec ts = {.tv_sec = 1, .tv_nsec = 0}; /* 1 second */

	done = 0;
	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(((unsigned) vcpuid), &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			pthread_mutex_lock(&vcpu->vcpu_sleep_mtx);
			vcpu_unlock(vcpu);
			pthread_cond_timedwait_relative_np(
			    &vcpu->vcpu_sleep_cnd, &vcpu->vcpu_sleep_mtx, &ts);
			vcpu_lock(vcpu);
			pthread_mutex_unlock(&vcpu->vcpu_sleep_mtx);
			//msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(((unsigned) i), &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(((volatile u_int *) &vm->suspend), 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(((unsigned) i), &vm->active_cpus))
			vcpu_notify_event(vm, i, false);
	}

	return (0);
}
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;
	const struct timespec ts = {.tv_sec = 1, .tv_nsec = 0}; /* 1 second */

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			pthread_mutex_lock(&vcpu->state_sleep_mtx);
			vcpu_unlock(vcpu);
			pthread_cond_timedwait_relative_np(
			    &vcpu->state_sleep_cnd, &vcpu->state_sleep_mtx,
			    &ts);
			vcpu_lock(vcpu);
			pthread_mutex_unlock(&vcpu->state_sleep_mtx);
			//msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;

	if (newstate == VCPU_IDLE)
		pthread_cond_broadcast(&vcpu->state_sleep_cnd);
		//wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		xhyve_abort("Error %d setting state to %d\n", error, newstate);
}
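/*
 * 'vcpu_require_state_locked' is used by vm_handle_hlt() and
 * vm_handle_suspend() above but is not shown in this section. A minimal
 * sketch, assuming it mirrors vcpu_require_state() while the vcpu lock is
 * already held:
 */
static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		xhyve_abort("Error %d setting state to %d\n", error, newstate);
}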