/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
{
    static uint32_t flags[] = {
        /* I/F -  Usr Wr */
        /* 0   0   0   0 */ _PAGE_PRESENT,
        /* 0   0   0   1 */ _PAGE_PRESENT|_PAGE_RW,
        /* 0   0   1   0 */ _PAGE_PRESENT|_PAGE_USER,
        /* 0   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 0   1   0   0 */ _PAGE_PRESENT,
        /* 0   1   0   1 */ _PAGE_PRESENT|_PAGE_RW,
        /* 0   1   1   0 */ _PAGE_PRESENT|_PAGE_USER,
        /* 0   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 1   0   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1   0   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1   0   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   1   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1   1   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
    };

    /* Don't demand not-NX if the CPU wouldn't enforce it. */
    if ( !guest_supports_nx(v) )
        pfec &= ~PFEC_insn_fetch;

    /* Don't demand R/W if the CPU wouldn't enforce it. */
    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v)) &&
         !(pfec & PFEC_user_mode) )
        pfec &= ~PFEC_write_access;

    return flags[(pfec & 0x1f) >> 1];
}
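/*
 * Illustration only, not part of the Xen source above: how the page-fault
 * error code indexes the 16-entry flags table.  (pfec & 0x1f) keeps the low
 * five architectural error-code bits and the ">> 1" drops the "present" bit,
 * leaving a 4-bit index in the I/F, reserved, Usr, Wr order of the table.
 * The PFEC_* macros below spell out the standard x86 bit positions and are
 * local stand-ins, since the snippet above does not define them.
 */
#include <stdint.h>
#include <stdio.h>

#define PFEC_page_present  (1u << 0)   /* bit 0: fault on a present entry */
#define PFEC_write_access  (1u << 1)   /* bit 1: write access             */
#define PFEC_user_mode     (1u << 2)   /* bit 2: user-mode access         */
#define PFEC_reserved_bit  (1u << 3)   /* bit 3: reserved bit set         */
#define PFEC_insn_fetch    (1u << 4)   /* bit 4: instruction fetch        */

int main(void)
{
    /* A user-mode write fault on a present mapping: P=1, Wr=1, Usr=1, I/F=0 */
    uint32_t pfec = PFEC_page_present | PFEC_write_access | PFEC_user_mode;

    /* Same indexing as mandatory_flags(): keep bits 0-4, drop "present". */
    unsigned int idx = (pfec & 0x1f) >> 1;

    /* idx == 3, i.e. the "0 0 1 1" row: _PAGE_PRESENT|_PAGE_RW|_PAGE_USER. */
    printf("table index = %u\n", idx);
    return 0;
}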
int vmce_restore_vcpu(struct vcpu *v, const struct hvm_vmce_vcpu *ctxt)
{
    unsigned long guest_mcg_cap;

    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        guest_mcg_cap = INTEL_GUEST_MCG_CAP;
    else
        guest_mcg_cap = AMD_GUEST_MCG_CAP;

    if ( ctxt->caps & ~guest_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P )
    {
        dprintk(XENLOG_G_ERR, "%s restore: unsupported MCA capabilities"
                " %#" PRIx64 " for d%d:v%u (supported: %#Lx)\n",
                is_hvm_vcpu(v) ? "HVM" : "PV", ctxt->caps,
                v->domain->domain_id, v->vcpu_id,
                guest_mcg_cap & ~MCG_CAP_COUNT);
        return -EPERM;
    }

    v->arch.vmce.mcg_cap = ctxt->caps;
    v->arch.vmce.bank[0].mci_ctl2 = ctxt->mci_ctl2_bank0;
    v->arch.vmce.bank[1].mci_ctl2 = ctxt->mci_ctl2_bank1;

    return 0;
}
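/*
 * Illustration only, not part of the Xen source above: the capability check
 * masks away everything permitted by the host-derived guest_mcg_cap plus the
 * always-tolerated bank-count field (MCG_CAP_COUNT) and MCG_CTL_P bit, and
 * refuses the restore if anything is left.  The constants below are local
 * stand-ins; the real MCG_* and *_GUEST_MCG_CAP definitions are not in the
 * snippet.
 */
#include <stdint.h>
#include <stdio.h>

#define MCG_CAP_COUNT  0xffULL        /* low byte of MCG_CAP: bank count */
#define MCG_CTL_P      (1ULL << 8)    /* MCG_CTL register present        */

int main(void)
{
    uint64_t guest_mcg_cap = MCG_CTL_P | (1ULL << 10);   /* stand-in value */

    /* Saved caps advertising 6 banks plus an extra, unsupported bit 24. */
    uint64_t caps = guest_mcg_cap | 6 | (1ULL << 24);

    /* Non-zero leftovers mean the restore would be refused with -EPERM. */
    uint64_t rejected = caps & ~guest_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P;

    printf("rejected bits: %#llx\n", (unsigned long long)rejected);
    return 0;
}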
/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
{
    /* Don't demand not-NX if the CPU wouldn't enforce it. */
    if ( !guest_supports_nx(v) )
        pfec &= ~PFEC_insn_fetch;

    /* Don't demand R/W if the CPU wouldn't enforce it. */
    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v)) &&
         !(pfec & PFEC_user_mode) )
        pfec &= ~PFEC_write_access;

    return gw_page_flags[(pfec & 0x1f) >> 1] | _PAGE_INVALID_BITS;
}
void vpmu_lvtpc_update(uint32_t val)
{
    struct vpmu_struct *vpmu;
    struct vcpu *curr = current;

    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
        return;

    vpmu = vcpu_vpmu(curr);

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);

    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
    if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
         !vpmu_is_set(vpmu, VPMU_CACHED) )
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
                uint64_t supported, bool_t is_write)
{
    struct vcpu *curr = current;
    struct vpmu_struct *vpmu;
    const struct arch_vpmu_ops *ops;
    int ret = 0;

    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
         ((vpmu_mode & XENPMU_MODE_ALL) &&
          !is_hardware_domain(current->domain)) )
        goto nop;

    vpmu = vcpu_vpmu(curr);
    ops = vpmu->arch_vpmu_ops;
    if ( !ops )
        goto nop;

    if ( is_write && ops->do_wrmsr )
        ret = ops->do_wrmsr(msr, *msr_content, supported);
    else if ( !is_write && ops->do_rdmsr )
        ret = ops->do_rdmsr(msr, msr_content);
    else
        goto nop;

    /*
     * We may have received a PMU interrupt while handling MSR access
     * and since do_wr/rdmsr may load VPMU context we should save
     * (and unload) it again.
     */
    if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
         vpmu_is_set(vpmu, VPMU_CACHED) )
    {
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
        ops->arch_vpmu_save(curr, 0);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
    }

    return ret;

 nop:
    if ( !is_write )
        *msr_content = 0;

    return 0;
}
void restore_rest_processor_state(void)
{
    struct vcpu *v = current;

    load_TR();

#if defined(CONFIG_X86_64)
    /* Recover syscall MSRs */
    wrmsrl(MSR_LSTAR, saved_lstar);
    wrmsrl(MSR_CSTAR, saved_cstar);
    wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
    wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);

    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    {
        /* Recover sysenter MSRs */
        wrmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp);
        wrmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip);
        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
    }
#else /* !defined(CONFIG_X86_64) */
    if ( supervisor_mode_kernel && cpu_has_sep )
        wrmsr(MSR_IA32_SYSENTER_ESP, &init_tss[smp_processor_id()].esp1, 0);
#endif

    /* Maybe load the debug registers. */
    BUG_ON(is_hvm_vcpu(v));
    if ( !is_idle_vcpu(v) && unlikely(v->arch.guest_context.debugreg[7]) )
    {
        write_debugreg(0, v->arch.guest_context.debugreg[0]);
        write_debugreg(1, v->arch.guest_context.debugreg[1]);
        write_debugreg(2, v->arch.guest_context.debugreg[2]);
        write_debugreg(3, v->arch.guest_context.debugreg[3]);
        write_debugreg(6, v->arch.guest_context.debugreg[6]);
        write_debugreg(7, v->arch.guest_context.debugreg[7]);
    }

    /* Reload FPU state on next FPU use. */
    stts();

    if (cpu_has_pat)
        wrmsrl(MSR_IA32_CR_PAT, host_pat);

    mtrr_ap_init();
    mcheck_init(&boot_cpu_data);
}
void vmx_vmcs_exit(struct vcpu *v)
{
    struct foreign_vmcs *fv;

    if ( likely(v == current) )
        return;

    fv = &this_cpu(foreign_vmcs);
    BUG_ON(fv->v != v);
    BUG_ON(fv->count == 0);

    if ( --fv->count == 0 )
    {
        /* Don't confuse vmx_do_resume (for @v or @current!) */
        vmx_clear_vmcs(v);
        if ( is_hvm_vcpu(current) )
            vmx_load_vmcs(current);

        spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
        vcpu_unpause(v);

        fv->v = NULL;
    }
}
int vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();
    struct vcpu *prev = NULL;

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return 0;

    /* First time this VCPU is running here */
    if ( vpmu->last_pcpu != pcpu )
    {
        /*
         * Get the context from last pcpu that we ran on. Note that if another
         * VCPU is running there it must have saved this VCPU's context before
         * starting to run (see below).
         * There should be no race since remote pcpu will disable interrupts
         * before saving the context.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                             vpmu_save_force, (void *)v, 1);
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
        }
    }

    /* Prevent forced context save from remote CPU */
    local_irq_disable();

    prev = per_cpu(last_vcpu, pcpu);

    if ( prev != v && prev )
    {
        vpmu = vcpu_vpmu(prev);

        /* Someone ran here before us */
        vpmu_save_force(prev);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

        vpmu = vcpu_vpmu(v);
    }

    local_irq_enable();

    /* Only when PMU is counting, we load PMU context immediately. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
         (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
        return 0;

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
    {
        int ret;

        apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        /* Arch code needs to set VPMU_CONTEXT_LOADED */
        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
        if ( ret )
        {
            apic_write_around(APIC_LVTPC,
                              vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
            return ret;
        }
    }

    return 0;
}
void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *sampled = current, *sampling;
    struct vpmu_struct *vpmu;
    struct vlapic *vlapic;
    u32 vlapic_lvtpc;

    /*
     * dom0 will handle interrupt for special domains (e.g. idle domain) or,
     * in XENPMU_MODE_ALL, for everyone.
     */
    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        sampling = choose_hwdom_vcpu();
        if ( !sampling )
            return;
    }
    else
        sampling = sampled;

    vpmu = vcpu_vpmu(sampling);
    if ( !vpmu->arch_vpmu_ops )
        return;

    /* PV(H) guest */
    if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
    {
        const struct cpu_user_regs *cur_regs;
        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
        domid_t domid;

        if ( !vpmu->xenpmu_data )
            return;

        if ( is_pvh_vcpu(sampling) &&
             !(vpmu_mode & XENPMU_MODE_ALL) &&
             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
            return;

        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
            return;

        /* PV guest will be reading PMU MSRs from xenpmu_data */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);

        if ( has_hvm_container_vcpu(sampled) )
            *flags = 0;
        else
            *flags = PMU_SAMPLE_PV;

        if ( sampled == sampling )
            domid = DOMID_SELF;
        else
            domid = sampled->domain->domain_id;

        /* Store appropriate registers in xenpmu_data */
        /* FIXME: 32-bit PVH should go here as well */
        if ( is_pv_32bit_vcpu(sampling) )
        {
            /*
             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
             * and therefore we treat it the same way as a non-privileged
             * PV 32-bit domain.
             */
            struct compat_pmu_regs *cmp;

            cur_regs = guest_cpu_user_regs();

            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
            cmp->ip = cur_regs->rip;
            cmp->sp = cur_regs->rsp;
            cmp->flags = cur_regs->eflags;
            cmp->ss = cur_regs->ss;
            cmp->cs = cur_regs->cs;
            if ( (cmp->cs & 3) > 1 )
                *flags |= PMU_SAMPLE_USER;
        }
        else
        {
            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

            if ( (vpmu_mode & XENPMU_MODE_SELF) )
                cur_regs = guest_cpu_user_regs();
            else if ( !guest_mode(regs) &&
                      is_hardware_domain(sampling->domain) )
            {
                cur_regs = regs;
                domid = DOMID_XEN;
            }
            else
                cur_regs = guest_cpu_user_regs();

            r->ip = cur_regs->rip;
            r->sp = cur_regs->rsp;
            r->flags = cur_regs->eflags;

            if ( !has_hvm_container_vcpu(sampled) )
            {
                r->ss = cur_regs->ss;
                r->cs = cur_regs->cs;
                if ( !(sampled->arch.flags & TF_kernel_mode) )
                    *flags |= PMU_SAMPLE_USER;
            }
            else
            {
                struct segment_register seg;

                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
                r->cs = seg.sel;
                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
                r->ss = seg.sel;
                r->cpl = seg.attr.fields.dpl;
                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
                    *flags |= PMU_SAMPLE_REAL;
            }
        }

        vpmu->xenpmu_data->domain_id = domid;
        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
        if ( is_hardware_domain(sampling->domain) )
            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
        else
            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;

        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        *flags |= PMU_CACHED;
        vpmu_set(vpmu, VPMU_CACHED);

        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);

        return;
    }

    /* HVM guests */
    vlapic = vcpu_vlapic(sampling);

    /* We don't support (yet) HVM dom0 */
    ASSERT(sampling == sampled);

    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
         !is_vlapic_lvtpc_enabled(vlapic) )
        return;

    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);

    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
    {
    case APIC_MODE_FIXED:
        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
        break;
    case APIC_MODE_NMI:
        sampling->nmi_pending = 1;
        break;
    }
}
void vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector = -1;

    /* Block event injection when single step with MTF. */
    if ( unlikely(v->arch.hvm_vcpu.single_step) )
    {
        v->arch.hvm_vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Crank the handle on interrupt state. */
    if ( is_hvm_vcpu(v) )
        pt_vector = pt_update_irq(v);

    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );