/* Add an MSR to the vCPU's VM-exit store / VM-entry load area, allocating
 * the shared backing page on first use. */
int vmx_add_guest_msr(struct vcpu *v, u32 msr)
{
    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;

    for ( i = 0; i < msr_count; i++ )
        if ( msr_area[i].index == msr )
            return 0;

    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
        return -ENOSPC;

    if ( msr_area == NULL )
    {
        if ( (msr_area = alloc_xenheap_page()) == NULL )
            return -ENOMEM;
        v->arch.hvm_vmx.msr_area = msr_area;
        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
    }

    msr_area[msr_count].index = msr;
    msr_area[msr_count].mbz = 0;
    msr_area[msr_count].data = 0;
    v->arch.hvm_vmx.msr_count = ++msr_count;
    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);

    return 0;
}
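/*
 * A minimal, self-contained sketch (not part of the listing above) of the
 * MSR-area entry layout that vmx_add_guest_msr() relies on.  Per the Intel
 * SDM, each VM-exit MSR-store / VM-entry MSR-load entry is 16 bytes: the MSR
 * index in bits 31:0, a reserved dword that must be zero, then the 64-bit MSR
 * value -- hence the PAGE_SIZE / sizeof(struct vmx_msr_entry) == 256 cap that
 * triggers -ENOSPC above.
 */
#include <stdint.h>

struct vmx_msr_entry {
    uint32_t index;   /* MSR index */
    uint32_t mbz;     /* reserved, must be zero */
    uint64_t data;    /* MSR value */
};

_Static_assert(sizeof(struct vmx_msr_entry) == 16,
               "the SDM mandates 16-byte MSR-area entries");
_Static_assert(4096 / sizeof(struct vmx_msr_entry) == 256,
               "one 4KiB page holds at most 256 entries");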
/* Allocate the vCPU's VMCS on first use, then (re)populate it. */
int vmx_create_vmcs(struct vcpu *v)
{
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
    int rc;

    if ( arch_vmx->vmcs == NULL )
    {
        if ( (arch_vmx->vmcs = vmx_alloc_vmcs()) == NULL )
            return -ENOMEM;

        INIT_LIST_HEAD(&arch_vmx->active_list);
        __vmpclear(virt_to_maddr(arch_vmx->vmcs));
        arch_vmx->active_cpu = -1;
        arch_vmx->launched = 0;
    }

    if ( (rc = construct_vmcs(v)) != 0 )
    {
        vmx_free_vmcs(arch_vmx->vmcs);
        arch_vmx->vmcs = NULL;
        return rc;
    }

    return 0;
}
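/*
 * Sketch of the allocator used above.  vmx_alloc_vmcs() is not part of this
 * listing, so this is an assumption about its behaviour rather than the
 * actual implementation: the SDM requires the first 32 bits of a VMCS region
 * to hold the revision identifier reported by IA32_VMX_BASIC before the
 * region may be handed to VMCLEAR/VMPTRLD; "vmcs_revision_id" below is
 * assumed to cache that value from VMX setup.
 */
static struct vmcs_struct *vmx_alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;

    if ( (vmcs = alloc_xenheap_page()) == NULL )
        return NULL;

    clear_page(vmcs);
    vmcs->vmcs_revision_id = vmcs_revision_id;

    return vmcs;
}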
int svm_create_vmcb(struct vcpu *v)
{
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
    int rc;

    if ( (nv->nv_n1vmcx == NULL) &&
         (nv->nv_n1vmcx = alloc_vmcb()) == NULL )
    {
        printk("Failed to create a new VMCB\n");
        return -ENOMEM;
    }

    arch_svm->vmcb = nv->nv_n1vmcx;
    rc = construct_vmcb(v);
    if ( rc != 0 )
    {
        free_vmcb(nv->nv_n1vmcx);
        nv->nv_n1vmcx = NULL;
        arch_svm->vmcb = NULL;
        return rc;
    }

    arch_svm->vmcb_pa = nv->nv_n1vmcx_pa = virt_to_maddr(arch_svm->vmcb);

    return 0;
}
int machine_kexec_get_xen(xen_kexec_range_t *range)
{
    range->start = virt_to_maddr(_start);
    range->size = (unsigned long)xenheap_phys_end -
                  (unsigned long)range->start;
    return 0;
}
/* Release all __init and __initdata ranges to be reused */
void free_init_memory(void)
{
    paddr_t pa = virt_to_maddr(__init_begin);
    unsigned long len = __init_end - __init_begin;

    set_pte_flags_on_range(__init_begin, len, mg_rw);
    memset(__init_begin, 0xcc, len);
    set_pte_flags_on_range(__init_begin, len, mg_clear);
    init_domheap_pages(pa, pa + len);
    printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
}
void dump_hyp_walk(vaddr_t addr)
{
    uint64_t ttbr = READ_SYSREG64(TTBR0_EL2);
    lpae_t *pgtable = THIS_CPU_PGTABLE;

    printk("Walking Hypervisor VA 0x%"PRIvaddr" "
           "on CPU%d via TTBR 0x%016"PRIx64"\n",
           addr, smp_processor_id(), ttbr);

    if ( smp_processor_id() == 0 )
        BUG_ON( (lpae_t *)(unsigned long)(ttbr - phys_offset) != pgtable );
    else
        BUG_ON( virt_to_maddr(pgtable) != ttbr );
    dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
}
/* Make v's VMCS the current VMCS on this CPU, registering it in the
 * per-CPU active list the first time it is loaded here. */
static void vmx_load_vmcs(struct vcpu *v)
{
    unsigned long flags;

    local_irq_save(flags);

    if ( v->arch.hvm_vmx.active_cpu == -1 )
    {
        list_add(&v->arch.hvm_vmx.active_list, &this_cpu(active_vmcs_list));
        v->arch.hvm_vmx.active_cpu = smp_processor_id();
    }

    ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id());

    __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs;

    local_irq_restore(flags);
}
static void __vmx_clear_vmcs(void *info)
{
    struct vcpu *v = info;
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;

    /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
    ASSERT(!local_irq_is_enabled());

    if ( arch_vmx->active_cpu == smp_processor_id() )
    {
        __vmpclear(virt_to_maddr(arch_vmx->vmcs));

        arch_vmx->active_cpu = -1;
        arch_vmx->launched = 0;

        list_del(&arch_vmx->active_list);

        if ( arch_vmx->vmcs == this_cpu(current_vmcs) )
            this_cpu(current_vmcs) = NULL;
    }
}
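/*
 * Sketch (an assumption; the wrapper is not part of this listing) of how
 * __vmx_clear_vmcs() is expected to be driven: the clearing request is sent
 * as an IPI to whichever CPU currently has the VMCS active, so the VMCLEAR
 * runs there with interrupts disabled, which is what the ASSERT above checks.
 */
static void vmx_clear_vmcs(struct vcpu *v)
{
    int cpu = v->arch.hvm_vmx.active_cpu;

    if ( cpu != -1 )
        on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1);
}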
static int construct_vmcs(struct vcpu *v)
{
    uint16_t sysenter_cs;
    unsigned long sysenter_eip;

    vmx_vmcs_enter(v);

    /* VMCS controls. */
    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
    if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);

    /* MSR access bitmap. */
    if ( cpu_has_vmx_msr_bitmap )
    {
        char *msr_bitmap = alloc_xenheap_page();

        if ( msr_bitmap == NULL )
            return -ENOMEM;

        memset(msr_bitmap, ~0, PAGE_SIZE);
        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));

        vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
        vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
    }

    /* I/O access bitmap. */
    __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
    __vmwrite(IO_BITMAP_B, virt_to_maddr(hvm_io_bitmap + PAGE_SIZE));

    /* Host GDTR base. */
    __vmwrite(HOST_GDTR_BASE, GDT_VIRT_START(v));

    /* Host data selectors. */
    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_FS_SELECTOR, 0);
    __vmwrite(HOST_GS_SELECTOR, 0);
    __vmwrite(HOST_FS_BASE, 0);
    __vmwrite(HOST_GS_BASE, 0);

    /* Host control registers. */
    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
    __vmwrite(HOST_CR4, mmu_cr4_features);

    /* Host CS:RIP. */
    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);

    /* Host SYSENTER CS:RIP. */
    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);

    /* MSR intercepts. */
    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);

    __vmwrite(VM_ENTRY_INTR_INFO, 0);

    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);

    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    __vmwrite(CR3_TARGET_COUNT, 0);

    __vmwrite(GUEST_ACTIVITY_STATE, 0);

    /* Guest segment bases. */
    __vmwrite(GUEST_ES_BASE, 0);
    __vmwrite(GUEST_SS_BASE, 0);
    __vmwrite(GUEST_DS_BASE, 0);
    __vmwrite(GUEST_FS_BASE, 0);
    __vmwrite(GUEST_GS_BASE, 0);
    __vmwrite(GUEST_CS_BASE, 0);

    /* Guest segment limits. */
    __vmwrite(GUEST_ES_LIMIT, ~0u);
    __vmwrite(GUEST_SS_LIMIT, ~0u);
    __vmwrite(GUEST_DS_LIMIT, ~0u);
    __vmwrite(GUEST_FS_LIMIT, ~0u);
    __vmwrite(GUEST_GS_LIMIT, ~0u);
    __vmwrite(GUEST_CS_LIMIT, ~0u);

    /* Guest segment AR bytes. */
    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
    __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */

    /* Guest IDT. */
    __vmwrite(GUEST_IDTR_BASE, 0);
    __vmwrite(GUEST_IDTR_LIMIT, 0);

    /* Guest GDT. */
    __vmwrite(GUEST_GDTR_BASE, 0);
    __vmwrite(GUEST_GDTR_LIMIT, 0);

    /* Guest LDT. */
    __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
    __vmwrite(GUEST_LDTR_SELECTOR, 0);
    __vmwrite(GUEST_LDTR_BASE, 0);
    __vmwrite(GUEST_LDTR_LIMIT, 0);

    /* Guest TSS. */
    __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
    __vmwrite(GUEST_TR_BASE, 0);
    __vmwrite(GUEST_TR_LIMIT, 0xff);

    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __vmwrite(GUEST_DR7, 0);
    __vmwrite(VMCS_LINK_POINTER, ~0UL);
#if defined(__i386__)
    __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif

    __vmwrite(EXCEPTION_BITMAP,
              (HVM_TRAP_MASK
               | (1U << TRAP_page_fault)
               | (1U << TRAP_no_device)));

    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
    hvm_update_guest_cr(v, 0);

    v->arch.hvm_vcpu.guest_cr[4] = 0;
    hvm_update_guest_cr(v, 4);

    if ( cpu_has_vmx_tpr_shadow )
    {
        __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
                  page_to_maddr(vcpu_vlapic(v)->regs_page));
        __vmwrite(TPR_THRESHOLD, 0);
    }

    vmx_vmcs_exit(v);

    paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */

    vmx_vlapic_msr_changed(v);

    return 0;
}
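/*
 * Self-contained sketch (not Xen's implementation) of what the
 * vmx_disable_intercept_for_msr() calls in construct_vmcs() are assumed to
 * do with the MSR bitmap written to MSR_BITMAP.  Per the Intel SDM the 4KiB
 * page holds four 1KiB bitmaps -- read-low (offset 0x000), read-high (0x400),
 * write-low (0x800) and write-high (0xC00) -- where "low" covers MSRs
 * 0x0-0x1FFF and "high" covers 0xC0000000-0xC0001FFF; a set bit means
 * "intercept", which is why the page is memset to ~0 first.
 */
#include <stdint.h>

static void clear_bitmap_bit(uint8_t *base, uint32_t bit)
{
    base[bit / 8] &= (uint8_t)~(1u << (bit % 8));
}

static void disable_msr_intercept(uint8_t *msr_bitmap, uint32_t msr)
{
    if ( msr <= 0x1fff )
    {
        clear_bitmap_bit(msr_bitmap + 0x000, msr);          /* read-low   */
        clear_bitmap_bit(msr_bitmap + 0x800, msr);          /* write-low  */
    }
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
    {
        clear_bitmap_bit(msr_bitmap + 0x400, msr & 0x1fff); /* read-high  */
        clear_bitmap_bit(msr_bitmap + 0xc00, msr & 0x1fff); /* write-high */
    }
}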
int vmx_cpu_up(void)
{
    u32 eax, edx;
    int bios_locked, cpu = smp_processor_id();
    u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;

    BUG_ON(!(read_cr4() & X86_CR4_VMXE));

    /*
     * Ensure the current processor operating mode meets
     * the required CR0 fixed bits in VMX operation.
     */
    cr0 = read_cr0();
    rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
    rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
    if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
    {
        printk("CPU%d: some settings of host CR0 are "
               "not allowed in VMX operation.\n", cpu);
        return 0;
    }

    rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);

    bios_locked = !!(eax & IA32_FEATURE_CONTROL_MSR_LOCK);
    if ( bios_locked )
    {
        if ( !(eax & (IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX |
                      IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
        {
            printk("CPU%d: VMX disabled by BIOS.\n", cpu);
            return 0;
        }
    }
    else
    {
        eax  = IA32_FEATURE_CONTROL_MSR_LOCK;
        eax |= IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX;
        if ( test_bit(X86_FEATURE_SMXE, &boot_cpu_data.x86_capability) )
            eax |= IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX;
        wrmsr(IA32_FEATURE_CONTROL_MSR, eax, 0);
    }

    vmx_init_vmcs_config();

    INIT_LIST_HEAD(&this_cpu(active_vmcs_list));

    if ( this_cpu(host_vmcs) == NULL )
    {
        this_cpu(host_vmcs) = vmx_alloc_vmcs();
        if ( this_cpu(host_vmcs) == NULL )
        {
            printk("CPU%d: Could not allocate host VMCS\n", cpu);
            return 0;
        }
    }

    switch ( __vmxon(virt_to_maddr(this_cpu(host_vmcs))) )
    {
    case -2: /* #UD or #GP */
        if ( bios_locked &&
             test_bit(X86_FEATURE_SMXE, &boot_cpu_data.x86_capability) &&
             (!(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX) ||
              !(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
        {
            printk("CPU%d: VMXON failed: perhaps because of TXT settings "
                   "in your BIOS configuration?\n", cpu);
            printk(" --> Disable TXT in your BIOS unless using a secure "
                   "bootloader.\n");
            return 0;
        }
        /* fall through */
    case -1: /* CF==1 or ZF==1 */
        printk("CPU%d: unexpected VMXON failure\n", cpu);
        return 0;
    case 0: /* success */
        break;
    default:
        BUG();
    }

    return 1;
}
/* This function can directly access fields which are covered by clean bits. */
static int construct_vmcb(struct vcpu *v)
{
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
    struct vmcb_struct *vmcb = arch_svm->vmcb;

    vmcb->_general1_intercepts =
        GENERAL1_INTERCEPT_INTR        | GENERAL1_INTERCEPT_NMI         |
        GENERAL1_INTERCEPT_SMI         | GENERAL1_INTERCEPT_INIT        |
        GENERAL1_INTERCEPT_CPUID       | GENERAL1_INTERCEPT_INVD        |
        GENERAL1_INTERCEPT_HLT         | GENERAL1_INTERCEPT_INVLPG      |
        GENERAL1_INTERCEPT_INVLPGA     | GENERAL1_INTERCEPT_IOIO_PROT   |
        GENERAL1_INTERCEPT_MSR_PROT    | GENERAL1_INTERCEPT_SHUTDOWN_EVT|
        GENERAL1_INTERCEPT_TASK_SWITCH;
    vmcb->_general2_intercepts =
        GENERAL2_INTERCEPT_VMRUN       | GENERAL2_INTERCEPT_VMMCALL     |
        GENERAL2_INTERCEPT_VMLOAD      | GENERAL2_INTERCEPT_VMSAVE      |
        GENERAL2_INTERCEPT_STGI        | GENERAL2_INTERCEPT_CLGI        |
        GENERAL2_INTERCEPT_SKINIT      | GENERAL2_INTERCEPT_MWAIT       |
        GENERAL2_INTERCEPT_WBINVD      | GENERAL2_INTERCEPT_MONITOR     |
        GENERAL2_INTERCEPT_XSETBV;

    /* Intercept all debug-register writes. */
    vmcb->_dr_intercepts = ~0u;

    /* Intercept all control-register accesses except for CR2 and CR8. */
    vmcb->_cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
                             CR_INTERCEPT_CR2_WRITE |
                             CR_INTERCEPT_CR8_READ |
                             CR_INTERCEPT_CR8_WRITE);

    /* I/O and MSR permission bitmaps. */
    arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
    if ( arch_svm->msrpm == NULL )
        return -ENOMEM;
    memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);

    svm_disable_intercept_for_msr(v, MSR_FS_BASE);
    svm_disable_intercept_for_msr(v, MSR_GS_BASE);
    svm_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE);
    svm_disable_intercept_for_msr(v, MSR_CSTAR);
    svm_disable_intercept_for_msr(v, MSR_LSTAR);
    svm_disable_intercept_for_msr(v, MSR_STAR);
    svm_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);

    /* LWP_CBADDR MSR is saved and restored by FPU code. So SVM doesn't need to
     * intercept it. */
    if ( cpu_has_lwp )
        svm_disable_intercept_for_msr(v, MSR_AMD64_LWP_CBADDR);

    vmcb->_msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
    vmcb->_iopm_base_pa = (u64)virt_to_maddr(hvm_io_bitmap);

    /* Virtualise EFLAGS.IF and LAPIC TPR (CR8). */
    vmcb->_vintr.fields.intr_masking = 1;

    /* Initialise event injection to no-op. */
    vmcb->eventinj.bytes = 0;

    /* TSC. */
    vmcb->_tsc_offset = 0;

    /* Don't need to intercept RDTSC if CPU supports TSC rate scaling */
    if ( v->domain->arch.vtsc && !cpu_has_tsc_ratio )
    {
        vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
        vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
    }

    /* Guest EFER. */
    v->arch.hvm_vcpu.guest_efer = 0;
    hvm_update_guest_efer(v);

    /* Guest segment limits. */
    vmcb->cs.limit = ~0u;
    vmcb->es.limit = ~0u;
    vmcb->ss.limit = ~0u;
    vmcb->ds.limit = ~0u;
    vmcb->fs.limit = ~0u;
    vmcb->gs.limit = ~0u;

    /* Guest segment bases. */
    vmcb->cs.base = 0;
    vmcb->es.base = 0;
    vmcb->ss.base = 0;
    vmcb->ds.base = 0;
    vmcb->fs.base = 0;
    vmcb->gs.base = 0;

    /* Guest segment AR bytes. */
    vmcb->es.attr.bytes = 0xc93; /* read/write, accessed */
    vmcb->ss.attr.bytes = 0xc93;
    vmcb->ds.attr.bytes = 0xc93;
    vmcb->fs.attr.bytes = 0xc93;
    vmcb->gs.attr.bytes = 0xc93;
    vmcb->cs.attr.bytes = 0xc9b; /* exec/read, accessed */

    /* Guest IDT. */
    vmcb->idtr.base = 0;
    vmcb->idtr.limit = 0;

    /* Guest GDT. */
    vmcb->gdtr.base = 0;
    vmcb->gdtr.limit = 0;

    /* Guest LDT. */
    vmcb->ldtr.sel = 0;
    vmcb->ldtr.base = 0;
    vmcb->ldtr.limit = 0;
    vmcb->ldtr.attr.bytes = 0;

    /* Guest TSS. */
    vmcb->tr.attr.bytes = 0x08b; /* 32-bit TSS (busy) */
    vmcb->tr.base = 0;
    vmcb->tr.limit = 0xff;

    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
    hvm_update_guest_cr(v, 0);

    v->arch.hvm_vcpu.guest_cr[4] = 0;
    hvm_update_guest_cr(v, 4);

    paging_update_paging_modes(v);

    vmcb->_exception_intercepts =
        HVM_TRAP_MASK
        | (1U << TRAP_no_device);

    if ( paging_mode_hap(v->domain) )
    {
        vmcb->_np_enable = 1; /* enable nested paging */
        vmcb->_g_pat = MSR_IA32_CR_PAT_RESET; /* guest PAT */
        vmcb->_h_cr3 = pagetable_get_paddr(
            p2m_get_pagetable(p2m_get_hostp2m(v->domain)));

        /* No point in intercepting CR3 reads/writes. */
        vmcb->_cr_intercepts &=
            ~(CR_INTERCEPT_CR3_READ|CR_INTERCEPT_CR3_WRITE);

        /*
         * No point in intercepting INVLPG if we don't have shadow pagetables
         * that need to be fixed up.
         */
        vmcb->_general1_intercepts &= ~GENERAL1_INTERCEPT_INVLPG;

        /* PAT is under complete control of SVM when using nested paging. */
        svm_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
    }
    else
    {
        vmcb->_exception_intercepts |= (1U << TRAP_page_fault);
    }

    if ( cpu_has_pause_filter )
    {
        vmcb->_pause_filter_count = SVM_PAUSEFILTER_INIT;
        vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_PAUSE;
    }

    vmcb->cleanbits.bytes = 0;

    return 0;
}
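/*
 * Self-contained sketch (not Xen's implementation) of the MSR permission map
 * manipulation assumed by the svm_disable_intercept_for_msr() calls in
 * construct_vmcb().  Per the AMD APM vol. 2, the MSRPM uses two bits per MSR
 * (read intercept first, then write intercept) across three 2KiB vectors:
 * offset 0x0000 for MSRs 0x0-0x1FFF, 0x0800 for 0xC0000000-0xC0001FFF and
 * 0x1000 for 0xC0010000-0xC0011FFF; a set bit means "intercept", matching the
 * memset(msrpm, 0xff, MSRPM_SIZE) default above.
 */
#include <stdint.h>

static void clear_msrpm_bit(uint8_t *msrpm, uint32_t bit)
{
    msrpm[bit / 8] &= (uint8_t)~(1u << (bit % 8));
}

static void msrpm_disable_intercept(uint8_t *msrpm, uint32_t msr)
{
    uint32_t offset;

    if ( msr <= 0x1fff )
        offset = 0x0000;
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
        offset = 0x0800;
    else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
        offset = 0x1000;
    else
        return; /* outside the map: such MSRs are always intercepted */

    msr &= 0x1fff;
    clear_msrpm_bit(msrpm + offset, msr * 2);     /* read intercept  */
    clear_msrpm_bit(msrpm + offset, msr * 2 + 1); /* write intercept */
}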