void restore_host_cr4_vmxe(struct per_cpu_data *cpu_data)
{
    if (cpu_data->host_cr4_vmxe) {
        if (cpu_data->vmm_flag & VMXON_HAX) {
            // TODO: Need to understand why this happens (on both Windows and
            // macOS)
            hax_debug("HAX: VMM flag (VMXON_HAX) is not clear!\n");
        }
        set_cr4(get_cr4() | CR4_VMXE);
    } else {
        set_cr4(get_cr4() & (~CR4_VMXE));
    }
}
ac_bool test_crs(void)
{
    ac_bool error = AC_FALSE;

    union cr0_u cr0 = { .raw = get_cr0() };
    // cr1 is reserved
    ac_uint cr2 = get_cr2();
    union cr3_u cr3 = { .raw = get_cr3() };
    union cr4_u cr4 = { .raw = get_cr4() };
    ac_uint cr8 = get_cr8();

    print_cr0("cr0", cr0.raw);
    ac_printf("cr2: 0x%p\n", cr2);
    print_cr3("cr3", cr3.raw);
    print_cr4("cr4", cr4.raw);
    ac_printf("cr8: 0x%p\n", cr8);

    set_cr0(cr0.raw);
    // cr2 is read only
    set_cr3(cr3.raw);
    set_cr4(cr4.raw);
    set_cr8(cr8);

    ac_uint cr0_1 = get_cr0();
    ac_uint cr3_1 = get_cr3();
    ac_uint cr4_1 = get_cr4();
    ac_uint cr8_1 = get_cr8();

    error |= AC_TEST(cr0.raw == cr0_1);
    error |= AC_TEST(cr3.raw == cr3_1);
    error |= AC_TEST(cr4.raw == cr4_1);
    error |= AC_TEST(cr8 == cr8_1);

    return error;
}
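/*
 * A minimal sketch, assuming a layout for the cr4_u union used by the test
 * above (the real definition lives in the test harness headers and may
 * differ): a raw word overlaid with named bit-fields, so the test can
 * round-trip the whole register while callers can still inspect single
 * flags. The bit positions are the architectural CR4 assignments.
 */
union cr4_u_sketch {
    unsigned long raw;
    struct {
        unsigned long vme        : 1;  // bit 0: virtual-8086 mode extensions
        unsigned long pvi        : 1;  // bit 1: protected-mode virtual interrupts
        unsigned long tsd        : 1;  // bit 2: time stamp disable
        unsigned long de         : 1;  // bit 3: debugging extensions
        unsigned long pse        : 1;  // bit 4: page size extensions
        unsigned long pae        : 1;  // bit 5: physical address extension
        unsigned long mce        : 1;  // bit 6: machine-check enable
        unsigned long pge        : 1;  // bit 7: page global enable
        unsigned long pce        : 1;  // bit 8: allow RDPMC at CPL > 0
        unsigned long osfxsr     : 1;  // bit 9: OS supports FXSAVE/FXRSTOR
        unsigned long osxmmexcpt : 1;  // bit 10: OS handles SIMD FP exceptions
    } fields;
};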
void cpu_pmc_control(void *enablep)
{
    boolean_t enable = *(boolean_t *)enablep;
    cpu_data_t *cdp = current_cpu_datap();

    if (enable) {
        /* 0x38F is IA32_PERF_GLOBAL_CTRL: enable fixed counters 0-2 and
         * PMC0-3. 0x38D is IA32_FIXED_CTR_CTRL: count rings 0 and 3 on the
         * three fixed counters. */
        wrmsr64(0x38F, 0x70000000FULL);
        wrmsr64(0x38D, 0x333);
        set_cr4(get_cr4() | CR4_PCE);
    } else {
        wrmsr64(0x38F, 0);
        wrmsr64(0x38D, 0);
        set_cr4(get_cr4() & ~CR4_PCE);
    }
    cdp->cpu_fixed_pmcs_enabled = enable;
}
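/*
 * A hedged sketch of what setting CR4.PCE above buys user space: ring-3 code
 * may then execute RDPMC directly. The helper below is illustrative, not
 * part of the kernel source above. Counter selectors with bit 30 set address
 * the fixed-function counters (0x40000000 = fixed counter 0, which counts
 * instructions retired).
 */
#include <stdint.h>

static inline uint64_t rdpmc_user(uint32_t counter)
{
    uint32_t lo, hi;

    /* RDPMC returns the 64-bit counter in EDX:EAX, selected by ECX. */
    __asm__ volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(counter));
    return ((uint64_t)hi << 32) | lo;
}

/* Example (user mode): uint64_t insns = rdpmc_user(0x40000000); */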
void vmx_vm_cpu_skillz_init()
{
    set_cr4(get_cr4() | CR4_OSXSAVE);
    vmx_cpu_features();
    vmx_cpu_skillz();
}
/* -----------------------------------------------------------------------------
   vmx_on() -- Enter VMX root operation on this CPU.
   -------------------------------------------------------------------------- */
static void
vmx_on(void)
{
    vmx_cpu_t *cpu = &current_cpu_datap()->cpu_vmx;
    addr64_t vmxon_region_paddr;
    int result;

    vmx_init();

    assert(cpu->specs.vmx_present);

    if (NULL == cpu->vmxon_region)
        panic("vmx_on: VMXON region not allocated");
    vmxon_region_paddr = vmx_paddr(cpu->vmxon_region);

    /*
     * Enable VMX operation.
     */
    set_cr4(get_cr4() | CR4_VMXE);

    assert(vmx_is_cr0_valid(&cpu->specs));
    assert(vmx_is_cr4_valid(&cpu->specs));

    if ((result = __vmxon(&vmxon_region_paddr)) != VMX_SUCCEED) {
        panic("vmx_on: unexpected return %d from __vmxon()", result);
    }
}
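/*
 * A hedged sketch of what vmx_init()/region allocation must guarantee before
 * the __vmxon() above can succeed: per the Intel SDM, the 4KB-aligned VMXON
 * region must begin with the VMCS revision identifier reported in bits 30:0
 * of the IA32_VMX_BASIC MSR (0x480). The names read_msr and
 * vmxon_region_prepare are illustrative, not from the source above.
 */
#define IA32_VMX_BASIC 0x480

static void vmxon_region_prepare(uint32_t *vmxon_region /* 4KB, page-aligned */)
{
    uint64_t vmx_basic = read_msr(IA32_VMX_BASIC);  /* assumed MSR accessor */

    /* Bits 30:0 hold the revision id; bit 31 of the region dword must be 0. */
    vmxon_region[0] = (uint32_t)(vmx_basic & 0x7FFFFFFF);
}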
/*
 * Look for FPU and initialize it.
 * Called on each CPU.
 */
void
init_fpu(void)
{
    unsigned short status, control;

    /*
     * Check for FPU by initializing it,
     * then trying to read the correct bit patterns from
     * the control and status registers.
     */
    set_cr0(get_cr0() & ~(CR0_EM | CR0_TS));    /* allow use of FPU */

    fninit();
    status = fnstsw();
    fnstcw(&control);

    if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) {
        /* assume we have a 387 compatible instruction set */
        fp_kind = FP_387;

        /* Use FPU save/restore instructions if available */
        if (cpuid_features() & CPUID_FEATURE_FXSR) {
            fp_kind = FP_FXSR;
            set_cr4(get_cr4() | CR4_FXS);
            printf("Enabling XMM register save/restore");
            /* And allow SIMD instructions if present */
            if (cpuid_features() & CPUID_FEATURE_SSE) {
                printf(" and SSE/SSE2");
                set_cr4(get_cr4() | CR4_XMM);
            }
            printf(" opcodes\n");
        }

        /*
         * Trap wait instructions. Turn off FPU for now.
         */
        set_cr0(get_cr0() | CR0_TS | CR0_MP);
    } else {
        /*
         * NO FPU.
         */
        fp_kind = FP_NO;
        set_cr0(get_cr0() | CR0_EM);
    }
}
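/*
 * A hedged sketch of the probe helpers used above (the kernel defines its
 * own versions): after FNINIT, a working x87 unit reports status word 0x0000
 * and control word 0x037F, which is exactly what the
 * (status & 0xff) == 0 && (control & 0x103f) == 0x3f test accepts.
 */
static inline unsigned short fnstsw_sketch(void)
{
    unsigned short status;

    /* FNSTSW can target AX directly. */
    __asm__ volatile("fnstsw %0" : "=a"(status));
    return status;
}

static inline void fnstcw_sketch(unsigned short *control)
{
    /* FNSTCW stores the control word to memory. */
    __asm__ volatile("fnstcw %0" : "=m"(*control));
}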
void
mca_cpu_init(void)
{
    unsigned int i;

    /*
     * The first (boot) processor is responsible for discovering the
     * machine check architecture present on this machine.
     */
    if (!mca_initialized) {
        mca_get_availability();
        mca_initialized = TRUE;
        simple_lock_init(&mca_lock, 0);
    }

    if (mca_MCA_present) {
        /* Enable all MCA features */
        if (mca_control_MSR_present)
            wrmsr64(IA32_MCG_CTL, IA32_MCG_CTL_ENABLE);

        switch (mca_family) {
        case 0x06:
            /* Enable all but mc0 */
            for (i = 1; i < mca_error_bank_count; i++)
                wrmsr64(IA32_MCi_CTL(i), 0xFFFFFFFFFFFFFFFFULL);
            /* Clear all errors */
            for (i = 0; i < mca_error_bank_count; i++)
                wrmsr64(IA32_MCi_STATUS(i), 0ULL);
            break;
        case 0x0F:
            /* Enable all banks */
            for (i = 0; i < mca_error_bank_count; i++)
                wrmsr64(IA32_MCi_CTL(i), 0xFFFFFFFFFFFFFFFFULL);
            /* Clear all errors */
            for (i = 0; i < mca_error_bank_count; i++)
                wrmsr64(IA32_MCi_STATUS(i), 0ULL);
            break;
        }
    }

    /* Enable machine check exception handling if available */
    if (mca_MCE_present) {
        set_cr4(get_cr4() | CR4_MCE);
    }
}
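/*
 * A hedged sketch of the bank-MSR addressing the loops above rely on: the
 * architectural machine-check banks are groups of four MSRs starting at
 * 0x400 (IA32_MC0_CTL). These definitions are illustrative; the actual
 * kernel headers may spell them differently.
 */
#define IA32_MC0_CTL        0x400
#define IA32_MCi_CTL(i)     (IA32_MC0_CTL + 4 * (i))      /* bank enable bits */
#define IA32_MCi_STATUS(i)  (IA32_MC0_CTL + 4 * (i) + 1)  /* logged error     */
#define IA32_MCi_ADDR(i)    (IA32_MC0_CTL + 4 * (i) + 2)  /* error address    */
#define IA32_MCi_MISC(i)    (IA32_MC0_CTL + 4 * (i) + 3)  /* misc error info  */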
static void guest_code(void)
{
    uint64_t cr4;

    /* turn on CR4.OSXSAVE */
    cr4 = get_cr4();
    cr4 |= X86_CR4_OSXSAVE;
    set_cr4(cr4);

    /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
    GUEST_ASSERT(cr4_cpuid_is_sync());

    /* notify hypervisor to change CR4 */
    GUEST_SYNC(0);

    /* check again */
    GUEST_ASSERT(cr4_cpuid_is_sync());

    GUEST_DONE();
}
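/*
 * A hedged sketch of what a cr4_cpuid_is_sync() check can look like (the
 * real helper lives elsewhere in the selftest and may differ): the CPU
 * derives CPUID.01H:ECX[27] (OSXSAVE) from the current CR4.OSXSAVE (bit 18),
 * so the two must always agree, even after the hypervisor rewrites CR4.
 */
static int cr4_cpuid_is_sync_sketch(void)
{
    uint32_t eax = 1, ebx, ecx = 0, edx;

    __asm__ volatile("cpuid"
                     : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));

    return !!(ecx & (1u << 27)) == !!(get_cr4() & X86_CR4_OSXSAVE);
}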
/*
 * Write bytes to kernel address space for debugger.
 */
void
db_write_bytes(
    vm_offset_t addr,
    int         size,
    char        *data,
    task_t      task)
{
    char        *dst;
    pt_entry_t  *ptep0 = 0;
    pt_entry_t  oldmap0 = 0;
    vm_offset_t addr1;
    pt_entry_t  *ptep1 = 0;
    pt_entry_t  oldmap1 = 0;
    extern char etext;

    if ((addr < VM_MIN_KERNEL_ADDRESS) ^
        ((addr + size) <= VM_MIN_KERNEL_ADDRESS)) {
        db_error("\ncannot write data into mixed space\n");
        /* NOTREACHED */
    }

    if (addr < VM_MIN_KERNEL_ADDRESS) {
        if (task) {
            db_write_bytes_user_space(addr, size, data, task);
            return;
        } else if (db_current_task() == TASK_NULL) {
            db_printf("\nbad address %x\n", addr);
            db_error(0);
            /* NOTREACHED */
        }
    }

    if (addr >= VM_MIN_KERNEL_ADDRESS &&
        addr <= (vm_offset_t)&etext) {
        ptep0 = pmap_pte(kernel_pmap, addr);
        oldmap0 = *ptep0;
        *ptep0 |= INTEL_PTE_WRITE;

        addr1 = i386_trunc_page(addr + size - 1);
        if (i386_trunc_page(addr) != addr1) {
            /* data crosses a page boundary */
            ptep1 = pmap_pte(kernel_pmap, addr1);
            oldmap1 = *ptep1;
            *ptep1 |= INTEL_PTE_WRITE;
        }
        if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
            set_cr4(get_cr4() & ~CR4_PGE);
        flush_tlb();
    }

    dst = (char *)addr;

    while (--size >= 0)
        *dst++ = *data++;

    if (ptep0) {
        *ptep0 = oldmap0;
        if (ptep1) {
            *ptep1 = oldmap1;
        }
        flush_tlb();
        if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
            set_cr4(get_cr4() | CR4_PGE);
    }
}
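/*
 * A hedged note on why the code above clears CR4.PGE around flush_tlb():
 * reloading CR3 invalidates only non-global TLB entries, and kernel-text
 * mappings are typically marked global. Toggling CR4.PGE off forces even
 * global entries out. flush_tlb() here is assumed to be a plain CR3 reload,
 * roughly:
 */
static inline void flush_tlb_sketch(void)
{
    set_cr3(get_cr3());  /* reload CR3: drops non-global TLB entries only */
}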
uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
{
    struct per_cpu_data *cpu_data;
    paddr_t vmcs_phy;
    paddr_t curr_vmcs = VMCS_NONE;
    vmx_error_t err = 0;
    uint64 fc_msr;

    hax_disable_preemption(flags);

    /* When waking up from sleep, we need the barrier, as VMX operations
     * are not serializing instructions. */
    smp_mb();

    cpu_data = current_cpu_data();

    if (vcpu && is_vmcs_loaded(vcpu)) {
        cpu_data->nested++;
        return 0;
    }

    cpu_data->host_cr4_vmxe = (get_cr4() & CR4_VMXE);
    if (cpu_data->host_cr4_vmxe) {
        if (debug_vmcs_count % 100000 == 0) {
            hax_debug("host VT is already enabled!\n");
            hax_debug("CR4 value = 0x%lx\n", get_cr4());
            log_host_cr4_vmxe = 1;
            log_host_cr4 = get_cr4();
        }
        debug_vmcs_count++;
    }
    set_cr4(get_cr4() | CR4_VMXE);

    /* HP & Mac systems workaround:
     * When resuming from S3, some HP/Mac machines set the
     * IA32_FEATURE_CONTROL MSR to zero. With the lock bit clear, a
     * subsequent VMXON would raise #GP. As a workaround, when we see this
     * condition, we set the required bits so that VMXON (and thereby HAX)
     * can proceed.
     * bit 0 - Lock bit
     * bit 2 - Enable VMX outside SMX operation
     *
     * TODO: This is a workaround for the BSOD seen when resuming from S3.
     * The proper fix is to add a power-management handler and set the
     * IA32_FEATURE_CONTROL MSR in that PM S3 handler.
     */
    fc_msr = ia32_rdmsr(IA32_FEATURE_CONTROL);
    if (!(fc_msr & FC_LOCKED))
        ia32_wrmsr(IA32_FEATURE_CONTROL,
                   fc_msr | FC_LOCKED | FC_VMXON_OUTSMX);

    err = __vmxon(hax_page_pa(cpu_data->vmxon_page));

    log_vmxon_err = err;
    log_vmxon_addr = hax_page_pa(cpu_data->vmxon_page);

    if (!(err & VMX_FAIL_MASK)) {
        cpu_data->vmm_flag |= VMXON_HAX;
    } else {
        bool fatal = true;

#ifdef __MACH__
        if ((err & VMX_FAIL_INVALID) && cpu_data->host_cr4_vmxe) {
            // On macOS, if VMXON fails with VMX_FAIL_INVALID and host
            // CR4.VMXE was already set, it is very likely that another VMM
            // (VirtualBox or any VMM based on macOS Hypervisor Framework,
            // e.g. Docker) is running and did not call VMXOFF. In that case,
            // the current host logical processor is already in VMX
            // operation, and we can use an innocuous VMX instruction
            // (VMPTRST) to confirm that.
            // However, if the above assumption is wrong and the host
            // processor is not actually in VMX operation, VMPTRST will
            // probably cause a host reboot. But we don't have a better
            // choice, and it is worth taking the risk.
            curr_vmcs = __vmptrst();
            if (curr_vmcs == VMCS_NONE) {
                hax_debug("Already in VMX operation, courtesy of another"
                          " VMM (VirtualBox or macOS Hypervisor Framework)\n");
                fatal = false;
                // Indicate that it is not necessary to call VMXOFF later
                cpu_data->vmm_flag &= ~VMXON_HAX;
            } else {
                // Should never happen
                hax_error("VMXON failed with VMX_FAIL_INVALID, but there is a"
                          " current VMCS at 0x%llx\n", curr_vmcs);
            }
        }
#endif

        if (fatal) {
            hax_error("VMXON failed for region 0x%llx (err=0x%x)\n",
                      hax_page_pa(cpu_data->vmxon_page), (uint32)err);
            restore_host_cr4_vmxe(cpu_data);
            if (err & VMX_FAIL_INVALID) {
                log_vmxon_err_type1 = 1;
            } else {
                // TODO: Should VMX_FAIL_VALID be ignored? The current VMCS
                // can be cleared (deactivated and saved to memory) using
                // VMCLEAR
                log_vmxon_err_type2 = 1;
            }
            hax_enable_preemption(flags);
            return VMXON_FAIL;
        }
    }

    if (vcpu)
        ((vmcs_t *)(hax_page_va(vcpu->vmcs_page)))->_revision_id =
                cpu_data->vmx_info._vmcs_revision_id;

    if (vcpu)
        vmcs_phy = vcpu_vmcs_pa(vcpu);
    else
        vmcs_phy = hax_page_pa(cpu_data->vmcs_page);

    if (__vmptrld(vmcs_phy) != VMX_SUCCEED) {
        hax_error("HAX: vmptrld failed (%08llx)\n", vmcs_phy);
        cpu_data->vmm_flag = 0;
        __vmxoff();
        restore_host_cr4_vmxe(cpu_data);
        log_vmxon_err_type3 = 1;
        hax_enable_preemption(flags);
        return VMPTRLD_FAIL;
    }

    if (vcpu) {
        vcpu->is_vmcs_loaded = 1;
        cpu_data->current_vcpu = vcpu;
        vcpu->cpu_id = hax_cpuid();
    }

    cpu_data->other_vmcs = curr_vmcs;
    return VMXON_SUCCESS;
}
void
pmap_pcid_configure(void)
{
    int ccpu = cpu_number();
    uintptr_t cr4 = get_cr4();
    boolean_t pcid_present = FALSE;

    pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
    pmap_assert(ml_get_interrupts_enabled() == FALSE ||
                get_preemption_level() != 0);
    pmap_assert(cpu_mode_is64bit());

    if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled,
                           sizeof(pmap_pcid_disabled))) {
        pmap_pcid_log("PMAP: PCID feature disabled\n");
        printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
        kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
    }
    /* no_shared_cr3+PCID is currently unsupported */
#if DEBUG
    if (pmap_pcid_disabled == FALSE)
        no_shared_cr3 = FALSE;
    else
        no_shared_cr3 = TRUE;
#else
    if (no_shared_cr3)
        pmap_pcid_disabled = TRUE;
#endif
    if (pmap_pcid_disabled || no_shared_cr3) {
        unsigned i;
        /* Reset PCID status, as we may have picked up
         * strays if discovered prior to platform
         * expert initialization.
         */
        for (i = 0; i < real_ncpus; i++) {
            if (cpu_datap(i)) {
                cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
            }
            pmap_pcid_ncpus = 0;
        }
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
        return;
    }
    /* DRKTODO: assert if features haven't been discovered yet. Redundant
     * invocation of cpu_mode_init and descendants masks this for now.
     */
    if (cpuid_features() & CPUID_FEATURE_PCID)
        pcid_present = TRUE;
    else {
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
        pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
        return;
    }
    if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
        pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
        return;
    }
    if (pcid_present == TRUE) {
        pmap_pcid_log("Pre-PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n",
                      get_cr0(), get_cr3_raw(), ccpu, cr4);

        if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
            panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
        }
        if ((get_cr4() & CR4_PGE) == 0) {
            set_cr4(get_cr4() | CR4_PGE);
            pmap_pcid_log("Toggled PGE ON (CPU: %d)\n", ccpu);
        }
        set_cr4(get_cr4() | CR4_PCIDE);
        pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n",
                      get_cr0(), get_cr3_raw(), ccpu, get_cr4());
        tlb_flush_global();
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

        if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
            pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, "
                          "pmap_pcid_ncpus: %d\n",
                          real_ncpus, pmap_pcid_ncpus);
        }
        cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
            cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
            &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
        cpu_datap(ccpu)->cpu_pcid_refcounts[0] = 1;
    }
}
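/*
 * A hedged sketch of what enabling CR4.PCIDE (as above) changes about CR3
 * writes: bits 11:0 of CR3 then carry the PCID, and setting bit 63 of the
 * value moved to CR3 suppresses invalidation of TLB entries tagged with that
 * PCID. The constant names and the helper below are illustrative;
 * set_cr3_raw is assumed to be the raw CR3 writer used by the pmap layer.
 */
#define CR3_PCID_MASK   0xFFFULL
#define CR3_NOFLUSH_BIT (1ULL << 63)

static inline void set_cr3_with_pcid(uint64_t pml4_phys, uint16_t pcid,
                                     int preserve_tlb)
{
    uint64_t cr3 = (pml4_phys & ~CR3_PCID_MASK) | (pcid & CR3_PCID_MASK);

    if (preserve_tlb)
        cr3 |= CR3_NOFLUSH_BIT;  /* keep this PCID's TLB entries alive */
    set_cr3_raw(cr3);
}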
// the kernel bootstrap routine
PUBLIC void
kernel_thread_t::bootstrap()
{
    // Initializations done -- helping_lock_t can now use helping lock
    helping_lock_t::threading_system_active = true;

    //
    // set up my own thread control block
    //
    state_change(0, Thread_running);

    sched()->set_prio(config::kernel_prio);
    sched()->set_mcp(config::kernel_mcp);
    sched()->set_timeslice(config::default_time_slice);
    sched()->set_ticks_left(config::default_time_slice);

    present_next = present_prev = this;
    ready_next = ready_prev = this;

    //
    // set up class variables
    //
    for (int i = 0; i < 256; i++)
        prio_next[i] = prio_first[i] = 0;
    prio_next[config::kernel_prio] = prio_first[config::kernel_prio] = this;
    prio_highest = config::kernel_prio;

    timeslice_ticks_left = config::default_time_slice;
    timeslice_owner = this;

    //
    // install our slow trap handler
    //
    nested_trap_handler = base_trap_handler;
    base_trap_handler = thread_handle_trap;

    //
    // initialize FPU
    //
    set_ts();  // FPU ops -> exception

    //
    // initialize interrupts
    //
    irq_t::lookup(2)->alloc(this, false);  // reserve cascade irq
    irq_t::lookup(8)->alloc(this, false);  // reserve timer irq

    pic_enable_irq(2);  // allow cascaded irqs

    // set up serial console
    if (!strstr(kmem::cmdline(), " -I-")
        && !strstr(kmem::cmdline(), " -irqcom")) {
        int com_port = console::serial_com_port;
        int com_irq = com_port & 1 ? 4 : 3;
        irq_t::lookup(com_irq)->alloc(this, false);  // the remote-gdb interrupt
        pic_enable_irq(com_irq);
        // for some reason, we have to re-enable the com irq here
        if (config::serial_esc)
            com_cons_enable_receive_interrupt();
    }

    // initialize the profiling timer
    bool user_irq0 = strstr(kmem::cmdline(), "irq0");

    if (config::profiling) {
        if (user_irq0) {
            kdb_ke("options `-profile' and `-irq0' don't mix "
                   "-- disabling `-irq0'");
        }
        irq_t::lookup(0)->alloc(this, false);
        profile::init();
        if (strstr(kmem::cmdline(), " -profstart"))
            profile::start();
    } else if (!user_irq0) {
        irq_t::lookup(0)->alloc(this, false);  // reserve irq0 even though
                                               // we don't use it
    }

    //
    // set up timer interrupt (~ 1ms)
    //
    while (rtcin(RTC_STATUSA) & RTCSA_TUP)
        ;                                             // wait till RTC ready
    rtcout(RTC_STATUSA, RTCSA_DIVIDER | RTCSA_1024);  // 1024 Hz
    // set up 1024 Hz interrupt
    rtcout(RTC_STATUSB, rtcin(RTC_STATUSB) | RTCSB_PINTR | RTCSB_SQWE);
    rtcin(RTC_INTR);                                  // reset
    pic_enable_irq(8);                                // allow this interrupt

    //
    // set the PCE flag in CR4 to enable reading the performance measurement
    // counters in user mode. PMCs were introduced in Pentium MMX and
    // PPro processors.
    //
#ifndef CPUF_MMX
#define CPUF_MMX 0x00800000
#endif
    if (strncmp(cpu.vendor_id, "GenuineIntel", 12) == 0
        && (cpu.family == CPU_FAMILY_PENTIUM_PRO
            || cpu.feature_flags & CPUF_MMX)) {
        set_cr4(get_cr4() | CR4_PCE);
    }

    //
    // allow the boot task to create more tasks
    //
    for (unsigned i = config::boot_taskno + 1;
         i < space_index_t::max_space_number; i++) {
        check(space_index_t(i).set_chief(space_index(),
                                         space_index_t(config::boot_taskno)));
    }

    //
    // create sigma0
    //

    // sigma0's chief is the boot task
    space_index_t(config::sigma0_id.id.task).
        set_chief(space_index(), space_index_t(config::sigma0_id.id.chief));

    sigma0 = new space_t(config::sigma0_id.id.task);
    sigma0_thread =
        new (&config::sigma0_id) thread_t(sigma0, &config::sigma0_id,
                                          config::sigma0_prio,
                                          config::sigma0_mcp);

    // push address of kernel info page to sigma0's stack
    vm_offset_t esp = kmem::info()->sigma0_esp;

    *reinterpret_cast<vm_offset_t *>(kmem::phys_to_virt(--esp)) =
        kmem::virt_to_phys(kmem::info());

    sigma0_thread->initialize(kmem::info()->sigma0_eip, esp, 0, 0);

    //
    // create the boot task
    //

    // the boot task's chief is the boot task itself
    space_index_t(config::boot_id.id.task).
        set_chief(space_index(), space_index_t(config::boot_id.id.chief));

    space_t *boot = new space_t(config::boot_id.id.task);
    thread_t *boot_thread =
        new (&config::boot_id) thread_t(boot, &config::boot_id,
                                        config::boot_prio,
                                        config::boot_mcp);

    boot_thread->initialize(0x200000, 0x200000, sigma0_thread, 0);

    //
    // the idle loop
    //
    for (;;) {
        // printf("I");
        sti();  // enable irqs, otherwise idling is fatal

        if (config::hlt_works_ok)
            asm("hlt");  // stop the CPU, waiting for an int

        while (ready_next != this)  // are there any other threads ready?
            schedule();
    }
}