static int __init pvh_setup_vmx_realmode_helpers(struct domain *d)
{
    p2m_type_t p2mt;
    uint32_t rc, *ident_pt;
    mfn_t mfn;
    paddr_t gaddr;
    struct vcpu *v = d->vcpu[0];

    /*
     * Steal some space from the last RAM region below 4GB and use it to
     * store the real-mode TSS. It needs to be aligned to 128 so that the
     * TSS structure (which accounts for the first 104b) doesn't cross
     * a page boundary.
     */
    if ( !pvh_steal_ram(d, HVM_VM86_TSS_SIZE, 128, GB(4), &gaddr) )
    {
        if ( hvm_copy_to_guest_phys(gaddr, NULL, HVM_VM86_TSS_SIZE, v) !=
             HVMCOPY_okay )
            printk("Unable to zero VM86 TSS area\n");
        d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZED] =
            VM86_TSS_UPDATED | ((uint64_t)HVM_VM86_TSS_SIZE << 32) | gaddr;
        if ( pvh_add_mem_range(d, gaddr, gaddr + HVM_VM86_TSS_SIZE,
                               E820_RESERVED) )
            printk("Unable to set VM86 TSS as reserved in the memory map\n");
    }
    else
        printk("Unable to allocate VM86 TSS area\n");

    /* Steal some more RAM for the identity page tables. */
    if ( pvh_steal_ram(d, PAGE_SIZE, PAGE_SIZE, GB(4), &gaddr) )
    {
        printk("Unable to find memory to stash the identity page tables\n");
        return -ENOMEM;
    }

    /*
     * Identity-map page table is required for running with CR0.PG=0
     * when using Intel EPT. Create a 32-bit non-PAE page directory of
     * superpages.
     */
    ident_pt = map_domain_gfn(p2m_get_hostp2m(d), _gfn(PFN_DOWN(gaddr)),
                              &mfn, &p2mt, 0, &rc);
    if ( ident_pt == NULL )
    {
        printk("Unable to map identity page tables\n");
        return -ENOMEM;
    }
    write_32bit_pse_identmap(ident_pt);
    unmap_domain_page(ident_pt);
    put_page(mfn_to_page(mfn_x(mfn)));
    d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] = gaddr;
    if ( pvh_add_mem_range(d, gaddr, gaddr + PAGE_SIZE, E820_RESERVED) )
        printk("Unable to set identity page tables as reserved in the memory map\n");

    return 0;
}
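/*
 * For reference: a minimal sketch of what filling the identity page
 * directory above amounts to -- a 32-bit, non-PAE page directory of 4MB
 * superpages covering the whole 4GB space, matching the intent of Xen's
 * write_32bit_pse_identmap() helper.  The helper name and the exact flag
 * macros used here are illustrative assumptions, not taken from this file.
 */
static void __init sketch_write_32bit_pse_identmap(uint32_t *l2)
{
    unsigned int i;

    /* 1024 entries, each mapping a 4MB superpage at (i << 22). */
    for ( i = 0; i < PAGE_SIZE / sizeof(*l2); i++ )
        l2[i] = (i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
}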
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
    uint32_t data;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            p->data = data;
        }
        else
        {
            data = p->data;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
        }
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
            (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
                                         &data, p->size);
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            data = 0;
            (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size,
                                           p->size);
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
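/*
 * For context, a skeletal (hypothetical) port I/O action of the shape the
 * intercept above dispatches to: reads fill *val, writes consume *val, and
 * the return value is an X86EMUL_* code.  The "latched register" behaviour
 * is invented purely for illustration.
 */
static int example_portio_action(int dir, uint32_t port, uint32_t bytes,
                                 uint32_t *val)
{
    static uint32_t latch;      /* pretend device register */

    if ( dir == IOREQ_READ )
        *val = latch;
    else                        /* IOREQ_WRITE */
        latch = *val;

    return X86EMUL_OKAY;
}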
static int hvm_mmio_access(struct vcpu *v,
                           ioreq_t *p,
                           hvm_mmio_read_t read_handler,
                           hvm_mmio_write_t write_handler)
{
    unsigned long data;
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = read_handler(v, p->addr, p->size, &data);
            p->data = data;
        }
        else /* p->dir == IOREQ_WRITE */
            rc = write_handler(v, p->addr, p->size, p->data);
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            int ret;

            rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
                              &data);
            if ( rc != X86EMUL_OKAY )
                break;
            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
                                         &data, p->size);
            if ( (ret == HVMCOPY_gfn_paged_out) ||
                 (ret == HVMCOPY_gfn_shared) )
            {
                rc = X86EMUL_RETRY;
                break;
            }
        }
    }
    else
    {
        for ( i = 0; i < p->count; i++ )
        {
            switch ( hvm_copy_from_guest_phys(&data,
                                              p->data + sign * i * p->size,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = write_handler(v, p->addr + (sign * i * p->size), p->size,
                               data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
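/*
 * Likewise, a skeletal (hypothetical) MMIO read/write handler pair of the
 * shape hvm_mmio_access() expects: reads fill *val, writes take the value
 * directly, and both return an X86EMUL_* code.  The parameter layout is
 * assumed from the call sites above; the behaviour is invented.
 */
static int example_mmio_read(struct vcpu *v, unsigned long addr,
                             unsigned long length, unsigned long *val)
{
    *val = 0;                   /* unimplemented registers read as zero */
    return X86EMUL_OKAY;
}

static int example_mmio_write(struct vcpu *v, unsigned long addr,
                              unsigned long length, unsigned long val)
{
    /* Silently discard writes to this (pretend) device. */
    return X86EMUL_OKAY;
}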
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
    uint32_t data;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            p->data = data;
        }
        else
        {
            data = p->data;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
        }
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
            (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
                                         &data, p->size);
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            data = 0;
            switch ( hvm_copy_from_guest_phys(&data,
                                              p->data + sign * i * p->size,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
static int __init pvh_setup_acpi(struct domain *d, paddr_t start_info)
{
    unsigned long pfn, nr_pages;
    paddr_t madt_paddr, xsdt_paddr, rsdp_paddr;
    unsigned int i;
    int rc;
    struct acpi_table_rsdp *native_rsdp, rsdp = {
        .signature = ACPI_SIG_RSDP,
        .revision = 2,
        .length = sizeof(rsdp),
    };

    /* Scan top-level tables and add their regions to the guest memory map. */
    for ( i = 0; i < acpi_gbl_root_table_list.count; i++ )
    {
        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;
        unsigned long addr = acpi_gbl_root_table_list.tables[i].address;
        unsigned long size = acpi_gbl_root_table_list.tables[i].length;

        /*
         * Make sure the original MADT is also mapped, so that Dom0 can
         * properly access the data returned by _MAT methods in case it's
         * re-using MADT memory.
         */
        if ( strncmp(sig, ACPI_SIG_MADT, ACPI_NAME_SIZE)
             ? pvh_acpi_table_allowed(sig)
             : !acpi_memory_banned(addr, size) )
            pvh_add_mem_range(d, addr, addr + size, E820_ACPI);
    }

    /* Identity map ACPI e820 regions. */
    for ( i = 0; i < d->arch.nr_e820; i++ )
    {
        if ( d->arch.e820[i].type != E820_ACPI &&
             d->arch.e820[i].type != E820_NVS )
            continue;

        pfn = PFN_DOWN(d->arch.e820[i].addr);
        nr_pages = PFN_UP((d->arch.e820[i].addr & ~PAGE_MASK) +
                          d->arch.e820[i].size);

        rc = modify_identity_mmio(d, pfn, nr_pages, true);
        if ( rc )
        {
            printk("Failed to map ACPI region [%#lx, %#lx) into Dom0 memory map\n",
                   pfn, pfn + nr_pages);
            return rc;
        }
    }

    rc = pvh_setup_acpi_madt(d, &madt_paddr);
    if ( rc )
        return rc;

    rc = pvh_setup_acpi_xsdt(d, madt_paddr, &xsdt_paddr);
    if ( rc )
        return rc;

    /* Craft a custom RSDP. */
    native_rsdp = acpi_os_map_memory(acpi_os_get_root_pointer(), sizeof(rsdp));
    if ( !native_rsdp )
    {
        printk("Failed to map native RSDP\n");
        return -ENOMEM;
    }
    memcpy(rsdp.oem_id, native_rsdp->oem_id, sizeof(rsdp.oem_id));
    acpi_os_unmap_memory(native_rsdp, sizeof(rsdp));
    rsdp.xsdt_physical_address = xsdt_paddr;
    /*
     * Calling acpi_tb_checksum here is a layering violation, but
     * introducing a wrapper for such simple usage seems overkill.
     */
    rsdp.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, &rsdp),
                                      ACPI_RSDP_REV0_SIZE);
    rsdp.extended_checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, &rsdp),
                                               sizeof(rsdp));

    /*
     * Place the new RSDP in guest memory space.
     *
     * NB: this RSDP is not going to replace the original RSDP, which should
     * still be accessible to the guest. However that RSDP is going to point
     * to the native RSDT, and should not be used for the Dom0 kernel's boot
     * purposes (we keep it visible for post boot access).
     */
    if ( pvh_steal_ram(d, sizeof(rsdp), 0, GB(4), &rsdp_paddr) )
    {
        printk("Unable to allocate guest RAM for RSDP\n");
        return -ENOMEM;
    }

    /* Mark this region as E820_ACPI. */
    if ( pvh_add_mem_range(d, rsdp_paddr, rsdp_paddr + sizeof(rsdp),
                           E820_ACPI) )
        printk("Unable to add RSDP region to memory map\n");

    /* Copy RSDP into guest memory. */
    rc = hvm_copy_to_guest_phys(rsdp_paddr, &rsdp, sizeof(rsdp), d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy RSDP into guest memory\n");
        return rc;
    }

    /* Copy RSDP address to start_info. */
    rc = hvm_copy_to_guest_phys(start_info +
                                offsetof(struct hvm_start_info, rsdp_paddr),
                                &rsdp_paddr,
                                sizeof(((struct hvm_start_info *)0)->rsdp_paddr),
                                d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy RSDP address to start info\n");
        return rc;
    }

    return 0;
}

int __init dom0_construct_pvh(struct domain *d, const module_t *image,
                              unsigned long image_headroom,
                              module_t *initrd,
                              void *(*bootstrap_map)(const module_t *),
                              char *cmdline)
{
    paddr_t entry, start_info;
    int rc;

    printk("** Building a PVH Dom0 **\n");

    iommu_hwdom_init(d);

    rc = pvh_setup_p2m(d);
    if ( rc )
    {
        printk("Failed to setup Dom0 physical memory map\n");
        return rc;
    }

    rc = pvh_load_kernel(d, image, image_headroom, initrd,
                         bootstrap_map(image), cmdline, &entry, &start_info);
    if ( rc )
    {
        printk("Failed to load Dom0 kernel\n");
        return rc;
    }

    rc = pvh_setup_cpus(d, entry, start_info);
    if ( rc )
    {
        printk("Failed to setup Dom0 CPUs: %d\n", rc);
        return rc;
    }

    rc = pvh_setup_acpi(d, start_info);
    if ( rc )
    {
        printk("Failed to setup Dom0 ACPI tables: %d\n", rc);
        return rc;
    }

    panic("Building a PVHv2 Dom0 is not yet supported.");
    return 0;
}
static int __init pvh_setup_acpi_xsdt(struct domain *d, paddr_t madt_addr,
                                      paddr_t *addr)
{
    struct acpi_table_xsdt *xsdt;
    struct acpi_table_header *table;
    struct acpi_table_rsdp *rsdp;
    unsigned long size = sizeof(*xsdt);
    unsigned int i, j, num_tables = 0;
    paddr_t xsdt_paddr;
    int rc;

    /*
     * Restore original DMAR table signature; we are going to filter it from
     * the new XSDT that is presented to the guest, so it is no longer
     * necessary to have its signature zapped.
     */
    acpi_dmar_reinstate();

    /* Count the number of tables that will be added to the XSDT. */
    for ( i = 0; i < acpi_gbl_root_table_list.count; i++ )
    {
        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;

        if ( pvh_acpi_table_allowed(sig) )
            num_tables++;
    }

    /*
     * No need to add or subtract anything because struct acpi_table_xsdt
     * includes one array slot already, and we have filtered out the original
     * MADT and we are going to add a custom built MADT.
     */
    size += num_tables * sizeof(xsdt->table_offset_entry[0]);

    xsdt = xzalloc_bytes(size);
    if ( !xsdt )
    {
        printk("Unable to allocate memory for XSDT table\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Copy the native XSDT table header. */
    rsdp = acpi_os_map_memory(acpi_os_get_root_pointer(), sizeof(*rsdp));
    if ( !rsdp )
    {
        printk("Unable to map RSDP\n");
        rc = -EINVAL;
        goto out;
    }
    xsdt_paddr = rsdp->xsdt_physical_address;
    acpi_os_unmap_memory(rsdp, sizeof(*rsdp));
    table = acpi_os_map_memory(xsdt_paddr, sizeof(*table));
    if ( !table )
    {
        printk("Unable to map XSDT\n");
        rc = -EINVAL;
        goto out;
    }
    xsdt->header = *table;
    acpi_os_unmap_memory(table, sizeof(*table));

    /* Add the custom MADT. */
    xsdt->table_offset_entry[0] = madt_addr;

    /* Copy the addresses of the rest of the allowed tables. */
    for ( i = 0, j = 1; i < acpi_gbl_root_table_list.count; i++ )
    {
        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;

        if ( pvh_acpi_table_allowed(sig) )
            xsdt->table_offset_entry[j++] =
                acpi_gbl_root_table_list.tables[i].address;
    }

    xsdt->header.revision = 1;
    xsdt->header.length = size;
    /*
     * Calling acpi_tb_checksum here is a layering violation, but
     * introducing a wrapper for such simple usage seems overkill.
     */
    xsdt->header.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, xsdt), size);

    /* Place the new XSDT in guest memory space. */
    if ( pvh_steal_ram(d, size, 0, GB(4), addr) )
    {
        printk("Unable to find guest RAM for XSDT\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Mark this region as E820_ACPI. */
    if ( pvh_add_mem_range(d, *addr, *addr + size, E820_ACPI) )
        printk("Unable to add XSDT region to memory map\n");

    rc = hvm_copy_to_guest_phys(*addr, xsdt, size, d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy XSDT into guest memory\n");
        goto out;
    }

    rc = 0;

 out:
    xfree(xsdt);

    return rc;
}
static int __init pvh_setup_acpi_madt(struct domain *d, paddr_t *addr)
{
    struct acpi_table_madt *madt;
    struct acpi_table_header *table;
    struct acpi_madt_io_apic *io_apic;
    struct acpi_madt_local_x2apic *x2apic;
    acpi_status status;
    unsigned long size;
    unsigned int i, max_vcpus;
    int rc;

    /* Count number of interrupt overrides in the MADT. */
    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
                          acpi_count_intr_ovr, UINT_MAX);

    /* Count number of NMI sources in the MADT. */
    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_count_nmi_src,
                          UINT_MAX);

    max_vcpus = dom0_max_vcpus();
    /* Calculate the size of the crafted MADT. */
    size = sizeof(*madt);
    size += sizeof(*io_apic) * nr_ioapics;
    size += sizeof(*intsrcovr) * acpi_intr_overrides;
    size += sizeof(*nmisrc) * acpi_nmi_sources;
    size += sizeof(*x2apic) * max_vcpus;

    madt = xzalloc_bytes(size);
    if ( !madt )
    {
        printk("Unable to allocate memory for MADT table\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Copy the native MADT table header. */
    status = acpi_get_table(ACPI_SIG_MADT, 0, &table);
    if ( !ACPI_SUCCESS(status) )
    {
        printk("Failed to get MADT ACPI table, aborting.\n");
        rc = -EINVAL;
        goto out;
    }
    madt->header = *table;
    madt->address = APIC_DEFAULT_PHYS_BASE;
    /*
     * NB: this is currently set to 4, which is the revision in the ACPI
     * spec 6.1. Sadly ACPICA doesn't provide revision numbers for the
     * tables described in the headers.
     */
    madt->header.revision = min_t(unsigned char, table->revision, 4);

    /* Setup the IO APIC entries. */
    io_apic = (void *)(madt + 1);
    for ( i = 0; i < nr_ioapics; i++ )
    {
        io_apic->header.type = ACPI_MADT_TYPE_IO_APIC;
        io_apic->header.length = sizeof(*io_apic);
        io_apic->id = domain_vioapic(d, i)->id;
        io_apic->address = domain_vioapic(d, i)->base_address;
        io_apic->global_irq_base = domain_vioapic(d, i)->base_gsi;
        io_apic++;
    }

    x2apic = (void *)io_apic;
    for ( i = 0; i < max_vcpus; i++ )
    {
        x2apic->header.type = ACPI_MADT_TYPE_LOCAL_X2APIC;
        x2apic->header.length = sizeof(*x2apic);
        x2apic->uid = i;
        x2apic->local_apic_id = i * 2;
        x2apic->lapic_flags = ACPI_MADT_ENABLED;
        x2apic++;
    }

    /*
     * Setup interrupt overrides.  Note that intsrcovr and nmisrc are
     * file-scope cursors advanced by the acpi_set_intr_ovr() and
     * acpi_set_nmi_src() parse callbacks.
     */
    intsrcovr = (void *)x2apic;
    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_set_intr_ovr,
                          acpi_intr_overrides);

    /* Setup NMI sources. */
    nmisrc = (void *)intsrcovr;
    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_set_nmi_src,
                          acpi_nmi_sources);

    ASSERT(((void *)nmisrc - (void *)madt) == size);
    madt->header.length = size;
    /*
     * Calling acpi_tb_checksum here is a layering violation, but
     * introducing a wrapper for such simple usage seems overkill.
     */
    madt->header.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, madt), size);

    /* Place the new MADT in guest memory space. */
    if ( pvh_steal_ram(d, size, 0, GB(4), addr) )
    {
        printk("Unable to allocate guest RAM for MADT\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Mark this region as E820_ACPI. */
    if ( pvh_add_mem_range(d, *addr, *addr + size, E820_ACPI) )
        printk("Unable to add MADT region to memory map\n");

    rc = hvm_copy_to_guest_phys(*addr, madt, size, d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy MADT into guest memory\n");
        goto out;
    }

    rc = 0;

 out:
    xfree(madt);

    return rc;
}
static int __init pvh_load_kernel(struct domain *d, const module_t *image,
                                  unsigned long image_headroom,
                                  module_t *initrd, void *image_base,
                                  char *cmdline, paddr_t *entry,
                                  paddr_t *start_info_addr)
{
    void *image_start = image_base + image_headroom;
    unsigned long image_len = image->mod_end;
    struct elf_binary elf;
    struct elf_dom_parms parms;
    paddr_t last_addr;
    struct hvm_start_info start_info = { 0 };
    struct hvm_modlist_entry mod = { 0 };
    struct vcpu *v = d->vcpu[0];
    int rc;

    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
    {
        printk("Error trying to detect bz compressed kernel\n");
        return rc;
    }

    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
    {
        printk("Unable to init ELF\n");
        return rc;
    }
#ifdef VERBOSE
    elf_set_verbose(&elf);
#endif
    elf_parse_binary(&elf);
    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
    {
        printk("Unable to parse kernel for ELFNOTES\n");
        return rc;
    }

    if ( parms.phys_entry == UNSET_ADDR32 )
    {
        printk("Unable to find XEN_ELFNOTE_PHYS32_ENTRY address\n");
        return -EINVAL;
    }

    printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os,
           parms.guest_ver, parms.loader,
           elf_64bit(&elf) ? "64-bit" : "32-bit");

    /* Copy the OS image and free temporary buffer. */
    elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base);
    elf.dest_size = parms.virt_kend - parms.virt_kstart;

    elf_set_vcpu(&elf, v);
    rc = elf_load_binary(&elf);
    if ( rc < 0 )
    {
        printk("Failed to load kernel: %d\n", rc);
        printk("Xen dom0 kernel broken ELF: %s\n", elf_check_broken(&elf));
        return rc;
    }

    last_addr = ROUNDUP(parms.virt_kend - parms.virt_base, PAGE_SIZE);

    if ( initrd != NULL )
    {
        rc = hvm_copy_to_guest_phys(last_addr, mfn_to_virt(initrd->mod_start),
                                    initrd->mod_end, v);
        if ( rc )
        {
            printk("Unable to copy initrd to guest\n");
            return rc;
        }

        mod.paddr = last_addr;
        mod.size = initrd->mod_end;
        last_addr += ROUNDUP(initrd->mod_end, PAGE_SIZE);
    }

    /* Free temporary buffers. */
    discard_initial_images();

    if ( cmdline != NULL )
    {
        rc = hvm_copy_to_guest_phys(last_addr, cmdline, strlen(cmdline) + 1, v);
        if ( rc )
        {
            printk("Unable to copy guest command line\n");
            return rc;
        }
        start_info.cmdline_paddr = last_addr;
        /*
         * Round up to 32/64 bits (depending on the guest kernel bitness) so
         * the modlist/start_info is aligned.
         */
        last_addr += ROUNDUP(strlen(cmdline) + 1, elf_64bit(&elf) ? 8 : 4);
    }
    if ( initrd != NULL )
    {
        rc = hvm_copy_to_guest_phys(last_addr, &mod, sizeof(mod), v);
        if ( rc )
        {
            printk("Unable to copy guest modules\n");
            return rc;
        }
        start_info.modlist_paddr = last_addr;
        start_info.nr_modules = 1;
        last_addr += sizeof(mod);
    }

    start_info.magic = XEN_HVM_START_MAGIC_VALUE;
    start_info.flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
    rc = hvm_copy_to_guest_phys(last_addr, &start_info, sizeof(start_info), v);
    if ( rc )
    {
        printk("Unable to copy start info to guest\n");
        return rc;
    }

    *entry = parms.phys_entry;
    *start_info_addr = last_addr;

    return 0;
}
static void realmode_deliver_exception(
    unsigned int vector,
    unsigned int insn_len,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct segment_register *idtr, *csr;
    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
    uint32_t cs_eip, pstk;
    uint16_t frame[3];
    unsigned int last_byte;

    idtr = hvmemul_get_seg_reg(x86_seg_idtr, hvmemul_ctxt);
    csr = hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
    __set_bit(x86_seg_cs, &hvmemul_ctxt->seg_reg_dirty);

 again:
    last_byte = (vector * 4) + 3;
    if ( idtr->limit < last_byte ||
         hvm_copy_from_guest_phys(&cs_eip, idtr->base + vector * 4, 4) !=
         HVMCOPY_okay )
    {
        /* Software interrupt? */
        if ( insn_len != 0 )
        {
            insn_len = 0;
            vector = TRAP_gp_fault;
            goto again;
        }

        /* Exception or hardware interrupt. */
        switch ( vector )
        {
        case TRAP_double_fault:
            hvm_triple_fault();
            return;
        case TRAP_gp_fault:
            vector = TRAP_double_fault;
            goto again;
        default:
            vector = TRAP_gp_fault;
            goto again;
        }
    }

    frame[0] = regs->eip + insn_len;
    frame[1] = csr->sel;
    frame[2] = regs->eflags & ~X86_EFLAGS_RF;

    /* We can't test hvmemul_ctxt->ctxt.sp_size: it may not be initialised. */
    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db )
    {
        regs->esp -= 6;
        pstk = regs->esp;
    }
    else
    {
        pstk = (uint16_t)(regs->esp - 6);
        regs->esp &= ~0xffff;
        regs->esp |= pstk;
    }

    pstk += hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt)->base;
    (void)hvm_copy_to_guest_phys(pstk, frame, sizeof(frame));

    csr->sel = cs_eip >> 16;
    csr->base = (uint32_t)csr->sel << 4;
    regs->eip = (uint16_t)cs_eip;
    regs->eflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF | X86_EFLAGS_RF);

    /* Exception delivery clears STI and MOV-SS blocking. */
    if ( hvmemul_ctxt->intr_shadow &
         (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS) )
    {
        hvmemul_ctxt->intr_shadow &=
            ~(VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS);
        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, hvmemul_ctxt->intr_shadow);
    }
}
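/*
 * A small illustrative helper making the cs_eip decode above explicit: a
 * real-mode IVT entry is 4 bytes, low word = IP, high word = CS, and the
 * handler's linear address is (CS << 4) + IP -- which is exactly how the
 * code above rebases csr->base and regs->eip.  This helper exists only for
 * illustration; it is not part of the emulator.
 */
static inline uint32_t realmode_vector_linear_target(uint32_t cs_eip)
{
    uint16_t cs = cs_eip >> 16;
    uint16_t ip = (uint16_t)cs_eip;

    return ((uint32_t)cs << 4) + ip;
}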
static int hvm_mmio_access(struct vcpu *v,
                           ioreq_t *p,
                           hvm_mmio_read_t read_handler,
                           hvm_mmio_write_t write_handler)
{
    unsigned long data;
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = read_handler(v, p->addr, p->size, &data);
            p->data = data;
        }
        else /* p->dir == IOREQ_WRITE */
            rc = write_handler(v, p->addr, p->size, p->data);
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            int ret;

            rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
                              &data);
            if ( rc != X86EMUL_OKAY )
                break;
            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
                                         &data, p->size);
            if ( (ret == HVMCOPY_gfn_paged_out) ||
                 (ret == HVMCOPY_gfn_shared) )
            {
                rc = X86EMUL_RETRY;
                break;
            }
        }
    }
    else
    {
        for ( i = 0; i < p->count; i++ )
        {
            int ret;

            ret = hvm_copy_from_guest_phys(&data,
                                           p->data + (sign * i * p->size),
                                           p->size);
            if ( (ret == HVMCOPY_gfn_paged_out) ||
                 (ret == HVMCOPY_gfn_shared) )
            {
                rc = X86EMUL_RETRY;
                break;
            }
            rc = write_handler(v, p->addr + (sign * i * p->size), p->size,
                               data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
{
    struct domain *d = current->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    switch ( idx )
    {
    case VIRIDIAN_MSR_GUEST_OS_ID:
        perfc_incr(mshv_wrmsr_osid);
        d->arch.hvm_domain.viridian.guest_os_id.raw = val;
        gdprintk(XENLOG_INFO, "Guest os:\n");
        gdprintk(XENLOG_INFO, "\tvendor: %x\n",
                 d->arch.hvm_domain.viridian.guest_os_id.fields.vendor);
        gdprintk(XENLOG_INFO, "\tos: %x\n",
                 d->arch.hvm_domain.viridian.guest_os_id.fields.os);
        gdprintk(XENLOG_INFO, "\tmajor: %x\n",
                 d->arch.hvm_domain.viridian.guest_os_id.fields.major);
        gdprintk(XENLOG_INFO, "\tminor: %x\n",
                 d->arch.hvm_domain.viridian.guest_os_id.fields.minor);
        gdprintk(XENLOG_INFO, "\tsp: %x\n",
                 d->arch.hvm_domain.viridian.guest_os_id.fields.service_pack);
        gdprintk(XENLOG_INFO, "\tbuild: %x\n",
                 d->arch.hvm_domain.viridian.guest_os_id.fields.build_number);
        break;

    case VIRIDIAN_MSR_HYPERCALL:
        perfc_incr(mshv_wrmsr_hc_page);
        gdprintk(XENLOG_INFO, "Set hypercall page %"PRIx64".\n", val);
        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
            break;
        d->arch.hvm_domain.viridian.hypercall_gpa.raw = val;
        if ( d->arch.hvm_domain.viridian.hypercall_gpa.fields.enabled )
            enable_hypercall_page();
        break;

    case VIRIDIAN_MSR_VP_INDEX:
        perfc_incr(mshv_wrmsr_vp_index);
        gdprintk(XENLOG_INFO, "Set VP index %"PRIu64".\n", val);
        break;

    case VIRIDIAN_MSR_EOI:
        perfc_incr(mshv_wrmsr_eoi);
        vlapic_EOI_set(vcpu_vlapic(current));
        break;

    case VIRIDIAN_MSR_ICR: {
        u32 eax = (u32)val, edx = (u32)(val >> 32);
        struct vlapic *vlapic = vcpu_vlapic(current);
        perfc_incr(mshv_wrmsr_icr);
        eax &= ~(1 << 12);
        edx &= 0xff000000;
        vlapic_set_reg(vlapic, APIC_ICR2, edx);
        if ( vlapic_ipi(vlapic, eax, edx) == X86EMUL_OKAY )
            vlapic_set_reg(vlapic, APIC_ICR, eax);
        break;
    }

    case VIRIDIAN_MSR_TPR:
        perfc_incr(mshv_wrmsr_tpr);
        vlapic_set_reg(vcpu_vlapic(current), APIC_TASKPRI, (uint8_t)val);
        break;

    case VIRIDIAN_MSR_APIC_ASSIST:
        /*
         * We don't support the APIC assist page, and that fact is reflected
         * in our CPUID flags. However, Windows 7 build 7000 has a bug which
         * means that it doesn't recognise that, and tries to use the page
         * anyway. We therefore have to fake up just enough to keep win7
         * happy. Fortunately, that's really easy: just setting the first
         * four bytes in the page to zero effectively disables the page
         * again, so that's what we do. Semantically, the first four bytes
         * are supposed to be a flag saying whether the guest really needs
         * to issue an EOI. Setting that flag to zero means that it must
         * always issue one, which is what we want. Once a page has been
         * repurposed as an APIC assist page the guest isn't allowed to set
         * anything in it, so the flag remains zero and all is fine. The
         * guest is allowed to clear flags in the page, but that doesn't
         * cause us any problems.
         */
        if ( val & 1 ) /* APIC assist page enabled? */
        {
            uint32_t word = 0;
            paddr_t page_start = val & ~1ul;
            (void)hvm_copy_to_guest_phys(page_start, &word, sizeof(word));
        }
        break;

    default:
        return 0;
    }

    return 1;
}
int hvm_process_io_intercept(const struct hvm_io_handler *handler,
                             ioreq_t *p)
{
    const struct hvm_io_ops *ops = handler->ops;
    int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size;
    uint64_t data;
    uint64_t addr;

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            addr = (p->type == IOREQ_TYPE_COPY) ?
                   p->addr + step * i :
                   p->addr;
            rc = ops->read(handler, addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;

            if ( p->data_is_ptr )
            {
                switch ( hvm_copy_to_guest_phys(p->data + step * i,
                                                &data, p->size) )
                {
                case HVMCOPY_okay:
                    break;
                case HVMCOPY_bad_gfn_to_mfn:
                    /* Drop the write as real hardware would. */
                    continue;
                case HVMCOPY_bad_gva_to_gfn:
                case HVMCOPY_gfn_paged_out:
                case HVMCOPY_gfn_shared:
                    ASSERT_UNREACHABLE();
                    /* fall through */
                default:
                    domain_crash(current->domain);
                    return X86EMUL_UNHANDLEABLE;
                }
            }
            else
                p->data = data;
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            if ( p->data_is_ptr )
            {
                switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                                  p->size) )
                {
                case HVMCOPY_okay:
                    break;
                case HVMCOPY_bad_gfn_to_mfn:
                    data = ~0;
                    break;
                case HVMCOPY_bad_gva_to_gfn:
                case HVMCOPY_gfn_paged_out:
                case HVMCOPY_gfn_shared:
                    ASSERT_UNREACHABLE();
                    /* fall through */
                default:
                    domain_crash(current->domain);
                    return X86EMUL_UNHANDLEABLE;
                }
            }
            else
                data = p->data;

            addr = (p->type == IOREQ_TYPE_COPY) ?
                   p->addr + step * i :
                   p->addr;
            rc = ops->write(handler, addr, p->size, data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }
    else if ( rc == X86EMUL_UNHANDLEABLE )
    {
        /*
         * Don't forward entire batches to the device model: This would
         * prevent the internal handlers from seeing subsequent iterations
         * of the request.
         */
        p->count = 1;
    }

    return rc;
}
static int hvm_mmio_access(struct vcpu *v,
                           ioreq_t *p,
                           hvm_mmio_read_t read_handler,
                           hvm_mmio_write_t write_handler)
{
    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
    unsigned long data;
    int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
                rc = read_handler(v, p->addr, p->size, &data);
            p->data = data;
        }
        else /* p->dir == IOREQ_WRITE */
            rc = write_handler(v, p->addr, p->size, p->data);
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
            {
                rc = read_handler(v, p->addr + step * i, p->size, &data);
                if ( rc != X86EMUL_OKAY )
                    break;
            }
            switch ( hvm_copy_to_guest_phys(p->data + step * i,
                                            &data, p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                /* Drop the write as real hardware would. */
                continue;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
        {
            vio->mmio_retry = 1;
            vio->mmio_large_read_bytes = p->size;
            memcpy(vio->mmio_large_read, &data, p->size);
        }
    }
    else
    {
        for ( i = 0; i < p->count; i++ )
        {
            switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = write_handler(v, p->addr + step * i, p->size, data);
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
            vio->mmio_retry = 1;
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
    int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size;
    uint32_t data;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
                rc = action(IOREQ_READ, p->addr, p->size, &data);
            p->data = data;
        }
        else
        {
            data = p->data;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
        }
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
            {
                rc = action(IOREQ_READ, p->addr, p->size, &data);
                if ( rc != X86EMUL_OKAY )
                    break;
            }
            switch ( hvm_copy_to_guest_phys(p->data + step * i,
                                            &data, p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                /* Drop the write as real hardware would. */
                continue;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
        {
            vio->mmio_retry = 1;
            vio->mmio_large_read_bytes = p->size;
            memcpy(vio->mmio_large_read, &data, p->size);
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            data = 0;
            switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
            vio->mmio_retry = 1;
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
static inline void hvm_mmio_access(struct vcpu *v,
                                   ioreq_t *p,
                                   hvm_mmio_read_t read_handler,
                                   hvm_mmio_write_t write_handler)
{
    unsigned int tmp1, tmp2;
    unsigned long data;

    switch ( p->type )
    {
    case IOREQ_TYPE_COPY:
    {
        if ( !p->data_is_ptr )
        {
            if ( p->dir == IOREQ_READ )
                p->data = read_handler(v, p->addr, p->size);
            else /* p->dir == IOREQ_WRITE */
                write_handler(v, p->addr, p->size, p->data);
        }
        else
        {
            /* p->data_is_ptr */
            int i, sign = (p->df) ? -1 : 1;

            if ( p->dir == IOREQ_READ )
            {
                for ( i = 0; i < p->count; i++ )
                {
                    data = read_handler(v,
                                        p->addr + (sign * i * p->size),
                                        p->size);
                    (void)hvm_copy_to_guest_phys(
                        p->data + (sign * i * p->size), &data, p->size);
                }
            }
            else
            { /* p->dir == IOREQ_WRITE */
                for ( i = 0; i < p->count; i++ )
                {
                    (void)hvm_copy_from_guest_phys(
                        &data, p->data + (sign * i * p->size), p->size);
                    write_handler(v,
                                  p->addr + (sign * i * p->size),
                                  p->size, data);
                }
            }
        }
        break;
    }

    case IOREQ_TYPE_AND:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE )
        {
            tmp2 = tmp1 & (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_ADD:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE )
        {
            tmp2 = tmp1 + (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_OR:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE )
        {
            tmp2 = tmp1 | (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_XOR:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE )
        {
            tmp2 = tmp1 ^ (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_XCHG:
        /*
         * Note that we don't need to be atomic here since VCPU is accessing
         * its own local APIC.
         */
        tmp1 = read_handler(v, p->addr, p->size);
        write_handler(v, p->addr, p->size, (unsigned long) p->data);
        p->data = tmp1;
        break;

    case IOREQ_TYPE_SUB:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE )
        {
            tmp2 = tmp1 - (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    default:
        printk("hvm_mmio_access: error ioreq type %x\n", p->type);
        domain_crash_synchronous();
        break;
    }
}