Example #1
static int __init pvh_setup_vmx_realmode_helpers(struct domain *d)
{
    p2m_type_t p2mt;
    uint32_t rc, *ident_pt;
    mfn_t mfn;
    paddr_t gaddr;
    struct vcpu *v = d->vcpu[0];

    /*
     * Steal some space from the last RAM region below 4GB and use it to
     * store the real-mode TSS. It needs to be aligned to 128 so that the
     * TSS structure (which accounts for the first 104 bytes) doesn't cross
     * a page boundary.
     */
    if ( !pvh_steal_ram(d, HVM_VM86_TSS_SIZE, 128, GB(4), &gaddr) )
    {
        if ( hvm_copy_to_guest_phys(gaddr, NULL, HVM_VM86_TSS_SIZE, v) !=
             HVMCOPY_okay )
            printk("Unable to zero VM86 TSS area\n");
        d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZED] =
            VM86_TSS_UPDATED | ((uint64_t)HVM_VM86_TSS_SIZE << 32) | gaddr;
        if ( pvh_add_mem_range(d, gaddr, gaddr + HVM_VM86_TSS_SIZE,
                               E820_RESERVED) )
            printk("Unable to set VM86 TSS as reserved in the memory map\n");
    }
    else
        printk("Unable to allocate VM86 TSS area\n");

    /* Steal some more RAM for the identity page tables. */
    if ( pvh_steal_ram(d, PAGE_SIZE, PAGE_SIZE, GB(4), &gaddr) )
    {
        printk("Unable to find memory to stash the identity page tables\n");
        return -ENOMEM;
    }

    /*
     * Identity-map page table is required for running with CR0.PG=0
     * when using Intel EPT. Create a 32-bit non-PAE page directory of
     * superpages.
     */
    ident_pt = map_domain_gfn(p2m_get_hostp2m(d), _gfn(PFN_DOWN(gaddr)),
                              &mfn, &p2mt, 0, &rc);
    if ( ident_pt == NULL )
    {
        printk("Unable to map identity page tables\n");
        return -ENOMEM;
    }
    write_32bit_pse_identmap(ident_pt);
    unmap_domain_page(ident_pt);
    put_page(mfn_to_page(mfn_x(mfn)));
    d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] = gaddr;
    if ( pvh_add_mem_range(d, gaddr, gaddr + PAGE_SIZE, E820_RESERVED) )
            printk("Unable to set identity page tables as reserved in the memory map\n");

    return 0;
}
Example #2
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
    uint32_t data;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            p->data = data;
        }
        else
        {
            data = p->data;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
        }
        return rc;
    }

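    /*
     * Rep port I/O with a guest-memory buffer (INS/OUTS): move one element
     * per iteration, stepping through the buffer in the direction given by
     * EFLAGS.DF.
     */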
    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
            (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
                                         &data, p->size);
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            data = 0;
            (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size,
                                           p->size);
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

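    /* If at least one element completed, truncate the request and report success. */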
    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
Example #3
static int hvm_mmio_access(struct vcpu *v,
                           ioreq_t *p,
                           hvm_mmio_read_t read_handler,
                           hvm_mmio_write_t write_handler)
{
    unsigned long data;
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = read_handler(v, p->addr, p->size, &data);
            p->data = data;
        }
        else /* p->dir == IOREQ_WRITE */
            rc = write_handler(v, p->addr, p->size, p->data);
        return rc;
    }

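    /*
     * Rep MMIO with a guest-memory buffer: a paged-out or shared guest page
     * turns the whole request into a retry.
     */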
    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            int ret;

            rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
                              &data);
            if ( rc != X86EMUL_OKAY )
                break;
            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
                                         &data,
                                         p->size);
            if ( (ret == HVMCOPY_gfn_paged_out) ||
                 (ret == HVMCOPY_gfn_shared) )
            {
                rc = X86EMUL_RETRY;
                break;
            }
        }
    }
    else
    {
        for ( i = 0; i < p->count; i++ )
        {
            switch ( hvm_copy_from_guest_phys(&data,
                                              p->data + sign * i * p->size,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = write_handler(v, p->addr + (sign * i * p->size), p->size,
                               data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
Example #4
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
    uint32_t data;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            p->data = data;
        }
        else
        {
            data = p->data;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
        }
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            rc = action(IOREQ_READ, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
            (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
                                         &data, p->size);
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            data = 0;
            switch ( hvm_copy_from_guest_phys(&data,
                                              p->data + sign * i * p->size,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
Example #5
static int __init pvh_setup_acpi(struct domain *d, paddr_t start_info)
{
    unsigned long pfn, nr_pages;
    paddr_t madt_paddr, xsdt_paddr, rsdp_paddr;
    unsigned int i;
    int rc;
    struct acpi_table_rsdp *native_rsdp, rsdp = {
        .signature = ACPI_SIG_RSDP,
        .revision = 2,
        .length = sizeof(rsdp),
    };

    /* Scan top-level tables and add their regions to the guest memory map. */
    for ( i = 0; i < acpi_gbl_root_table_list.count; i++ )
    {
        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;
        unsigned long addr = acpi_gbl_root_table_list.tables[i].address;
        unsigned long size = acpi_gbl_root_table_list.tables[i].length;

        /*
         * Make sure the original MADT is also mapped, so that Dom0 can
         * properly access the data returned by _MAT methods in case it's
         * re-using MADT memory.
         */
        if ( strncmp(sig, ACPI_SIG_MADT, ACPI_NAME_SIZE)
             ? pvh_acpi_table_allowed(sig)
             : !acpi_memory_banned(addr, size) )
             pvh_add_mem_range(d, addr, addr + size, E820_ACPI);
    }

    /* Identity map ACPI e820 regions. */
    for ( i = 0; i < d->arch.nr_e820; i++ )
    {
        if ( d->arch.e820[i].type != E820_ACPI &&
             d->arch.e820[i].type != E820_NVS )
            continue;

        pfn = PFN_DOWN(d->arch.e820[i].addr);
        nr_pages = PFN_UP((d->arch.e820[i].addr & ~PAGE_MASK) +
                          d->arch.e820[i].size);

        rc = modify_identity_mmio(d, pfn, nr_pages, true);
        if ( rc )
        {
            printk("Failed to map ACPI region [%#lx, %#lx) into Dom0 memory map\n",
                   pfn, pfn + nr_pages);
            return rc;
        }
    }

    rc = pvh_setup_acpi_madt(d, &madt_paddr);
    if ( rc )
        return rc;

    rc = pvh_setup_acpi_xsdt(d, madt_paddr, &xsdt_paddr);
    if ( rc )
        return rc;

    /* Craft a custom RSDP. */
    native_rsdp = acpi_os_map_memory(acpi_os_get_root_pointer(), sizeof(rsdp));
    if ( !native_rsdp )
    {
        printk("Failed to map native RSDP\n");
        return -ENOMEM;
    }
    memcpy(rsdp.oem_id, native_rsdp->oem_id, sizeof(rsdp.oem_id));
    acpi_os_unmap_memory(native_rsdp, sizeof(rsdp));
    rsdp.xsdt_physical_address = xsdt_paddr;
    /*
     * Calling acpi_tb_checksum here is a layering violation, but
     * introducing a wrapper for such simple usage seems overkill.
     */
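    /*
     * Fix up both checksum fields: the ACPI 1.0 checksum only covers the
     * revision 0 part of the structure, the extended checksum the full table.
     */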
    rsdp.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, &rsdp),
                                      ACPI_RSDP_REV0_SIZE);
    rsdp.extended_checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, &rsdp),
                                               sizeof(rsdp));

    /*
     * Place the new RSDP in guest memory space.
     *
     * NB: this RSDP is not going to replace the original RSDP, which should
     * still be accessible to the guest. However that RSDP is going to point to
     * the native RSDT, and should not be used for the Dom0 kernel's boot
     * purposes (we keep it visible for post boot access).
     */
    if ( pvh_steal_ram(d, sizeof(rsdp), 0, GB(4), &rsdp_paddr) )
    {
        printk("Unable to allocate guest RAM for RSDP\n");
        return -ENOMEM;
    }

    /* Mark this region as E820_ACPI. */
    if ( pvh_add_mem_range(d, rsdp_paddr, rsdp_paddr + sizeof(rsdp),
                           E820_ACPI) )
        printk("Unable to add RSDP region to memory map\n");

    /* Copy RSDP into guest memory. */
    rc = hvm_copy_to_guest_phys(rsdp_paddr, &rsdp, sizeof(rsdp), d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy RSDP into guest memory\n");
        return rc;
    }

    /* Copy RSDP address to start_info. */
    rc = hvm_copy_to_guest_phys(start_info +
                                offsetof(struct hvm_start_info, rsdp_paddr),
                                &rsdp_paddr,
                                sizeof(((struct hvm_start_info *)
                                        0)->rsdp_paddr),
                                d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy RSDP into guest memory\n");
        return rc;
    }

    return 0;
}

int __init dom0_construct_pvh(struct domain *d, const module_t *image,
                              unsigned long image_headroom,
                              module_t *initrd,
                              void *(*bootstrap_map)(const module_t *),
                              char *cmdline)
{
    paddr_t entry, start_info;
    int rc;

    printk("** Building a PVH Dom0 **\n");

    iommu_hwdom_init(d);

    rc = pvh_setup_p2m(d);
    if ( rc )
    {
        printk("Failed to setup Dom0 physical memory map\n");
        return rc;
    }

    rc = pvh_load_kernel(d, image, image_headroom, initrd, bootstrap_map(image),
                         cmdline, &entry, &start_info);
    if ( rc )
    {
        printk("Failed to load Dom0 kernel\n");
        return rc;
    }

    rc = pvh_setup_cpus(d, entry, start_info);
    if ( rc )
    {
        printk("Failed to setup Dom0 CPUs: %d\n", rc);
        return rc;
    }

    rc = pvh_setup_acpi(d, start_info);
    if ( rc )
    {
        printk("Failed to setup Dom0 ACPI tables: %d\n", rc);
        return rc;
    }

    panic("Building a PVHv2 Dom0 is not yet supported.");
    return 0;
}
Example #6
static int __init pvh_setup_acpi_xsdt(struct domain *d, paddr_t madt_addr,
                                      paddr_t *addr)
{
    struct acpi_table_xsdt *xsdt;
    struct acpi_table_header *table;
    struct acpi_table_rsdp *rsdp;
    unsigned long size = sizeof(*xsdt);
    unsigned int i, j, num_tables = 0;
    paddr_t xsdt_paddr;
    int rc;

    /*
     * Restore the original DMAR table signature; we are going to filter it
     * from the new XSDT that is presented to the guest, so it is no longer
     * necessary to have its signature zapped.
     */
    acpi_dmar_reinstate();

    /* Count the number of tables that will be added to the XSDT. */
    for ( i = 0; i < acpi_gbl_root_table_list.count; i++ )
    {
        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;

        if ( pvh_acpi_table_allowed(sig) )
            num_tables++;
    }

    /*
     * No need to add or subtract anything: struct acpi_table_xsdt already
     * includes one array slot, the original MADT has been filtered out, and
     * a custom-built MADT takes its place.
     */
    size += num_tables * sizeof(xsdt->table_offset_entry[0]);

    xsdt = xzalloc_bytes(size);
    if ( !xsdt )
    {
        printk("Unable to allocate memory for XSDT table\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Copy the native XSDT table header. */
    rsdp = acpi_os_map_memory(acpi_os_get_root_pointer(), sizeof(*rsdp));
    if ( !rsdp )
    {
        printk("Unable to map RSDP\n");
        rc = -EINVAL;
        goto out;
    }
    xsdt_paddr = rsdp->xsdt_physical_address;
    acpi_os_unmap_memory(rsdp, sizeof(*rsdp));
    table = acpi_os_map_memory(xsdt_paddr, sizeof(*table));
    if ( !table )
    {
        printk("Unable to map XSDT\n");
        rc = -EINVAL;
        goto out;
    }
    xsdt->header = *table;
    acpi_os_unmap_memory(table, sizeof(*table));

    /* Add the custom MADT. */
    xsdt->table_offset_entry[0] = madt_addr;

    /* Copy the addresses of the rest of the allowed tables. */
    for ( i = 0, j = 1; i < acpi_gbl_root_table_list.count; i++ )
    {
        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;

        if ( pvh_acpi_table_allowed(sig) )
            xsdt->table_offset_entry[j++] =
                acpi_gbl_root_table_list.tables[i].address;
    }

    xsdt->header.revision = 1;
    xsdt->header.length = size;
    /*
     * Calling acpi_tb_checksum here is a layering violation, but
     * introducing a wrapper for such simple usage seems overkill.
     */
    xsdt->header.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, xsdt), size);

    /* Place the new XSDT in guest memory space. */
    if ( pvh_steal_ram(d, size, 0, GB(4), addr) )
    {
        printk("Unable to find guest RAM for XSDT\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Mark this region as E820_ACPI. */
    if ( pvh_add_mem_range(d, *addr, *addr + size, E820_ACPI) )
        printk("Unable to add XSDT region to memory map\n");

    rc = hvm_copy_to_guest_phys(*addr, xsdt, size, d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy XSDT into guest memory\n");
        goto out;
    }

    rc = 0;

 out:
    xfree(xsdt);

    return rc;
}
Example #7
static int __init pvh_setup_acpi_madt(struct domain *d, paddr_t *addr)
{
    struct acpi_table_madt *madt;
    struct acpi_table_header *table;
    struct acpi_madt_io_apic *io_apic;
    struct acpi_madt_local_x2apic *x2apic;
    acpi_status status;
    unsigned long size;
    unsigned int i, max_vcpus;
    int rc;

    /* Count number of interrupt overrides in the MADT. */
    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
                          acpi_count_intr_ovr, UINT_MAX);

    /* Count number of NMI sources in the MADT. */
    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_count_nmi_src,
                          UINT_MAX);

    max_vcpus = dom0_max_vcpus();
    /* Calculate the size of the crafted MADT. */
    size = sizeof(*madt);
    size += sizeof(*io_apic) * nr_ioapics;
    size += sizeof(*intsrcovr) * acpi_intr_overrides;
    size += sizeof(*nmisrc) * acpi_nmi_sources;
    size += sizeof(*x2apic) * max_vcpus;

    madt = xzalloc_bytes(size);
    if ( !madt )
    {
        printk("Unable to allocate memory for MADT table\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Copy the native MADT table header. */
    status = acpi_get_table(ACPI_SIG_MADT, 0, &table);
    if ( !ACPI_SUCCESS(status) )
    {
        printk("Failed to get MADT ACPI table, aborting.\n");
        rc = -EINVAL;
        goto out;
    }
    madt->header = *table;
    madt->address = APIC_DEFAULT_PHYS_BASE;
    /*
     * NB: this is currently set to 4, which is the revision in the ACPI
     * spec 6.1. Sadly ACPICA doesn't provide revision numbers for the
     * tables described in the headers.
     */
    madt->header.revision = min_t(unsigned char, table->revision, 4);

    /* Setup the IO APIC entries. */
    io_apic = (void *)(madt + 1);
    for ( i = 0; i < nr_ioapics; i++ )
    {
        io_apic->header.type = ACPI_MADT_TYPE_IO_APIC;
        io_apic->header.length = sizeof(*io_apic);
        io_apic->id = domain_vioapic(d, i)->id;
        io_apic->address = domain_vioapic(d, i)->base_address;
        io_apic->global_irq_base = domain_vioapic(d, i)->base_gsi;
        io_apic++;
    }

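    /* Add a local x2APIC entry for each vCPU (APIC ID = 2 * vCPU id). */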
    x2apic = (void *)io_apic;
    for ( i = 0; i < max_vcpus; i++ )
    {
        x2apic->header.type = ACPI_MADT_TYPE_LOCAL_X2APIC;
        x2apic->header.length = sizeof(*x2apic);
        x2apic->uid = i;
        x2apic->local_apic_id = i * 2;
        x2apic->lapic_flags = ACPI_MADT_ENABLED;
        x2apic++;
    }

    /* Setup interrupt overrides. */
    intsrcovr = (void *)x2apic;
    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_set_intr_ovr,
                          acpi_intr_overrides);

    /* Setup NMI sources. */
    nmisrc = (void *)intsrcovr;
    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_set_nmi_src,
                          acpi_nmi_sources);

    ASSERT(((void *)nmisrc - (void *)madt) == size);
    madt->header.length = size;
    /*
     * Calling acpi_tb_checksum here is a layering violation, but
     * introducing a wrapper for such simple usage seems overkill.
     */
    madt->header.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, madt), size);

    /* Place the new MADT in guest memory space. */
    if ( pvh_steal_ram(d, size, 0, GB(4), addr) )
    {
        printk("Unable to find allocate guest RAM for MADT\n");
        rc = -ENOMEM;
        goto out;
    }

    /* Mark this region as E820_ACPI. */
    if ( pvh_add_mem_range(d, *addr, *addr + size, E820_ACPI) )
        printk("Unable to add MADT region to memory map\n");

    rc = hvm_copy_to_guest_phys(*addr, madt, size, d->vcpu[0]);
    if ( rc )
    {
        printk("Unable to copy MADT into guest memory\n");
        goto out;
    }

    rc = 0;

 out:
    xfree(madt);

    return rc;
}
Example #8
static int __init pvh_load_kernel(struct domain *d, const module_t *image,
                                  unsigned long image_headroom,
                                  module_t *initrd, void *image_base,
                                  char *cmdline, paddr_t *entry,
                                  paddr_t *start_info_addr)
{
    void *image_start = image_base + image_headroom;
    unsigned long image_len = image->mod_end;
    struct elf_binary elf;
    struct elf_dom_parms parms;
    paddr_t last_addr;
    struct hvm_start_info start_info = { 0 };
    struct hvm_modlist_entry mod = { 0 };
    struct vcpu *v = d->vcpu[0];
    int rc;

    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
    {
        printk("Error trying to detect bz compressed kernel\n");
        return rc;
    }

    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
    {
        printk("Unable to init ELF\n");
        return rc;
    }
#ifdef VERBOSE
    elf_set_verbose(&elf);
#endif
    elf_parse_binary(&elf);
    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
    {
        printk("Unable to parse kernel for ELFNOTES\n");
        return rc;
    }

    if ( parms.phys_entry == UNSET_ADDR32 )
    {
        printk("Unable to find XEN_ELFNOTE_PHYS32_ENTRY address\n");
        return -EINVAL;
    }

    printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os,
           parms.guest_ver, parms.loader,
           elf_64bit(&elf) ? "64-bit" : "32-bit");

    /* Copy the OS image and free temporary buffer. */
    elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base);
    elf.dest_size = parms.virt_kend - parms.virt_kstart;

    elf_set_vcpu(&elf, v);
    rc = elf_load_binary(&elf);
    if ( rc < 0 )
    {
        printk("Failed to load kernel: %d\n", rc);
        printk("Xen dom0 kernel broken ELF: %s\n", elf_check_broken(&elf));
        return rc;
    }

    last_addr = ROUNDUP(parms.virt_kend - parms.virt_base, PAGE_SIZE);

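    /*
     * Lay out the initrd, command line, module list and start_info
     * consecutively in guest memory above the kernel image.
     */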
    if ( initrd != NULL )
    {
        rc = hvm_copy_to_guest_phys(last_addr, mfn_to_virt(initrd->mod_start),
                                    initrd->mod_end, v);
        if ( rc )
        {
            printk("Unable to copy initrd to guest\n");
            return rc;
        }

        mod.paddr = last_addr;
        mod.size = initrd->mod_end;
        last_addr += ROUNDUP(initrd->mod_end, PAGE_SIZE);
    }

    /* Free temporary buffers. */
    discard_initial_images();

    if ( cmdline != NULL )
    {
        rc = hvm_copy_to_guest_phys(last_addr, cmdline, strlen(cmdline) + 1, v);
        if ( rc )
        {
            printk("Unable to copy guest command line\n");
            return rc;
        }
        start_info.cmdline_paddr = last_addr;
        /*
         * Round up to 32/64 bits (depending on the guest kernel bitness) so
         * the modlist/start_info is aligned.
         */
        last_addr += ROUNDUP(strlen(cmdline) + 1, elf_64bit(&elf) ? 8 : 4);
    }
    if ( initrd != NULL )
    {
        rc = hvm_copy_to_guest_phys(last_addr, &mod, sizeof(mod), v);
        if ( rc )
        {
            printk("Unable to copy guest modules\n");
            return rc;
        }
        start_info.modlist_paddr = last_addr;
        start_info.nr_modules = 1;
        last_addr += sizeof(mod);
    }

    start_info.magic = XEN_HVM_START_MAGIC_VALUE;
    start_info.flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
    rc = hvm_copy_to_guest_phys(last_addr, &start_info, sizeof(start_info), v);
    if ( rc )
    {
        printk("Unable to copy start info to guest\n");
        return rc;
    }

    *entry = parms.phys_entry;
    *start_info_addr = last_addr;

    return 0;
}
Example #9
static void realmode_deliver_exception(
    unsigned int vector,
    unsigned int insn_len,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct segment_register *idtr, *csr;
    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
    uint32_t cs_eip, pstk;
    uint16_t frame[3];
    unsigned int last_byte;

    idtr = hvmemul_get_seg_reg(x86_seg_idtr, hvmemul_ctxt);
    csr  = hvmemul_get_seg_reg(x86_seg_cs,   hvmemul_ctxt);
    __set_bit(x86_seg_cs, &hvmemul_ctxt->seg_reg_dirty);

 again:
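    /*
     * Fetch the real-mode IVT entry for the vector. On failure, re-deliver a
     * software interrupt as #GP and escalate #GP -> #DF -> triple fault.
     */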
    last_byte = (vector * 4) + 3;
    if ( idtr->limit < last_byte ||
         hvm_copy_from_guest_phys(&cs_eip, idtr->base + vector * 4, 4) !=
         HVMCOPY_okay )
    {
        /* Software interrupt? */
        if ( insn_len != 0 )
        {
            insn_len = 0;
            vector = TRAP_gp_fault;
            goto again;
        }

        /* Exception or hardware interrupt. */
        switch ( vector )
        {
        case TRAP_double_fault:
            hvm_triple_fault();
            return;
        case TRAP_gp_fault:
            vector = TRAP_double_fault;
            goto again;
        default:
            vector = TRAP_gp_fault;
            goto again;
        }
    }

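    /* Build the three-word real-mode interrupt frame: return IP, CS and FLAGS. */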
    frame[0] = regs->eip + insn_len;
    frame[1] = csr->sel;
    frame[2] = regs->eflags & ~X86_EFLAGS_RF;

    /* We can't test hvmemul_ctxt->ctxt.sp_size: it may not be initialised. */
    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db )
    {
        regs->esp -= 6;
        pstk = regs->esp;
    }
    else
    {
        pstk = (uint16_t)(regs->esp - 6);
        regs->esp &= ~0xffff;
        regs->esp |= pstk;
    }

    pstk += hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt)->base;
    (void)hvm_copy_to_guest_phys(pstk, frame, sizeof(frame));

    csr->sel  = cs_eip >> 16;
    csr->base = (uint32_t)csr->sel << 4;
    regs->eip = (uint16_t)cs_eip;
    regs->eflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF | X86_EFLAGS_RF);

    /* Exception delivery clears STI and MOV-SS blocking. */
    if ( hvmemul_ctxt->intr_shadow &
         (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS) )
    {
        hvmemul_ctxt->intr_shadow &=
            ~(VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS);
        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, hvmemul_ctxt->intr_shadow);
    }
}
Example #10
static int hvm_mmio_access(struct vcpu *v,
                           ioreq_t *p,
                           hvm_mmio_read_t read_handler,
                           hvm_mmio_write_t write_handler)
{
    unsigned long data;
    int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            rc = read_handler(v, p->addr, p->size, &data);
            p->data = data;
        }
        else /* p->dir == IOREQ_WRITE */
            rc = write_handler(v, p->addr, p->size, p->data);
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            int ret;

            rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
                              &data);
            if ( rc != X86EMUL_OKAY )
                break;
            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
                                         &data,
                                         p->size);
            if ( (ret == HVMCOPY_gfn_paged_out) || 
                 (ret == HVMCOPY_gfn_shared) )
            {
                rc = X86EMUL_RETRY;
                break;
            }
        }
    }
    else
    {
        for ( i = 0; i < p->count; i++ )
        {
            int ret;

            ret = hvm_copy_from_guest_phys(&data,
                                           p->data + (sign * i * p->size),
                                           p->size);
            if ( (ret == HVMCOPY_gfn_paged_out) || 
                 (ret == HVMCOPY_gfn_shared) )
            {
                rc = X86EMUL_RETRY;
                break;
            }
            rc = write_handler(v, p->addr + (sign * i * p->size), p->size,
                               data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
Example #11
int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
{
    struct domain *d = current->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    switch ( idx )
    {
    case VIRIDIAN_MSR_GUEST_OS_ID:
        perfc_incr(mshv_wrmsr_osid);
        d->arch.hvm_domain.viridian.guest_os_id.raw = val;
        gdprintk(XENLOG_INFO, "Guest os:\n");
        gdprintk(XENLOG_INFO, "\tvendor: %x\n",
               d->arch.hvm_domain.viridian.guest_os_id.fields.vendor);
        gdprintk(XENLOG_INFO, "\tos: %x\n",
               d->arch.hvm_domain.viridian.guest_os_id.fields.os);
        gdprintk(XENLOG_INFO, "\tmajor: %x\n",
               d->arch.hvm_domain.viridian.guest_os_id.fields.major);
        gdprintk(XENLOG_INFO, "\tminor: %x\n",
               d->arch.hvm_domain.viridian.guest_os_id.fields.minor);
        gdprintk(XENLOG_INFO, "\tsp: %x\n",
               d->arch.hvm_domain.viridian.guest_os_id.fields.service_pack);
        gdprintk(XENLOG_INFO, "\tbuild: %x\n",
               d->arch.hvm_domain.viridian.guest_os_id.fields.build_number);
        break;

    case VIRIDIAN_MSR_HYPERCALL:
        perfc_incr(mshv_wrmsr_hc_page);
        gdprintk(XENLOG_INFO, "Set hypercall page %"PRIx64".\n", val);
        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
            break;
        d->arch.hvm_domain.viridian.hypercall_gpa.raw = val;
        if ( d->arch.hvm_domain.viridian.hypercall_gpa.fields.enabled )
            enable_hypercall_page();
        break;

    case VIRIDIAN_MSR_VP_INDEX:
        perfc_incr(mshv_wrmsr_vp_index);
        gdprintk(XENLOG_INFO, "Set VP index %"PRIu64".\n", val);
        break;

    case VIRIDIAN_MSR_EOI:
        perfc_incr(mshv_wrmsr_eoi);
        vlapic_EOI_set(vcpu_vlapic(current));
        break;

    case VIRIDIAN_MSR_ICR: {
        u32 eax = (u32)val, edx = (u32)(val >> 32);
        struct vlapic *vlapic = vcpu_vlapic(current);
        perfc_incr(mshv_wrmsr_icr);
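        /* Clear the read-only Delivery Status bit and keep only the ICR2 destination field. */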
        eax &= ~(1 << 12);
        edx &= 0xff000000;
        vlapic_set_reg(vlapic, APIC_ICR2, edx);
        if ( vlapic_ipi(vlapic, eax, edx) == X86EMUL_OKAY )
            vlapic_set_reg(vlapic, APIC_ICR, eax);
        break;
    }

    case VIRIDIAN_MSR_TPR:
        perfc_incr(mshv_wrmsr_tpr);
        vlapic_set_reg(vcpu_vlapic(current), APIC_TASKPRI, (uint8_t)val);
        break;

    case VIRIDIAN_MSR_APIC_ASSIST:
        /*
         * We don't support the APIC assist page, and that fact is reflected in
         * our CPUID flags. However, Windows 7 build 7000 has a bug which means
         * that it doesn't recognise that, and tries to use the page anyway. We
         * therefore have to fake up just enough to keep win7 happy.
         * Fortunately, that's really easy: just setting the first four bytes
         * in the page to zero effectively disables the page again, so that's
         * what we do. Semantically, the first four bytes are supposed to be a
         * flag saying whether the guest really needs to issue an EOI. Setting
         * that flag to zero means that it must always issue one, which is what
         * we want. Once a page has been repurposed as an APIC assist page the
         * guest isn't allowed to set anything in it, so the flag remains zero
         * and all is fine. The guest is allowed to clear flags in the page,
         * but that doesn't cause us any problems.
         */
        if ( val & 1 ) /* APIC assist page enabled? */
        {
            uint32_t word = 0;
            paddr_t page_start = val & ~1ul;
            (void)hvm_copy_to_guest_phys(page_start, &word, sizeof(word));
        }
        break;

    default:
        return 0;
    }

    return 1;
}
Example #12
int hvm_process_io_intercept(const struct hvm_io_handler *handler,
                             ioreq_t *p)
{
    const struct hvm_io_ops *ops = handler->ops;
    int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size;
    uint64_t data;
    uint64_t addr;

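    /*
     * For rep accesses the address only advances between iterations for MMIO
     * (IOREQ_TYPE_COPY); port I/O keeps targeting the same port.
     */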
    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            addr = (p->type == IOREQ_TYPE_COPY) ?
                   p->addr + step * i :
                   p->addr;
            rc = ops->read(handler, addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;

            if ( p->data_is_ptr )
            {
                switch ( hvm_copy_to_guest_phys(p->data + step * i,
                                                &data, p->size) )
                {
                case HVMCOPY_okay:
                    break;
                case HVMCOPY_bad_gfn_to_mfn:
                    /* Drop the write as real hardware would. */
                    continue;
                case HVMCOPY_bad_gva_to_gfn:
                case HVMCOPY_gfn_paged_out:
                case HVMCOPY_gfn_shared:
                    ASSERT_UNREACHABLE();
                    /* fall through */
                default:
                    domain_crash(current->domain);
                    return X86EMUL_UNHANDLEABLE;
                }
            }
            else
                p->data = data;
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
            if ( p->data_is_ptr )
            {
                switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                                  p->size) )
                {
                case HVMCOPY_okay:
                    break;
                case HVMCOPY_bad_gfn_to_mfn:
                    data = ~0;
                    break;
                case HVMCOPY_bad_gva_to_gfn:
                case HVMCOPY_gfn_paged_out:
                case HVMCOPY_gfn_shared:
                    ASSERT_UNREACHABLE();
                    /* fall through */
                default:
                    domain_crash(current->domain);
                    return X86EMUL_UNHANDLEABLE;
                }
            }
            else
                data = p->data;

            addr = (p->type == IOREQ_TYPE_COPY) ?
                   p->addr + step * i :
                   p->addr;
            rc = ops->write(handler, addr, p->size, data);
            if ( rc != X86EMUL_OKAY )
                break;
        }
    }

    if ( i )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }
    else if ( rc == X86EMUL_UNHANDLEABLE )
    {
        /*
         * Don't forward entire batches to the device model: this would
         * prevent the internal handlers from seeing subsequent iterations
         * of the request.
         */
        p->count = 1;
    }

    return rc;
}
Example #13
static int hvm_mmio_access(struct vcpu *v,
                           ioreq_t *p,
                           hvm_mmio_read_t read_handler,
                           hvm_mmio_write_t write_handler)
{
    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
    unsigned long data;
    int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
                rc = read_handler(v, p->addr, p->size, &data);
            p->data = data;
        }
        else /* p->dir == IOREQ_WRITE */
            rc = write_handler(v, p->addr, p->size, p->data);
        return rc;
    }

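    /*
     * On a retried rep read, replay the data latched in vio->mmio_large_read
     * instead of re-invoking the read handler.
     */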
    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
            {
                rc = read_handler(v, p->addr + step * i, p->size, &data);
                if ( rc != X86EMUL_OKAY )
                    break;
            }
            switch ( hvm_copy_to_guest_phys(p->data + step * i,
                                            &data, p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                /* Drop the write as real hardware would. */
                continue;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
        {
            vio->mmio_retry = 1;
            vio->mmio_large_read_bytes = p->size;
            memcpy(vio->mmio_large_read, &data, p->size);
        }
    }
    else
    {
        for ( i = 0; i < p->count; i++ )
        {
            switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = write_handler(v, p->addr + step * i, p->size, data);
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
            vio->mmio_retry = 1;
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
Example #14
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
    int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size;
    uint32_t data;

    if ( !p->data_is_ptr )
    {
        if ( p->dir == IOREQ_READ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
                rc = action(IOREQ_READ, p->addr, p->size, &data);
            p->data = data;
        }
        else
        {
            data = p->data;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
        }
        return rc;
    }

    if ( p->dir == IOREQ_READ )
    {
        for ( i = 0; i < p->count; i++ )
        {
            if ( vio->mmio_retrying )
            {
                if ( vio->mmio_large_read_bytes != p->size )
                    return X86EMUL_UNHANDLEABLE;
                memcpy(&data, vio->mmio_large_read, p->size);
                vio->mmio_large_read_bytes = 0;
                vio->mmio_retrying = 0;
            }
            else
            {
                rc = action(IOREQ_READ, p->addr, p->size, &data);
                if ( rc != X86EMUL_OKAY )
                    break;
            }
            switch ( hvm_copy_to_guest_phys(p->data + step * i,
                                            &data, p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                /* Drop the write as real hardware would. */
                continue;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
        {
            vio->mmio_retry = 1;
            vio->mmio_large_read_bytes = p->size;
            memcpy(vio->mmio_large_read, &data, p->size);
        }
    }
    else /* p->dir == IOREQ_WRITE */
    {
        for ( i = 0; i < p->count; i++ )
        {
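            /* Zero the buffer so accesses narrower than 32 bits leave the upper bytes clear. */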
            data = 0;
            switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                              p->size) )
            {
            case HVMCOPY_okay:
                break;
            case HVMCOPY_gfn_paged_out:
            case HVMCOPY_gfn_shared:
                rc = X86EMUL_RETRY;
                break;
            case HVMCOPY_bad_gfn_to_mfn:
                data = ~0;
                break;
            case HVMCOPY_bad_gva_to_gfn:
                ASSERT(0);
                /* fall through */
            default:
                rc = X86EMUL_UNHANDLEABLE;
                break;
            }
            if ( rc != X86EMUL_OKAY )
                break;
            rc = action(IOREQ_WRITE, p->addr, p->size, &data);
            if ( rc != X86EMUL_OKAY )
                break;
        }

        if ( rc == X86EMUL_RETRY )
            vio->mmio_retry = 1;
    }

    if ( i != 0 )
    {
        p->count = i;
        rc = X86EMUL_OKAY;
    }

    return rc;
}
Example #15
static inline void hvm_mmio_access(struct vcpu *v,
                                   ioreq_t *p,
                                   hvm_mmio_read_t read_handler,
                                   hvm_mmio_write_t write_handler)
{
    unsigned int tmp1, tmp2;
    unsigned long data;

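    /*
     * Dispatch on the ioreq type: plain copies, the read-modify-write
     * variants (AND/ADD/OR/XOR/SUB) and atomic exchange.
     */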
    switch ( p->type ) {
    case IOREQ_TYPE_COPY:
    {
        if ( !p->data_is_ptr ) {
            if ( p->dir == IOREQ_READ )
                p->data = read_handler(v, p->addr, p->size);
            else    /* p->dir == IOREQ_WRITE */
                write_handler(v, p->addr, p->size, p->data);
        } else {    /* p->data_is_ptr */
            int i, sign = (p->df) ? -1 : 1;

            if ( p->dir == IOREQ_READ ) {
                for ( i = 0; i < p->count; i++ ) {
                    data = read_handler(v,
                        p->addr + (sign * i * p->size),
                        p->size);
                    (void)hvm_copy_to_guest_phys(
                        p->data + (sign * i * p->size),
                        &data,
                        p->size);
                }
            } else {/* p->dir == IOREQ_WRITE */
                for ( i = 0; i < p->count; i++ ) {
                    (void)hvm_copy_from_guest_phys(
                        &data,
                        p->data + (sign * i * p->size),
                        p->size);
                    write_handler(v,
                        p->addr + (sign * i * p->size),
                        p->size, data);
                }
            }
        }
        break;
    }

    case IOREQ_TYPE_AND:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE ) {
            tmp2 = tmp1 & (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_ADD:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE ) {
            tmp2 = tmp1 + (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_OR:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE ) {
            tmp2 = tmp1 | (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_XOR:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE ) {
            tmp2 = tmp1 ^ (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    case IOREQ_TYPE_XCHG:
        /*
         * Note that we don't need to be atomic here since VCPU is accessing
         * its own local APIC.
         */
        tmp1 = read_handler(v, p->addr, p->size);
        write_handler(v, p->addr, p->size, (unsigned long) p->data);
        p->data = tmp1;
        break;

    case IOREQ_TYPE_SUB:
        tmp1 = read_handler(v, p->addr, p->size);
        if ( p->dir == IOREQ_WRITE ) {
            tmp2 = tmp1 - (unsigned long) p->data;
            write_handler(v, p->addr, p->size, tmp2);
        }
        p->data = tmp1;
        break;

    default:
        printk("hvm_mmio_access: error ioreq type %x\n", p->type);
        domain_crash_synchronous();
        break;
    }
}