Пример #1
0
Файл: vmce.c Проект: avsm/xen-1
/*
 * Currently all CPUs are redenzevous at the MCE softirq handler, no
 * need to consider paging p2m type
 * Currently only support HVM guest with EPT paging mode
 * XXX following situation missed:
 * PoD, Foreign mapped, Granted, Shared
 */
int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn)
{
    mfn_t r_mfn;
    p2m_type_t pt;
    int rc;

    /* Always trust dom0's MCE handler will prevent future access */
    if ( d == dom0 )
        return 0;

    if (!mfn_valid(mfn_x(mfn)))
        return -EINVAL;

    if ( !has_hvm_container_domain(d) || !paging_mode_hap(d) )
        return -ENOSYS;

    rc = -1;
    r_mfn = get_gfn_query(d, gfn, &pt);
    if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES)
    {
        ASSERT(mfn_x(r_mfn) == mfn_x(mfn));
        p2m_change_type(d, gfn, pt, p2m_ram_broken);
        rc = 0;
    }
    put_gfn(d, gfn);

    return rc;
}
Пример #2
0
static int __init pvh_setup_p2m(struct domain *d)
{
    struct vcpu *v = d->vcpu[0];
    unsigned long nr_pages;
    unsigned int i;
    int rc;
    bool preempted;
#define MB1_PAGES PFN_DOWN(MB(1))

    nr_pages = dom0_compute_nr_pages(d, NULL, 0);

    pvh_setup_e820(d, nr_pages);
    do {
        preempted = false;
        paging_set_allocation(d, dom0_paging_pages(d, nr_pages),
                              &preempted);
        process_pending_softirqs();
    } while ( preempted );

    /*
     * Memory below 1MB is identity mapped.
     * NB: this only makes sense when booted from legacy BIOS.
     */
    rc = modify_identity_mmio(d, 0, MB1_PAGES, true);
    if ( rc )
    {
        printk("Failed to identity map low 1MB: %d\n", rc);
        return rc;
    }

    /* Populate memory map. */
    for ( i = 0; i < d->arch.nr_e820; i++ )
    {
        unsigned long addr, size;

        if ( d->arch.e820[i].type != E820_RAM )
            continue;

        addr = PFN_DOWN(d->arch.e820[i].addr);
        size = PFN_DOWN(d->arch.e820[i].size);

        if ( addr >= MB1_PAGES )
            rc = pvh_populate_memory_range(d, addr, size);
        else
        {
            ASSERT(addr + size < MB1_PAGES);
            pvh_steal_low_ram(d, addr, size);
        }

        if ( rc )
            return rc;
    }

    if ( cpu_has_vmx && paging_mode_hap(d) && !vmx_unrestricted_guest(v) )
    {
        /*
         * Since Dom0 cannot be migrated, we will only setup the
         * unrestricted guest helpers if they are needed by the current
         * hardware we are running on.
         */
        rc = pvh_setup_vmx_realmode_helpers(d);
        if ( rc )
            return rc;
    }

    return 0;
#undef MB1_PAGES
}
Пример #3
0
void arch_get_domain_info(const struct domain *d,
                          struct xen_domctl_getdomaininfo *info)
{
    if ( paging_mode_hap(d) )
        info->flags |= XEN_DOMINF_hap;
}
Пример #4
0
/* This function can directly access fields which are covered by clean bits. */
static int construct_vmcb(struct vcpu *v)
{
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
    struct vmcb_struct *vmcb = arch_svm->vmcb;

    vmcb->_general1_intercepts = 
        GENERAL1_INTERCEPT_INTR        | GENERAL1_INTERCEPT_NMI         |
        GENERAL1_INTERCEPT_SMI         | GENERAL1_INTERCEPT_INIT        |
        GENERAL1_INTERCEPT_CPUID       | GENERAL1_INTERCEPT_INVD        |
        GENERAL1_INTERCEPT_HLT         | GENERAL1_INTERCEPT_INVLPG      | 
        GENERAL1_INTERCEPT_INVLPGA     | GENERAL1_INTERCEPT_IOIO_PROT   |
        GENERAL1_INTERCEPT_MSR_PROT    | GENERAL1_INTERCEPT_SHUTDOWN_EVT|
        GENERAL1_INTERCEPT_TASK_SWITCH;
    vmcb->_general2_intercepts = 
        GENERAL2_INTERCEPT_VMRUN       | GENERAL2_INTERCEPT_VMMCALL     |
        GENERAL2_INTERCEPT_VMLOAD      | GENERAL2_INTERCEPT_VMSAVE      |
        GENERAL2_INTERCEPT_STGI        | GENERAL2_INTERCEPT_CLGI        |
        GENERAL2_INTERCEPT_SKINIT      | GENERAL2_INTERCEPT_MWAIT       |
        GENERAL2_INTERCEPT_WBINVD      | GENERAL2_INTERCEPT_MONITOR     |
        GENERAL2_INTERCEPT_XSETBV;

    /* Intercept all debug-register writes. */
    vmcb->_dr_intercepts = ~0u;

    /* Intercept all control-register accesses except for CR2 and CR8. */
    vmcb->_cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
                             CR_INTERCEPT_CR2_WRITE |
                             CR_INTERCEPT_CR8_READ |
                             CR_INTERCEPT_CR8_WRITE);

    /* I/O and MSR permission bitmaps. */
    arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
    if ( arch_svm->msrpm == NULL )
        return -ENOMEM;
    memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);

    svm_disable_intercept_for_msr(v, MSR_FS_BASE);
    svm_disable_intercept_for_msr(v, MSR_GS_BASE);
    svm_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE);
    svm_disable_intercept_for_msr(v, MSR_CSTAR);
    svm_disable_intercept_for_msr(v, MSR_LSTAR);
    svm_disable_intercept_for_msr(v, MSR_STAR);
    svm_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);

    /* LWP_CBADDR MSR is saved and restored by FPU code. So SVM doesn't need to
     * intercept it. */
    if ( cpu_has_lwp )
        svm_disable_intercept_for_msr(v, MSR_AMD64_LWP_CBADDR);

    vmcb->_msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
    vmcb->_iopm_base_pa  = (u64)virt_to_maddr(hvm_io_bitmap);

    /* Virtualise EFLAGS.IF and LAPIC TPR (CR8). */
    vmcb->_vintr.fields.intr_masking = 1;
  
    /* Initialise event injection to no-op. */
    vmcb->eventinj.bytes = 0;

    /* TSC. */
    vmcb->_tsc_offset = 0;

    /* Don't need to intercept RDTSC if CPU supports TSC rate scaling */
    if ( v->domain->arch.vtsc && !cpu_has_tsc_ratio )
    {
        vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
        vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
    }

    /* Guest EFER. */
    v->arch.hvm_vcpu.guest_efer = 0;
    hvm_update_guest_efer(v);

    /* Guest segment limits. */
    vmcb->cs.limit = ~0u;
    vmcb->es.limit = ~0u;
    vmcb->ss.limit = ~0u;
    vmcb->ds.limit = ~0u;
    vmcb->fs.limit = ~0u;
    vmcb->gs.limit = ~0u;

    /* Guest segment bases. */
    vmcb->cs.base = 0;
    vmcb->es.base = 0;
    vmcb->ss.base = 0;
    vmcb->ds.base = 0;
    vmcb->fs.base = 0;
    vmcb->gs.base = 0;

    /* Guest segment AR bytes. */
    vmcb->es.attr.bytes = 0xc93; /* read/write, accessed */
    vmcb->ss.attr.bytes = 0xc93;
    vmcb->ds.attr.bytes = 0xc93;
    vmcb->fs.attr.bytes = 0xc93;
    vmcb->gs.attr.bytes = 0xc93;
    vmcb->cs.attr.bytes = 0xc9b; /* exec/read, accessed */

    /* Guest IDT. */
    vmcb->idtr.base = 0;
    vmcb->idtr.limit = 0;

    /* Guest GDT. */
    vmcb->gdtr.base = 0;
    vmcb->gdtr.limit = 0;

    /* Guest LDT. */
    vmcb->ldtr.sel = 0;
    vmcb->ldtr.base = 0;
    vmcb->ldtr.limit = 0;
    vmcb->ldtr.attr.bytes = 0;

    /* Guest TSS. */
    vmcb->tr.attr.bytes = 0x08b; /* 32-bit TSS (busy) */
    vmcb->tr.base = 0;
    vmcb->tr.limit = 0xff;

    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
    hvm_update_guest_cr(v, 0);

    v->arch.hvm_vcpu.guest_cr[4] = 0;
    hvm_update_guest_cr(v, 4);

    paging_update_paging_modes(v);

    vmcb->_exception_intercepts =
        HVM_TRAP_MASK
        | (1U << TRAP_no_device);

    if ( paging_mode_hap(v->domain) )
    {
        vmcb->_np_enable = 1; /* enable nested paging */
        vmcb->_g_pat = MSR_IA32_CR_PAT_RESET; /* guest PAT */
        vmcb->_h_cr3 = pagetable_get_paddr(
            p2m_get_pagetable(p2m_get_hostp2m(v->domain)));

        /* No point in intercepting CR3 reads/writes. */
        vmcb->_cr_intercepts &=
            ~(CR_INTERCEPT_CR3_READ|CR_INTERCEPT_CR3_WRITE);

        /*
         * No point in intercepting INVLPG if we don't have shadow pagetables
         * that need to be fixed up.
         */
        vmcb->_general1_intercepts &= ~GENERAL1_INTERCEPT_INVLPG;

        /* PAT is under complete control of SVM when using nested paging. */
        svm_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
    }
    else
    {
        vmcb->_exception_intercepts |= (1U << TRAP_page_fault);
    }

    if ( cpu_has_pause_filter )
    {
        vmcb->_pause_filter_count = SVM_PAUSEFILTER_INIT;
        vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_PAUSE;
    }

    vmcb->cleanbits.bytes = 0;

    return 0;
}
Пример #5
0
unsigned long __init dom0_compute_nr_pages(
    struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
{
    nodeid_t node;
    unsigned long avail = 0, nr_pages, min_pages, max_pages;
    bool_t need_paging;

    for_each_node_mask ( node, dom0_nodes )
        avail += avail_domheap_pages_region(node, 0, 0) +
                 initial_images_nrpages(node);

    /* Reserve memory for further dom0 vcpu-struct allocations... */
    avail -= (d->max_vcpus - 1UL)
             << get_order_from_bytes(sizeof(struct vcpu));
    /* ...and compat_l4's, if needed. */
    if ( is_pv_32bit_domain(d) )
        avail -= d->max_vcpus - 1;

    /* Reserve memory for iommu_dom0_init() (rough estimate). */
    if ( iommu_enabled )
    {
        unsigned int s;

        for ( s = 9; s < BITS_PER_LONG; s += 9 )
            avail -= max_pdx >> s;
    }

    need_paging = is_hvm_domain(d) &&
        (!iommu_hap_pt_share || !paging_mode_hap(d));
    for ( ; ; need_paging = 0 )
    {
        nr_pages = dom0_nrpages;
        min_pages = dom0_min_nrpages;
        max_pages = dom0_max_nrpages;

        /*
         * If allocation isn't specified, reserve 1/16th of available memory
         * for things like DMA buffers. This reservation is clamped to a
         * maximum of 128MB.
         */
        if ( nr_pages == 0 )
            nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));

        /* Negative specification means "all memory - specified amount". */
        if ( (long)nr_pages  < 0 ) nr_pages  += avail;
        if ( (long)min_pages < 0 ) min_pages += avail;
        if ( (long)max_pages < 0 ) max_pages += avail;

        /* Clamp according to min/max limits and available memory. */
        nr_pages = max(nr_pages, min_pages);
        nr_pages = min(nr_pages, max_pages);
        nr_pages = min(nr_pages, avail);

        if ( !need_paging )
            break;

        /* Reserve memory for shadow or HAP. */
        avail -= dom0_paging_pages(d, nr_pages);
    }

    if ( is_pv_domain(d) &&
         (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&
         ((dom0_min_nrpages <= 0) || (nr_pages > min_pages)) )
    {
        /*
         * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M
         * note) require that there is enough virtual space beyond the initial
         * allocation to set up their initial page tables. This space is
         * roughly the same size as the p2m table, so make sure the initial
         * allocation doesn't consume more than about half the space that's
         * available between params.virt_base and the address space end.
         */
        unsigned long vstart, vend, end;
        size_t sizeof_long = is_pv_32bit_domain(d) ? sizeof(int) : sizeof(long);

        vstart = parms->virt_base;
        vend = round_pgup(parms->virt_kend);
        if ( !parms->unmapped_initrd )
            vend += round_pgup(initrd_len);
        end = vend + nr_pages * sizeof_long;

        if ( end > vstart )
            end += end - vstart;
        if ( end <= vstart ||
             (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) )
        {
            end = sizeof_long >= sizeof(end) ? 0 : 1UL << (8 * sizeof_long);
            nr_pages = (end - vend) / (2 * sizeof_long);
            if ( dom0_min_nrpages > 0 && nr_pages < min_pages )
                nr_pages = min_pages;
            printk("Dom0 memory clipped to %lu pages\n", nr_pages);
        }
    }

    d->max_pages = min_t(unsigned long, max_pages, UINT_MAX);

    return nr_pages;
}