Example #1
/* Atomically look up a GFN and take a reference count on the backing page. */
struct page_info *get_page_from_gfn_p2m(
    struct domain *d, struct p2m_domain *p2m, unsigned long gfn,
    p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
{
    struct page_info *page = NULL;
    p2m_access_t _a;
    p2m_type_t _t;
    mfn_t mfn;

    /* Allow t or a to be NULL */
    t = t ?: &_t;
    a = a ?: &_a;

    if ( likely(!p2m_locked_by_me(p2m)) )
    {
        /* Fast path: look up and get out */
        p2m_read_lock(p2m);
        mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
             && mfn_valid(mfn)
             && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
        {
            page = mfn_to_page(mfn);
            if ( !get_page(page, d)
                 /* Page could be shared */
                 && !get_page(page, dom_cow) )
                page = NULL;
        }
        p2m_read_unlock(p2m);

        if ( page )
            return page;

        /* Error path: not a suitable GFN at all */
        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
            return NULL;
    }

    /* Slow path: take the write lock and do fixups */
    mfn = get_gfn_type_access(p2m, gfn, t, a, q, NULL);
    if ( p2m_is_ram(*t) && mfn_valid(mfn) )
    {
        page = mfn_to_page(mfn);
        if ( !get_page(page, d) )
            page = NULL;
    }
    put_gfn(d, gfn);

    return page;
}
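
For orientation, a minimal caller sketch (hypothetical, not taken from the Xen tree) of how this helper is typically used: look up the GFN, hold the reference while the frame is accessed, and release it with put_page(), mirroring the pattern in hap_p2m_ga_to_gfn() further down.

/* Hypothetical usage sketch: take a reference on the frame backing a GFN,
 * access it, then drop the reference.  Error handling kept minimal. */
static int inspect_gfn(struct domain *d, struct p2m_domain *p2m,
                       unsigned long gfn)
{
    p2m_type_t t;
    struct page_info *page;

    page = get_page_from_gfn_p2m(d, p2m, gfn, &t, NULL,
                                 P2M_ALLOC | P2M_UNSHARE);
    if ( !page )
        return -EINVAL;   /* not ordinary RAM, paged out, or still shared */

    /* ... the backing page can be mapped and examined here ... */

    put_page(page);       /* balance the reference taken by the lookup */
    return 0;
}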
Example #2
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                d->domain_id, gmfn);
        return 0;
    }
            
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);
            
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);

    return 1;
}
Example #3
File: vmce.c Project: avsm/xen-1
/*
 * Currently all CPUs are rendezvoused at the MCE softirq handler, so there
 * is no need to consider the paging p2m type.
 * Currently only HVM guests with EPT paging mode are supported.
 * XXX the following situations are missed:
 * PoD, Foreign mapped, Granted, Shared
 */
int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn)
{
    mfn_t r_mfn;
    p2m_type_t pt;
    int rc;

    /* Always trust dom0's MCE handler will prevent future access */
    if ( d == dom0 )
        return 0;

    if (!mfn_valid(mfn_x(mfn)))
        return -EINVAL;

    if ( !has_hvm_container_domain(d) || !paging_mode_hap(d) )
        return -ENOSYS;

    rc = -1;
    r_mfn = get_gfn_query(d, gfn, &pt);
    if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES)
    {
        ASSERT(mfn_x(r_mfn) == mfn_x(mfn));
        p2m_change_type(d, gfn, pt, p2m_ram_broken);
        rc = 0;
    }
    put_gfn(d, gfn);

    return rc;
}
Example #4
static void enable_hypercall_page(void)
{
    struct domain *d = current->domain;
    unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
    unsigned long mfn = gmfn_to_mfn(d, gmfn);
    uint8_t *p;

    if ( !mfn_valid(mfn) ||
         !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) )
    {
        gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, mfn);
        return;
    }

    p = map_domain_page(mfn);

    /*
     * We set the bit 31 in %eax (reserved field in the Viridian hypercall
     * calling convention) to differentiate Xen and Viridian hypercalls.
     */
    *(u8  *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */
    *(u32 *)(p + 1) = 0x80000000;
    *(u8  *)(p + 5) = 0x0f; /* vmcall/vmmcall */
    *(u8  *)(p + 6) = 0x01;
    *(u8  *)(p + 7) = ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                       ? 0xc1 : 0xd9);
    *(u8  *)(p + 8) = 0xc3; /* ret */
    memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */

    unmap_domain_page(p);

    put_page_and_type(mfn_to_page(mfn));
}
Example #5
File: vpmu.c Project: Fantu/Xen
static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    uint64_t mfn;
    void *xenpmu_data;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return;

    v = d->vcpu[params->vcpu];
    if ( v != current )
        vcpu_pause(v);

    vpmu = vcpu_vpmu(v);
    spin_lock(&vpmu->vpmu_lock);

    vpmu_destroy(v);
    xenpmu_data = vpmu->xenpmu_data;
    vpmu->xenpmu_data = NULL;

    spin_unlock(&vpmu->vpmu_lock);

    if ( xenpmu_data )
    {
        mfn = domain_page_map_to_mfn(xenpmu_data);
        ASSERT(mfn_valid(mfn));
        unmap_domain_page_global(xenpmu_data);
        put_page_and_type(mfn_to_page(mfn));
    }

    if ( v != current )
        vcpu_unpause(v);
}
Example #6
int
set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = 0;
    p2m_access_t a;
    p2m_type_t ot;
    mfn_t omfn;
    unsigned long pg_type;

    if ( !paging_mode_translate(p2m->domain) )
        return 0;

    gfn_lock(p2m, gfn, 0);
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL);
    /* At the moment we only allow p2m change if gfn has already been made
     * sharable first */
    ASSERT(p2m_is_shared(ot));
    ASSERT(mfn_valid(omfn));
    /* Set the m2p entry to invalid only if there are no further type
     * refs to this page as shared */
    pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
    if ( (pg_type & PGT_count_mask) == 0
         || (pg_type & PGT_type_mask) != PGT_shared_page )
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);

    P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
    rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared, p2m->default_access);
    gfn_unlock(p2m, gfn, 0);
    if ( 0 == rc )
        gdprintk(XENLOG_ERR,
            "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
            mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)));
    return rc;
}
Example #7
int
set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
{
    int rc = 0;
    p2m_access_t a;
    p2m_type_t ot;
    mfn_t omfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( !paging_mode_translate(d) )
        return 0;

    gfn_lock(p2m, gfn, 0);
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL);
    if ( p2m_is_grant(ot) )
    {
        p2m_unlock(p2m);
        domain_crash(d);
        return 0;
    }
    else if ( p2m_is_ram(ot) )
    {
        ASSERT(mfn_valid(omfn));
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
    }

    P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
    rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_mmio_direct, p2m->default_access);
    gfn_unlock(p2m, gfn, 0);
    if ( 0 == rc )
        gdprintk(XENLOG_ERR,
            "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
            mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)));
    return rc;
}
Example #8
static void
p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
                unsigned int page_order)
{
    unsigned long i;
    mfn_t mfn_return;
    p2m_type_t t;
    p2m_access_t a;

    if ( !paging_mode_translate(p2m->domain) )
    {
        if ( need_iommu(p2m->domain) )
            for ( i = 0; i < (1 << page_order); i++ )
                iommu_unmap_page(p2m->domain, mfn + i);
        return;
    }

    ASSERT(gfn_locked_by_me(p2m, gfn));
    P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);

    if ( mfn_valid(_mfn(mfn)) )
    {
        for ( i = 0; i < (1UL << page_order); i++ )
        {
            mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, 0, NULL);
            if ( !p2m_is_grant(t) && !p2m_is_shared(t) )
                set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
            ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
        }
    }
    set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid, p2m->default_access);
}
Example #9
/* 
 * pgd3val: this is the value of init_mm.pgd[3] in a PV guest. It is optional.
 *          This is to assist debugging of modules in the guest. The kernel
 *          address space seems to always be mapped, but modules are not
 *          necessarily mapped in any arbitrary guest cr3 that we pick if
 *          pgd3val is 0. Modules should always be addressable if we use
 *          cr3 from init_mm.
 *          Since pgd3val is already a pgd value, cr3->pgd[3], we just need to 
 *          do 2 level lookups.
 *
 * NOTE: 4-level paging also works for 32-bit PAE guests because the cpu runs in IA32-e
 *       mode.
 * Returns: mfn for the given (pv guest) vaddr 
 */
static unsigned long 
dbg_pv_va2mfn(dbgva_t vaddr, struct domain *dp, uint64_t pgd3val)
{
    l4_pgentry_t l4e, *l4t;
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;
    unsigned long cr3 = (pgd3val ? pgd3val : dp->vcpu[0]->arch.cr3);
    unsigned long mfn = cr3 >> PAGE_SHIFT;

    DBGP2("vaddr:%lx domid:%d cr3:%lx pgd3:%lx\n", vaddr, dp->domain_id, 
          cr3, pgd3val);

    if ( pgd3val == 0 )
    {
        l4t = mfn_to_virt(mfn);
        l4e = l4t[l4_table_offset(vaddr)];
        mfn = l4e_get_pfn(l4e);
        DBGP2("l4t:%p l4to:%lx l4e:%lx mfn:%lx\n", l4t, 
              l4_table_offset(vaddr), l4e, mfn);
        if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
        {
            DBGP1("l4 PAGE not present. vaddr:%lx cr3:%lx\n", vaddr, cr3);
            return INVALID_MFN;
        }

        l3t = mfn_to_virt(mfn);
        l3e = l3t[l3_table_offset(vaddr)];
        mfn = l3e_get_pfn(l3e);
        DBGP2("l3t:%p l3to:%lx l3e:%lx mfn:%lx\n", l3t, 
              l3_table_offset(vaddr), l3e, mfn);
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        {
            DBGP1("l3 PAGE not present. vaddr:%lx cr3:%lx\n", vaddr, cr3);
            return INVALID_MFN;
        }
    }

    l2t = mfn_to_virt(mfn);
    l2e = l2t[l2_table_offset(vaddr)];
    mfn = l2e_get_pfn(l2e);
    DBGP2("l2t:%p l2to:%lx l2e:%lx mfn:%lx\n", l2t, l2_table_offset(vaddr),
          l2e, mfn);
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) )
    {
        DBGP1("l2 PAGE not present. vaddr:%lx cr3:%lx\n", vaddr, cr3);
        return INVALID_MFN;
    }
    l1t = mfn_to_virt(mfn);
    l1e = l1t[l1_table_offset(vaddr)];
    mfn = l1e_get_pfn(l1e);
    DBGP2("l1t:%p l1to:%lx l1e:%lx mfn:%lx\n", l1t, l1_table_offset(vaddr),
          l1e, mfn);

    return mfn_valid(mfn) ? mfn : INVALID_MFN;
}
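
As a hedged illustration (hypothetical helper, not part of the file above), the returned mfn is typically mapped with map_domain_page() and indexed by the page offset of the original vaddr:

/* Hypothetical sketch: translate a PV guest vaddr and read one byte from
 * the backing frame.  The in-page offset comes from the low vaddr bits. */
static int dbg_read_byte(dbgva_t vaddr, struct domain *dp, uint64_t pgd3val,
                         uint8_t *out)
{
    unsigned long mfn = dbg_pv_va2mfn(vaddr, dp, pgd3val);
    uint8_t *va;

    if ( mfn == INVALID_MFN )
        return -EINVAL;

    va = map_domain_page(mfn);          /* temporary hypervisor mapping */
    *out = va[vaddr & ~PAGE_MASK];      /* ~PAGE_MASK keeps the in-page offset */
    unmap_domain_page(va);
    return 0;
}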
Example #10
/**
 * p2m_mem_paging_evict - Mark a guest page as paged-out
 * @d: guest domain
 * @gfn: guest page to evict
 *
 * Returns 0 for success or negative errno values if eviction is not possible.
 *
 * p2m_mem_paging_evict() is called by the pager and will free a guest page and
 * release it back to Xen. If the following conditions are met the page can be
 * freed:
 * - the gfn is backed by an mfn
 * - the gfn was nominated
 * - the mfn still has exactly one user and has no special meaning
 *
 * After successful nomination some other process could have mapped the page.
 * In that case eviction cannot be done. If the gfn was populated before the
 * pager could evict it, eviction cannot be done either; in that case the gfn
 * is still backed by an mfn.
 */
int p2m_mem_paging_evict(struct domain *d, unsigned long gfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    /* Get mfn */
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    /* Allow only nominated pages */
    if ( p2mt != p2m_ram_paging_out )
        goto out;

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
        goto out;

    /* Check page count and type once more */
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (2 | PGC_allocated) )
        goto out_put;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out_put;

    /* Decrement guest domain's ref count of the page */
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    /* Remove mapping from p2m table */
    set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_ram_paged, a);

    /* Clear content before returning the page to Xen */
    scrub_one_page(page);

    /* Track number of paged gfns */
    atomic_inc(&d->paged_pages);

    ret = 0;

 out_put:
    /* Put the page back so it gets freed */
    put_page(page);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}
Example #11
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); 
    if ( unlikely(p2m_is_paging(p2mt)) )
    {
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        p2m_mem_paging_drop_page(p2m_get_hostp2m(d), gmfn);
        return 1;
    }
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                d->domain_id, gmfn);
        return 0;
    }
            
    page = mfn_to_page(mfn);
#ifdef CONFIG_X86
    /* If gmfn is shared, just drop the guest reference (which may or may not
     * free the page) */
    if(p2m_is_shared(p2mt))
    {
        put_page_and_type(page);
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        return 1;
    }

#endif /* CONFIG_X86 */
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);
            
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);

    return 1;
}
Example #12
int p2m_set_altp2m_mem_access(struct domain *d, struct p2m_domain *hp2m,
                              struct p2m_domain *ap2m, p2m_access_t a,
                              gfn_t gfn)
{
    mfn_t mfn;
    p2m_type_t t;
    p2m_access_t old_a;
    unsigned int page_order;
    unsigned long gfn_l = gfn_x(gfn);
    int rc;

    mfn = ap2m->get_entry(ap2m, gfn_l, &t, &old_a, 0, NULL, NULL);

    /* Check host p2m if no valid entry in alternate */
    if ( !mfn_valid(mfn) )
    {

        mfn = __get_gfn_type_access(hp2m, gfn_l, &t, &old_a,
                                    P2M_ALLOC | P2M_UNSHARE, &page_order, 0);

        rc = -ESRCH;
        if ( !mfn_valid(mfn) || t != p2m_ram_rw )
            return rc;

        /* If this is a superpage, copy that first */
        if ( page_order != PAGE_ORDER_4K )
        {
            unsigned long mask = ~((1UL << page_order) - 1);
            unsigned long gfn2_l = gfn_l & mask;
            mfn_t mfn2 = _mfn(mfn_x(mfn) & mask);

            rc = ap2m->set_entry(ap2m, gfn2_l, mfn2, page_order, t, old_a, 1);
            if ( rc )
                return rc;
        }
    }

    return ap2m->set_entry(ap2m, gfn_l, mfn, PAGE_ORDER_4K, t, a,
                         (current->domain != d));
}
Example #13
File: setup.c Project: caomw/xen
void __init discard_initial_modules(void)
{
    struct bootmodules *mi = &bootinfo.modules;
    int i;

    for ( i = 0; i < mi->nr_mods; i++ )
    {
        paddr_t s = mi->module[i].start;
        paddr_t e = s + PAGE_ALIGN(mi->module[i].size);

        if ( mi->module[i].kind == BOOTMOD_XEN )
            continue;

        if ( !mfn_valid(paddr_to_pfn(s)) || !mfn_valid(paddr_to_pfn(e)))
            continue;

        dt_unreserved_regions(s, e, init_domheap_pages, 0);
    }

    mi->nr_mods = 0;

    remove_early_mappings();
}
Example #14
static struct mcinfo_bank *mca_init_bank(enum mca_source who,
                                         struct mc_info *mi, int bank)
{
    struct mcinfo_bank *mib;
    uint64_t addr=0, misc = 0;

    if (!mi)
        return NULL;

    mib = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_bank));
    if (!mib)
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return NULL;
    }

    memset(mib, 0, sizeof (struct mcinfo_bank));
    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));

    mib->common.type = MC_TYPE_BANK;
    mib->common.size = sizeof (struct mcinfo_bank);
    mib->mc_bank = bank;

    addr = misc = 0;
    if (mib->mc_status & MCi_STATUS_MISCV)
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));

    if (mib->mc_status & MCi_STATUS_ADDRV)
    {
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));

        if (mfn_valid(paddr_to_pfn(mib->mc_addr))) {
            struct domain *d;

            d = maddr_get_owner(mib->mc_addr);
            if (d != NULL && (who == MCA_POLLER ||
                              who == MCA_CMCI_HANDLER))
                mib->mc_domid = d->domain_id;
        }
    }

    if (who == MCA_CMCI_HANDLER) {
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
        rdtscll(mib->mc_tsc);
    }

    return mib;
}
Example #15
File: mce.c Project: dzan/xenOnArm
static void mca_init_bank(enum mca_source who,
    struct mc_info *mi, int bank)
{
    struct mcinfo_bank *mib;

    if (!mi)
        return;

    mib = x86_mcinfo_reserve(mi, sizeof(*mib));
    if (!mib)
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return;
    }

    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));

    mib->common.type = MC_TYPE_BANK;
    mib->common.size = sizeof (struct mcinfo_bank);
    mib->mc_bank = bank;

    if (mib->mc_status & MCi_STATUS_MISCV)
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));

    if (mib->mc_status & MCi_STATUS_ADDRV)
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));

    if ((mib->mc_status & MCi_STATUS_MISCV) &&
        (mib->mc_status & MCi_STATUS_ADDRV) &&
        (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) &&
        (who == MCA_POLLER || who == MCA_CMCI_HANDLER) &&
        (mfn_valid(paddr_to_pfn(mib->mc_addr))))
    {
        struct domain *d;

        d = maddr_get_owner(mib->mc_addr);
        if (d)
            mib->mc_domid = d->domain_id;
    }

    if (who == MCA_CMCI_HANDLER) {
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
        rdtscll(mib->mc_tsc);
    }
}
Example #16
File: p2m.c Project: Fantu/Xen
/* Put any references on the single 4K page referenced by pte.  TODO:
 * Handle superpages, for now we only take special references for leaf
 * pages (specifically foreign ones, which can't be super mapped today).
 */
static void p2m_put_l3_page(const lpae_t pte)
{
    ASSERT(p2m_valid(pte));

    /* TODO: Handle other p2m types
     *
     * It's safe to do the put_page here because page_alloc will
     * flush the TLBs if the page is reallocated before the end of
     * this loop.
     */
    if ( p2m_is_foreign(pte.p2m.type) )
    {
        unsigned long mfn = pte.p2m.base;

        ASSERT(mfn_valid(mfn));
        put_page(mfn_to_page(mfn));
    }
}
Example #17
/**
 * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
 * @d: guest domain
 * @gfn: guest page to nominate
 *
 * Returns 0 for success or negative errno values if gfn is not pageable.
 *
 * p2m_mem_paging_nominate() is called by the pager and checks if a guest page
 * can be paged out. If the following conditions are met the p2mt will be
 * changed:
 * - the gfn is backed by an mfn
 * - the p2mt of the gfn is pageable
 * - the mfn is not used for IO
 * - the mfn has exactly one user and has no special meaning
 *
 * Once the p2mt is changed the page is readonly for the guest.  On success the
 * pager can write the page contents to disk and later evict the page.
 */
int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn)
{
    struct page_info *page;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )
        goto out;

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )
        goto out;

    /* Check for io memory page */
    if ( is_iomem_page(mfn_x(mfn)) )
        goto out;

    /* Check page count and type */
    page = mfn_to_page(mfn);
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )
        goto out;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out;

    /* Fix p2m entry */
    set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);
    ret = 0;

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}
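
For orientation only, a hedged sketch (not real Xen code) of the ordering described in the comment above: nominate the gfn, let the pager save its contents, then evict it. In practice a user-space pager drives these steps through the memory paging interface rather than calling the helpers directly.

/* Illustrative only: nominate -> save contents -> evict, expressed with the
 * in-hypervisor helpers shown in these examples. */
static int page_out_gfn(struct domain *d, unsigned long gfn)
{
    int rc = p2m_mem_paging_nominate(d, gfn);   /* gfn becomes read-only */
    if ( rc )
        return rc;

    /* ... the pager writes the page contents to disk here ... */

    return p2m_mem_paging_evict(d, gfn);        /* frees the backing mfn */
}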
Example #18
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
    struct vcpu *v, unsigned long gva, uint32_t *pfec)
{
    unsigned long cr3;
    uint32_t missing;
    mfn_t top_mfn;
    void *top_map;
    p2m_type_t p2mt;
    walk_t gw;

    /* Get the top-level table's MFN */
    cr3 = v->arch.hvm_vcpu.guest_cr[3];
    top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt);
    if ( !p2m_is_ram(p2mt) )
    {
        pfec[0] &= ~PFEC_page_present;
        return INVALID_GFN;
    }

    /* Map the top-level table and call the tree-walker */
    ASSERT(mfn_valid(mfn_x(top_mfn)));
    top_map = map_domain_page(mfn_x(top_mfn));
#if GUEST_PAGING_LEVELS == 3
    top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
    missing = guest_walk_tables(v, gva, &gw, pfec[0], top_mfn, top_map);
    unmap_domain_page(top_map);

    /* Interpret the answer */
    if ( missing == 0 ) 
        return gfn_x(guest_l1e_get_gfn(gw.l1e));
    
    if ( missing & _PAGE_PRESENT )
        pfec[0] &= ~PFEC_page_present;
    
    return INVALID_GFN;
}
Example #19
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    uint32_t missing;
    mfn_t top_mfn;
    void *top_map;
    p2m_type_t p2mt;
    walk_t gw;
    unsigned long top_gfn;
    struct page_info *top_page;

    /* Get the top-level table's MFN */
    top_gfn = cr3 >> PAGE_SHIFT;
    top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn,
                                     &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        ASSERT(p2m_is_hostp2m(p2m));
        pfec[0] = PFEC_page_paged;
        if ( top_page )
            put_page(top_page);
        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
        return INVALID_GFN;
    }
    if ( p2m_is_shared(p2mt) )
    {
        pfec[0] = PFEC_page_shared;
        if ( top_page )
            put_page(top_page);
        return INVALID_GFN;
    }
    if ( !top_page )
    {
        pfec[0] &= ~PFEC_page_present;
        return INVALID_GFN;
    }
    top_mfn = _mfn(page_to_mfn(top_page));

    /* Map the top-level table and call the tree-walker */
    ASSERT(mfn_valid(mfn_x(top_mfn)));
    top_map = map_domain_page(mfn_x(top_mfn));
#if GUEST_PAGING_LEVELS == 3
    top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
    missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map);
    unmap_domain_page(top_map);
    put_page(top_page);

    /* Interpret the answer */
    if ( missing == 0 )
    {
        gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
        struct page_info *page;
        page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt,
                                     NULL, P2M_ALLOC | P2M_UNSHARE);
        if ( page )
            put_page(page);
        if ( p2m_is_paging(p2mt) )
        {
            ASSERT(p2m_is_hostp2m(p2m));
            pfec[0] = PFEC_page_paged;
            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
            return INVALID_GFN;
        }
        if ( p2m_is_shared(p2mt) )
        {
            pfec[0] = PFEC_page_shared;
            return INVALID_GFN;
        }

        if ( page_order )
            *page_order = guest_walk_to_page_order(&gw);

        return gfn_x(gfn);
    }

    if ( missing & _PAGE_PRESENT )
        pfec[0] &= ~PFEC_page_present;

    if ( missing & _PAGE_INVALID_BITS ) 
        pfec[0] |= PFEC_reserved_bit;

    if ( missing & _PAGE_PAGED )
        pfec[0] = PFEC_page_paged;

    if ( missing & _PAGE_SHARED )
        pfec[0] = PFEC_page_shared;

    return INVALID_GFN;
}
Example #20
File: memory.c Project: blue236/xen
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); 
    if ( unlikely(p2m_is_paging(p2mt)) )
    {
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        put_gfn(d, gmfn);
        /* If the page hasn't yet been paged out, there is an
         * actual page that needs to be released. */
        if ( p2mt == p2m_ram_paging_out )
        {
            ASSERT(mfn_valid(mfn));
            page = mfn_to_page(mfn);
            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
                put_page(page);
        }
        p2m_mem_paging_drop_page(d, gmfn, p2mt);
        return 1;
    }
    if ( p2mt == p2m_mmio_direct )
    {
        clear_mmio_p2m_entry(d, gmfn, _mfn(mfn));
        put_gfn(d, gmfn);
        return 1;
    }
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                d->domain_id, gmfn);
        return 0;
    }
            
#ifdef CONFIG_X86
    if ( p2m_is_shared(p2mt) )
    {
        /* Unshare the page, bail out on error. We unshare because 
         * we might be the only one using this shared page, and we
         * need to trigger proper cleanup. Once done, this is 
         * like any other page. */
        if ( mem_sharing_unshare_page(d, gmfn, 0) )
        {
            put_gfn(d, gmfn);
            (void)mem_sharing_notify_enomem(d, gmfn, 0);
            return 0;
        }
        /* Maybe the mfn changed */
        mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt));
        ASSERT(!p2m_is_shared(p2mt));
    }
#endif /* CONFIG_X86 */

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    /*
     * With the lack of an IOMMU on some platforms, domains with DMA-capable
     * device must retrieve the same pfn when the hypercall populate_physmap
     * is called.
     *
     * For this purpose (and to match populate_physmap() behavior), the page
     * is kept allocated.
     */
    if ( !is_domain_direct_mapped(d) &&
         test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);
    put_gfn(d, gmfn);

    return 1;
}
Example #21
File: memory.c Project: blue236/xen
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned int i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER :
                            max_order(current->domain)) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( i != a->nr_done && hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        if ( a->memflags & MEMF_populate_on_demand )
        {
            if ( guest_physmap_mark_populate_on_demand(d, gpfn,
                                                       a->extent_order) < 0 )
                goto out;
        }
        else
        {
            if ( is_domain_direct_mapped(d) )
            {
                mfn = gpfn;

                for ( j = 0; j < (1U << a->extent_order); j++, mfn++ )
                {
                    if ( !mfn_valid(mfn) )
                    {
                        gdprintk(XENLOG_INFO, "Invalid mfn %#"PRI_xen_pfn"\n",
                                 mfn);
                        goto out;
                    }

                    page = mfn_to_page(mfn);
                    if ( !get_page(page, d) )
                    {
                        gdprintk(XENLOG_INFO,
                                 "mfn %#"PRI_xen_pfn" doesn't belong to d%d\n",
                                  mfn, d->domain_id);
                        goto out;
                    }
                    put_page(page);
                }

                mfn = gpfn;
                page = mfn_to_page(mfn);
            }
            else
            {
                page = alloc_domheap_pages(d, a->extent_order, a->memflags);

                if ( unlikely(!page) )
                {
                    if ( !opt_tmem || a->extent_order )
                        gdprintk(XENLOG_INFO,
                                 "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n",
                                 a->extent_order, d->domain_id, a->memflags,
                                 i, a->nr_extents);
                    goto out;
                }

                mfn = page_to_mfn(page);
            }

            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( j = 0; j < (1U << a->extent_order); j++ )
                    set_gpfn_from_mfn(mfn + j, gpfn + j);

                /* Inform the domain of the new page's machine address. */ 
                if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                    goto out;
            }
        }
    }

out:
    a->nr_done = i;
}
Example #22
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
                               p2m_type_t t)
{
    paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT;
    /* xn and write bit will be defined in the switch */
    lpae_t e = (lpae_t) {
        .p2m.af = 1,
        .p2m.sh = LPAE_SH_OUTER,
        .p2m.read = 1,
        .p2m.mattr = mattr,
        .p2m.table = 1,
        .p2m.valid = 1,
        .p2m.type = t,
    };

    BUILD_BUG_ON(p2m_max_real_type > (1 << 4));

    switch (t)
    {
    case p2m_ram_rw:
        e.p2m.xn = 0;
        e.p2m.write = 1;
        break;

    case p2m_ram_ro:
        e.p2m.xn = 0;
        e.p2m.write = 0;
        break;

    case p2m_map_foreign:
    case p2m_grant_map_rw:
    case p2m_mmio_direct:
        e.p2m.xn = 1;
        e.p2m.write = 1;
        break;

    case p2m_grant_map_ro:
    case p2m_invalid:
        e.p2m.xn = 1;
        e.p2m.write = 0;
        break;

    case p2m_max_real_type:
        BUG();
        break;
    }

    ASSERT(!(pa & ~PAGE_MASK));
    ASSERT(!(pa & ~PADDR_MASK));

    e.bits |= pa;

    return e;
}

/* Allocate a new page table page and hook it in via the given entry */
static int p2m_create_table(struct domain *d,
                            lpae_t *entry)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    struct page_info *page;
    void *p;
    lpae_t pte;

    BUG_ON(entry->p2m.valid);

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    clear_page(p);
    unmap_domain_page(p);

    pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);

    write_pte(entry, pte);

    return 0;
}

enum p2m_operation {
    INSERT,
    ALLOCATE,
    REMOVE,
    RELINQUISH,
    CACHEFLUSH,
};

static int apply_p2m_changes(struct domain *d,
                     enum p2m_operation op,
                     paddr_t start_gpaddr,
                     paddr_t end_gpaddr,
                     paddr_t maddr,
                     int mattr,
                     p2m_type_t t)
{
    int rc;
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t *first = NULL, *second = NULL, *third = NULL;
    paddr_t addr;
    unsigned long cur_first_page = ~0,
                  cur_first_offset = ~0,
                  cur_second_offset = ~0;
    unsigned long count = 0;
    unsigned int flush = 0;
    bool_t populate = (op == INSERT || op == ALLOCATE);
    lpae_t pte;

    spin_lock(&p2m->lock);

    if ( d != current->domain )
        p2m_load_VTTBR(d);

    addr = start_gpaddr;
    while ( addr < end_gpaddr )
    {
        if ( cur_first_page != p2m_first_level_index(addr) )
        {
            if ( first ) unmap_domain_page(first);
            first = p2m_map_first(p2m, addr);
            if ( !first )
            {
                rc = -EINVAL;
                goto out;
            }
            cur_first_page = p2m_first_level_index(addr);
        }

        if ( !first[first_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + FIRST_SIZE) & FIRST_MASK;
                continue;
            }

            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L1 failed\n");
                goto out;
            }
        }

        BUG_ON(!first[first_table_offset(addr)].p2m.valid);

        if ( cur_first_offset != first_table_offset(addr) )
        {
            if (second) unmap_domain_page(second);
            second = map_domain_page(first[first_table_offset(addr)].p2m.base);
            cur_first_offset = first_table_offset(addr);
        }
        /* else: second already valid */

        if ( !second[second_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + SECOND_SIZE) & SECOND_MASK;
                continue;
            }

            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
            if ( rc < 0 ) {
                printk("p2m_populate_ram: L2 failed\n");
                goto out;
            }
        }

        BUG_ON(!second[second_table_offset(addr)].p2m.valid);

        if ( cur_second_offset != second_table_offset(addr) )
        {
            /* map third level */
            if (third) unmap_domain_page(third);
            third = map_domain_page(second[second_table_offset(addr)].p2m.base);
            cur_second_offset = second_table_offset(addr);
        }

        pte = third[third_table_offset(addr)];

        flush |= pte.p2m.valid;

        /* TODO: Handle other p2m types
         *
         * It's safe to do the put_page here because page_alloc will
         * flush the TLBs if the page is reallocated before the end of
         * this loop.
         */
        if ( pte.p2m.valid && p2m_is_foreign(pte.p2m.type) )
        {
            unsigned long mfn = pte.p2m.base;

            ASSERT(mfn_valid(mfn));
            put_page(mfn_to_page(mfn));
        }

        /* Allocate a new RAM page and attach */
        switch (op) {
            case ALLOCATE:
                {
                    struct page_info *page;

                    ASSERT(!pte.p2m.valid);
                    rc = -ENOMEM;
                    page = alloc_domheap_page(d, 0);
                    if ( page == NULL ) {
                        printk("p2m_populate_ram: failed to allocate page\n");
                        goto out;
                    }

                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);

                    write_pte(&third[third_table_offset(addr)], pte);
                }
                break;
            case INSERT:
                {
                    pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
            case RELINQUISH:
            case REMOVE:
                {
                    if ( !pte.p2m.valid )
                    {
                        count++;
                        break;
                    }

                    count += 0x10;

                    memset(&pte, 0x00, sizeof(pte));
                    write_pte(&third[third_table_offset(addr)], pte);
                    count++;
                }
                break;

            case CACHEFLUSH:
                {
                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
                        break;

                    flush_page_to_ram(pte.p2m.base);
                }
                break;
        }

        /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
        if ( op == RELINQUISH && count >= 0x2000 )
        {
            if ( hypercall_preempt_check() )
            {
                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
                rc = -EAGAIN;
                goto out;
            }
            count = 0;
        }

        /* Got the next page */
        addr += PAGE_SIZE;
    }

    if ( flush )
    {
        /* At the beginning of the function, Xen is updating VTTBR
         * with the domain where the mappings are created. In this
         * case it's only necessary to flush TLBs on every CPUs with
         * the current VMID (our domain).
         */
        flush_tlb();
    }

    if ( op == ALLOCATE || op == INSERT )
    {
        unsigned long sgfn = paddr_to_pfn(start_gpaddr);
        unsigned long egfn = paddr_to_pfn(end_gpaddr);

        p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn);
        p2m->lowest_mapped_gfn = MIN(p2m->lowest_mapped_gfn, sgfn);
    }

    rc = 0;

out:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    if ( d != current->domain )
        p2m_load_VTTBR(current->domain);

    spin_unlock(&p2m->lock);

    return rc;
}
Example #23
static mfn_t
p2m_pt_get_entry(struct p2m_domain *p2m, unsigned long gfn,
                 p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                 unsigned int *page_order)
{
    mfn_t mfn;
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    l2_pgentry_t *l2e;
    l1_pgentry_t *l1e;
    unsigned long l1e_flags;
    p2m_type_t l1t;

    ASSERT(paging_mode_translate(p2m->domain));

    /* XXX This is for compatibility with the old model, where anything not 
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m 
     * XXX we will return p2m_invalid for unmapped gfns */
    *t = p2m_mmio_dm;
    /* Not implemented except with EPT */
    *a = p2m_access_rwx; 

    if ( gfn > p2m->max_mapped_pfn )
        /* This pfn is higher than the highest the p2m map currently holds */
        return _mfn(INVALID_MFN);

    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));

    {
        l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
        l4e += l4_table_offset(addr);
        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
        {
            unmap_domain_page(l4e);
            return _mfn(INVALID_MFN);
        }
        mfn = _mfn(l4e_get_pfn(*l4e));
        unmap_domain_page(l4e);
    }
    {
        l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
        l3e += l3_table_offset(addr);
pod_retry_l3:
        if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
        {
            if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand )
            {
                if ( q & P2M_ALLOC )
                {
                    if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                        goto pod_retry_l3;
                    gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__);
                }
                else
                    *t = p2m_populate_on_demand;
            }
            unmap_domain_page(l3e);
            return _mfn(INVALID_MFN);
        }
        else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
        {
            mfn = _mfn(l3e_get_pfn(*l3e) +
                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
                       l1_table_offset(addr));
            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
            unmap_domain_page(l3e);

            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
            if ( page_order )
                *page_order = PAGE_ORDER_1G;
            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
        }

        mfn = _mfn(l3e_get_pfn(*l3e));
        unmap_domain_page(l3e);
    }

    l2e = map_domain_page(mfn_x(mfn));
    l2e += l2_table_offset(addr);

pod_retry_l2:
    if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
    {
        /* PoD: Try to populate a 2-meg chunk */
        if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC ) {
                if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) )
                    goto pod_retry_l2;
            } else
                *t = p2m_populate_on_demand;
        }
    
        unmap_domain_page(l2e);
        return _mfn(INVALID_MFN);
    }
    else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
    {
        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
        *t = p2m_flags_to_type(l2e_get_flags(*l2e));
        unmap_domain_page(l2e);
        
        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
        if ( page_order )
            *page_order = PAGE_ORDER_2M;
        return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
    }

    mfn = _mfn(l2e_get_pfn(*l2e));
    unmap_domain_page(l2e);

    l1e = map_domain_page(mfn_x(mfn));
    l1e += l1_table_offset(addr);
pod_retry_l1:
    l1e_flags = l1e_get_flags(*l1e);
    l1t = p2m_flags_to_type(l1e_flags);
    if ( ((l1e_flags & _PAGE_PRESENT) == 0) && (!p2m_is_paging(l1t)) )
    {
        /* PoD: Try to populate */
        if ( l1t == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC ) {
                if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) )
                    goto pod_retry_l1;
            } else
                *t = p2m_populate_on_demand;
        }
    
        unmap_domain_page(l1e);
        return _mfn(INVALID_MFN);
    }
    mfn = _mfn(l1e_get_pfn(*l1e));
    *t = l1t;
    unmap_domain_page(l1e);

    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t));
    if ( page_order )
        *page_order = PAGE_ORDER_4K;
    return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
}
Example #24
/*
 * If mem_access is in use it might have been the reason why get_page_from_gva
 * failed to fetch the page, as it uses the MMU for the permission checking.
 * Only in these cases do we perform a software-based type check and fetch the
 * page if we indeed found a conflicting mem_access setting.
 */
struct page_info*
p2m_mem_access_check_and_get_page(vaddr_t gva, unsigned long flag,
                                  const struct vcpu *v)
{
    long rc;
    paddr_t ipa;
    gfn_t gfn;
    mfn_t mfn;
    xenmem_access_t xma;
    p2m_type_t t;
    struct page_info *page = NULL;
    struct p2m_domain *p2m = &v->domain->arch.p2m;

    rc = gva_to_ipa(gva, &ipa, flag);
    if ( rc < 0 )
        goto err;

    gfn = _gfn(paddr_to_pfn(ipa));

    /*
     * We do this first as this is faster in the default case when no
     * permission is set on the page.
     */
    rc = __p2m_get_mem_access(v->domain, gfn, &xma);
    if ( rc < 0 )
        goto err;

    /* Let's check if mem_access limited the access. */
    switch ( xma )
    {
    default:
    case XENMEM_access_rwx:
    case XENMEM_access_rw:
        /*
         * If mem_access contains no rw perm restrictions at all then the original
         * fault was correct.
         */
        goto err;
    case XENMEM_access_n2rwx:
    case XENMEM_access_n:
    case XENMEM_access_x:
        /*
         * If no r/w is permitted by mem_access, this was a fault caused by mem_access.
         */
        break;
    case XENMEM_access_wx:
    case XENMEM_access_w:
        /*
         * If this was a read then it was because of mem_access, but if it was
         * a write then the original get_page_from_gva fault was correct.
         */
        if ( flag == GV2M_READ )
            break;
        else
            goto err;
    case XENMEM_access_rx2rw:
    case XENMEM_access_rx:
    case XENMEM_access_r:
        /*
         * If this was a write then it was because of mem_access, but if it was
         * a read then the original get_page_from_gva fault was correct.
         */
        if ( flag == GV2M_WRITE )
            break;
        else
            goto err;
    }

    /*
     * We had a mem_access permission limiting the access, but the page type
     * could also be limiting, so we need to check that as well.
     */
    mfn = p2m_get_entry(p2m, gfn, &t, NULL, NULL);
    if ( mfn_eq(mfn, INVALID_MFN) )
        goto err;

    if ( !mfn_valid(mfn) )
        goto err;

    /*
     * Base type doesn't allow r/w
     */
    if ( t != p2m_ram_rw )
        goto err;

    page = mfn_to_page(mfn_x(mfn));

    if ( unlikely(!get_page(page, v->domain)) )
        page = NULL;

err:
    return page;
}
Example #25
long p2m_pt_audit_p2m(struct p2m_domain *p2m)
{
    unsigned long entry_count = 0, pmbad = 0;
    unsigned long mfn, gfn, m2pfn;
    int test_linear;
    struct domain *d = p2m->domain;

    ASSERT(p2m_locked_by_me(p2m));
    ASSERT(pod_locked_by_me(p2m));

    test_linear = ( (d == current->domain)
                    && !pagetable_is_null(current->arch.monitor_table) );
    if ( test_linear )
        flush_tlb_local();

    /* Audit part one: walk the domain's p2m table, checking the entries. */
    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        l2_pgentry_t *l2e;
        l1_pgentry_t *l1e;
        int i1, i2;

#if CONFIG_PAGING_LEVELS == 4
        l4_pgentry_t *l4e;
        l3_pgentry_t *l3e;
        int i4, i3;
        l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m))));
#else /* CONFIG_PAGING_LEVELS == 3 */
        l3_pgentry_t *l3e;
        int i3;
        l3e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m))));
#endif

        gfn = 0;
#if CONFIG_PAGING_LEVELS >= 4
        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
        {
            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
            {
                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
                continue;
            }
            l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
#endif
            for ( i3 = 0;
                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
                  i3++ )
            {
                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
                {
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                    continue;
                }

                /* check for 1GB super page */
                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
                {
                    mfn = l3e_get_pfn(l3e[i3]);
                    ASSERT(mfn_valid(_mfn(mfn)));
                    /* we have to cover 512x512 4K pages */
                    for ( i2 = 0; 
                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
                          i2++)
                    {
                        m2pfn = get_gpfn_from_mfn(mfn+i2);
                        if ( m2pfn != (gfn + i2) )
                        {
                            pmbad++;
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
                                       m2pfn);
                            BUG();
                        }
                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }
                }

                l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                {
                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                    {
                        if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
                                  == p2m_populate_on_demand ) )
                            entry_count+=SUPERPAGE_PAGES;
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }
                    
                    /* check for super page */
                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
                    {
                        mfn = l2e_get_pfn(l2e[i2]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
                        {
                            m2pfn = get_gpfn_from_mfn(mfn+i1);
                            /* Allow shared M2Ps */
                            if ( (m2pfn != (gfn + i1)) &&
                                 (m2pfn != SHARED_M2P_ENTRY) )
                            {
                                pmbad++;
                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                           " -> gfn %#lx\n", gfn+i1, mfn+i1,
                                           m2pfn);
                                BUG();
                            }
                        }
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));

                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                    {
                        p2m_type_t type;

                        type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                        {
                            if ( type == p2m_populate_on_demand )
                                entry_count++;
                            continue;
                        }
                        mfn = l1e_get_pfn(l1e[i1]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        m2pfn = get_gpfn_from_mfn(mfn);
                        if ( m2pfn != gfn &&
                             type != p2m_mmio_direct &&
                             !p2m_is_grant(type) &&
                             !p2m_is_shared(type) )
                        {
                            pmbad++;
                            printk("mismatch: gfn %#lx -> mfn %#lx"
                                   " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            BUG();
                        }
                    }
                    unmap_domain_page(l1e);
                }
                unmap_domain_page(l2e);
            }
#if CONFIG_PAGING_LEVELS >= 4
            unmap_domain_page(l3e);
        }
#endif

#if CONFIG_PAGING_LEVELS == 4
        unmap_domain_page(l4e);
#else /* CONFIG_PAGING_LEVELS == 3 */
        unmap_domain_page(l3e);
#endif

    }

    if ( entry_count != p2m->pod.entry_count )
    {
        printk("%s: refcounted entry count %ld, audit count %lu!\n",
               __func__,
               p2m->pod.entry_count,
               entry_count);
        BUG();
    }

    return pmbad;
}
Example #26
static mfn_t
p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, 
               p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
               unsigned int *page_order)
{
    mfn_t mfn;
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    l2_pgentry_t *l2e;
    l1_pgentry_t *l1e;
    unsigned long l1e_flags;
    p2m_type_t l1t;

    ASSERT(paging_mode_translate(p2m->domain));

    /* XXX This is for compatibility with the old model, where anything not 
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m 
     * XXX we will return p2m_invalid for unmapped gfns */
    *t = p2m_mmio_dm;
    /* Not implemented except with EPT */
    *a = p2m_access_rwx; 

    if ( gfn > p2m->max_mapped_pfn )
        /* This pfn is higher than the highest the p2m map currently holds */
        return _mfn(INVALID_MFN);

    /* Use the fast path with the linear mapping if we can */
    if ( p2m == p2m_get_hostp2m(current->domain) )
        return p2m_gfn_to_mfn_current(p2m, gfn, t, a, q, page_order);

    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));

#if CONFIG_PAGING_LEVELS >= 4
    {
        l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
        l4e += l4_table_offset(addr);
        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
        {
            unmap_domain_page(l4e);
            return _mfn(INVALID_MFN);
        }
        mfn = _mfn(l4e_get_pfn(*l4e));
        unmap_domain_page(l4e);
    }
#endif
    {
        l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
#if CONFIG_PAGING_LEVELS == 3
        /* On PAE hosts the p2m has eight l3 entries, not four (see
         * shadow_set_p2m_entry()) so we can't use l3_table_offset.
         * Instead, just count the number of l3es from zero.  It's safe
         * to do this because we already checked that the gfn is within
         * the bounds of the p2m. */
        l3e += (addr >> L3_PAGETABLE_SHIFT);
#else
        l3e += l3_table_offset(addr);
#endif
pod_retry_l3:
        if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
        {
            if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand )
            {
                if ( q & P2M_ALLOC )
                {
                    if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                        goto pod_retry_l3;
                    gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__);
                }
                else
                    *t = p2m_populate_on_demand;
            }
            unmap_domain_page(l3e);
            return _mfn(INVALID_MFN);
        }
        else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
        {
            mfn = _mfn(l3e_get_pfn(*l3e) +
                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
                       l1_table_offset(addr));
            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
            unmap_domain_page(l3e);

            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
            if ( page_order )
                *page_order = PAGE_ORDER_1G;
            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
        }

        mfn = _mfn(l3e_get_pfn(*l3e));
        unmap_domain_page(l3e);
    }

    l2e = map_domain_page(mfn_x(mfn));
    l2e += l2_table_offset(addr);

pod_retry_l2:
    if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
    {
        /* PoD: Try to populate a 2-meg chunk */
        if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC ) {
                if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) )
                    goto pod_retry_l2;
            } else
                *t = p2m_populate_on_demand;
        }
    
        unmap_domain_page(l2e);
        return _mfn(INVALID_MFN);
    }
    else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
    {
        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
        *t = p2m_flags_to_type(l2e_get_flags(*l2e));
        unmap_domain_page(l2e);
        
        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
        if ( page_order )
            *page_order = PAGE_ORDER_2M;
        return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
    }

    mfn = _mfn(l2e_get_pfn(*l2e));
    unmap_domain_page(l2e);

    l1e = map_domain_page(mfn_x(mfn));
    l1e += l1_table_offset(addr);
pod_retry_l1:
    l1e_flags = l1e_get_flags(*l1e);
    l1t = p2m_flags_to_type(l1e_flags);
    if ( ((l1e_flags & _PAGE_PRESENT) == 0) && (!p2m_is_paging(l1t)) )
    {
        /* PoD: Try to populate */
        if ( l1t == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC ) {
                if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) )
                    goto pod_retry_l1;
            } else
                *t = p2m_populate_on_demand;
        }
    
        unmap_domain_page(l1e);
        return _mfn(INVALID_MFN);
    }
    mfn = _mfn(l1e_get_pfn(*l1e));
    *t = l1t;
    unmap_domain_page(l1e);

    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t));
    if ( page_order )
        *page_order = PAGE_ORDER_4K;
    return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
}
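A note on the superpage cases above: the returned mfn is rebuilt by adding the lower page-table offsets of the address to the superpage's base frame. The helper below is an illustrative sketch (not part of the Xen sources) of the same arithmetic: for a mapping of order N, the low N bits of the gfn select the frame inside the superpage.

/* Illustrative sketch only: compose the mfn inside a superpage of the given
 * order.  For PAGE_ORDER_1G this equals the
 * l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)
 * sum used in the walker above. */
static inline mfn_t superpage_mfn(unsigned long base_pfn, unsigned long gfn,
                                  unsigned int order)
{
    return _mfn(base_pfn + (gfn & ((1UL << order) - 1)));
}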
Example no. 27
/* Read the current domain's p2m table (through the linear mapping). */
static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m, 
                                    unsigned long gfn, p2m_type_t *t, 
                                    p2m_access_t *a, p2m_query_t q,
                                    unsigned int *page_order)
{
    mfn_t mfn = _mfn(INVALID_MFN);
    p2m_type_t p2mt = p2m_mmio_dm;
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    /* XXX This is for compatibility with the old model, where anything not 
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m 
     * XXX we will return p2m_invalid for unmapped gfns */

    l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
    l2_pgentry_t l2e = l2e_empty();
    int ret;
#if CONFIG_PAGING_LEVELS >= 4
    l3_pgentry_t l3e = l3e_empty();
#endif

    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
           / sizeof(l1_pgentry_t));

#if CONFIG_PAGING_LEVELS >= 4
    /*
     * Read & process L3
     */
    p2m_entry = (l1_pgentry_t *)
        &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
                           + l3_linear_offset(addr)];
pod_retry_l3:
    ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));

    if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    {
        if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
             (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
        {
            /* The read has succeeded, so we know that the mapping exists */
            if ( q & P2M_ALLOC )
            {
                if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                    goto pod_retry_l3;
                p2mt = p2m_invalid;
                gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__);
                goto out;
            }
            else
            {
                p2mt = p2m_populate_on_demand;
                goto out;
            }
        }
        goto pod_retry_l2;
    }

    if ( l3e_get_flags(l3e) & _PAGE_PSE )
    {
        p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
        ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
        if ( p2m_is_valid(p2mt) )
            mfn = _mfn(l3e_get_pfn(l3e) + 
                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + 
                       l1_table_offset(addr));
        else
            p2mt = p2m_mmio_dm;
            
        if ( page_order )
            *page_order = PAGE_ORDER_1G;
        goto out;
    }
#endif
    /*
     * Read & process L2
     */
    p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
                                   + l2_linear_offset(addr)];

pod_retry_l2:
    ret = __copy_from_user(&l2e,
                           p2m_entry,
                           sizeof(l2e));
    if ( ret != 0
         || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
    {
        if ( (l2e_get_flags(l2e) & _PAGE_PSE)
            && ( p2m_flags_to_type(l2e_get_flags(l2e))
                 == p2m_populate_on_demand ) )
        {
            /* The read has succeeded, so we know that the mapping
             * exists at this point.  */
            if ( q & P2M_ALLOC )
            {
                if ( !p2m_pod_demand_populate(p2m, gfn, 
                                                PAGE_ORDER_2M, q) )
                    goto pod_retry_l2;

                /* Allocate failed. */
                p2mt = p2m_invalid;
                printk("%s: Allocate failed!\n", __func__);
                goto out;
            }
            else
            {
                p2mt = p2m_populate_on_demand;
                goto out;
            }
        }

        goto pod_retry_l1;
    }
        
    if ( l2e_get_flags(l2e) & _PAGE_PSE )
    {
        p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
        ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));

        if ( p2m_is_valid(p2mt) )
            mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
        else
            p2mt = p2m_mmio_dm;

        if ( page_order )
            *page_order = PAGE_ORDER_2M;
        goto out;
    }

    /*
     * Read and process L1
     */

    /* Need to __copy_from_user because the p2m is sparse and this
     * part might not exist */
pod_retry_l1:
    p2m_entry = &phys_to_machine_mapping[gfn];

    ret = __copy_from_user(&l1e,
                           p2m_entry,
                           sizeof(l1e));
            
    if ( ret == 0 ) {
        unsigned long l1e_mfn = l1e_get_pfn(l1e);
        p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
        ASSERT( mfn_valid(_mfn(l1e_mfn)) || !p2m_is_ram(p2mt) ||
                p2m_is_paging(p2mt) );

        if ( p2mt == p2m_populate_on_demand )
        {
            /* The read has succeeded, so we know that the mapping
             * exists at this point.  */
            if ( q & P2M_ALLOC )
            {
                if ( !p2m_pod_demand_populate(p2m, gfn, 
                                                PAGE_ORDER_4K, q) )
                    goto pod_retry_l1;

                /* Allocate failed. */
                p2mt = p2m_invalid;
                goto out;
            }
            else
            {
                p2mt = p2m_populate_on_demand;
                goto out;
            }
        }

        if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
            mfn = _mfn(l1e_mfn);
        else 
            /* XXX see above */
            p2mt = p2m_mmio_dm;
    }
    
    if ( page_order )
        *page_order = PAGE_ORDER_4K;
out:
    *t = p2mt;
    return mfn;
}
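The fast path above reads the p2m through the RO_MPT linear mapping, which is sparse, so every entry fetch goes through __copy_from_user(): a hole in the table reads back as a failed copy and is treated as "not present" instead of faulting. A minimal sketch of that pattern, reusing the names from the example (illustrative, not a drop-in helper):

/* Sketch: fetch an l1 p2m entry from the sparse linear table.  Returns 1 if
 * the read succeeded, 0 if the backing page is absent, in which case the
 * caller treats the entry as empty. */
static inline int read_sparse_p2m_l1e(unsigned long gfn, l1_pgentry_t *l1e)
{
    return __copy_from_user(l1e, &phys_to_machine_mapping[gfn],
                            sizeof(*l1e)) == 0;
}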
Example no. 28
// Returns 0 on error (out of memory)
static int
p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, 
              unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
{
    // XXX -- this might be able to be faster iff current->domain == d
    mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
    void *table = map_domain_page(mfn_x(table_mfn));
    unsigned long i, gfn_remainder = gfn;
    l1_pgentry_t *p2m_entry;
    l1_pgentry_t entry_content;
    l2_pgentry_t l2e_content;
    l3_pgentry_t l3e_content;
    int rv=0;
    unsigned int iommu_pte_flags = (p2mt == p2m_ram_rw) ?
                                   IOMMUF_readable|IOMMUF_writable:
                                   0; 
    unsigned long old_mfn = 0;

    if ( tb_init_done )
    {
        struct {
            u64 gfn, mfn;
            int p2mt;
            int d:16,order:16;
        } t;

        t.gfn = gfn;
        t.mfn = mfn_x(mfn);
        t.p2mt = p2mt;
        t.d = p2m->domain->domain_id;
        t.order = page_order;

        __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
    }

#if CONFIG_PAGING_LEVELS >= 4
    if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
        goto out;
#endif
    /*
     * Try to allocate 1GB page table if this feature is supported.
     */
    if ( page_order == PAGE_ORDER_1G )
    {
        l1_pgentry_t old_entry = l1e_empty();
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L3_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        {
            /* We're replacing a non-SP page with a superpage.  Make sure to
             * handle freeing the table properly. */
            old_entry = *p2m_entry;
        }

        ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
        l3e_content = mfn_valid(mfn) 
            ? l3e_from_pfn(mfn_x(mfn),
                           p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE)
            : l3e_empty();
        entry_content.l1 = l3e_content.l3;

        if ( entry_content.l1 != 0 )
        {
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
            old_mfn = l1e_get_pfn(*p2m_entry);
        }

        p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 3);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */

        /* Free old intermediate tables if necessary */
        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
            p2m_free_entry(p2m, &old_entry, page_order);
    }
    /*
     * When using PAE Xen, we only allow 33 bits of pseudo-physical
     * address in translated guests (i.e. 8 GBytes).  This restriction
     * comes from wanting to map the P2M table into the 16MB RO_MPT hole
     * in Xen's address space for translated PV guests.
     * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
     */
    else if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                              ((CONFIG_PAGING_LEVELS == 3)
                               ? (hap_enabled(p2m->domain) ? 4 : 8)
                               : L3_PAGETABLE_ENTRIES),
                              PGT_l2_page_table) )
        goto out;

    if ( page_order == PAGE_ORDER_4K )
    {
        if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                             L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
            goto out;

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   0, L1_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        
        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct)
                            || p2m_is_paging(p2mt) )
            entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
                                             p2m_type_to_flags(p2mt, mfn));
        else
            entry_content = l1e_empty();

        if ( entry_content.l1 != 0 )
        {
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
            old_mfn = l1e_get_pfn(*p2m_entry);
        }
        /* level 1 entry */
        p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    }
    else if ( page_order == PAGE_ORDER_2M )
    {
        l1_pgentry_t old_entry = l1e_empty();
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L2_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        
        /* FIXME: Deal with 4k replaced by 2meg pages */
        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        {
            /* We're replacing a non-SP page with a superpage.  Make sure to
             * handle freeing the table properly. */
            old_entry = *p2m_entry;
        }
        
        ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
        if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
            l2e_content = l2e_from_pfn(mfn_x(mfn),
                                       p2m_type_to_flags(p2mt, mfn) |
                                       _PAGE_PSE);
        else
            l2e_content = l2e_empty();
        
        entry_content.l1 = l2e_content.l2;

        if ( entry_content.l1 != 0 )
        {
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
            old_mfn = l1e_get_pfn(*p2m_entry);
        }

        p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 2);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */

        /* Free old intermediate tables if necessary */
        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
            p2m_free_entry(p2m, &old_entry, page_order);
    }

    /* Track the highest gfn for which we have ever had a valid mapping */
    if ( p2mt != p2m_invalid
         && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
        p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;

    if ( iommu_enabled && need_iommu(p2m->domain) )
    {
        if ( iommu_hap_pt_share )
        {
            if ( old_mfn && (old_mfn != mfn_x(mfn)) )
                amd_iommu_flush_pages(p2m->domain, gfn, page_order);
        }
        else
        {
            if ( p2mt == p2m_ram_rw )
                for ( i = 0; i < (1UL << page_order); i++ )
                    iommu_map_page(p2m->domain, gfn+i, mfn_x(mfn)+i,
                                   IOMMUF_readable|IOMMUF_writable);
            else
                for ( i = 0; i < (1UL << page_order); i++ )
                    iommu_unmap_page(p2m->domain, gfn+i);
        }
    }

    /* Success */
    rv = 1;

out:
    unmap_domain_page(table);
    return rv;
}
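p2m_set_entry() reports failure by returning 0 (typically when an intermediate table cannot be allocated), so callers must check the result. Below is a minimal, hypothetical usage sketch installing a single 4k RAM frame with default access; real callers in this tree normally hold the p2m lock across the update.

/* Hypothetical caller sketch: map one guest frame as ordinary RAM.
 * p2m_ram_rw and p2m_access_rwx mirror the defaults seen elsewhere above. */
static int map_one_ram_frame(struct p2m_domain *p2m, unsigned long gfn,
                             mfn_t mfn)
{
    if ( !p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                        p2m_ram_rw, p2m_access_rwx) )
        return -ENOMEM;   /* intermediate table allocation failed */
    return 0;
}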
Example no. 29
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    uint32_t missing;
    mfn_t top_mfn;
    void *top_map;
    p2m_type_t p2mt;
    walk_t gw;
    unsigned long top_gfn;
    struct page_info *top_page;

    /* Get the top-level table's MFN */
    top_gfn = cr3 >> PAGE_SHIFT;
    top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn,
                                     &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        ASSERT(p2m_is_hostp2m(p2m));
        pfec[0] = PFEC_page_paged;
        if ( top_page )
            put_page(top_page);
        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
        return gfn_x(INVALID_GFN);
    }
    if ( p2m_is_shared(p2mt) )
    {
        pfec[0] = PFEC_page_shared;
        if ( top_page )
            put_page(top_page);
        return gfn_x(INVALID_GFN);
    }
    if ( !top_page )
    {
        pfec[0] &= ~PFEC_page_present;
        goto out_tweak_pfec;
    }
    top_mfn = _mfn(page_to_mfn(top_page));

    /* Map the top-level table and call the tree-walker */
    ASSERT(mfn_valid(top_mfn));
    top_map = map_domain_page(top_mfn);
#if GUEST_PAGING_LEVELS == 3
    top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
    missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map);
    unmap_domain_page(top_map);
    put_page(top_page);

    /* Interpret the answer */
    if ( missing == 0 )
    {
        gfn_t gfn = guest_walk_to_gfn(&gw);
        struct page_info *page;
        page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt,
                                     NULL, P2M_ALLOC | P2M_UNSHARE);
        if ( page )
            put_page(page);
        if ( p2m_is_paging(p2mt) )
        {
            ASSERT(p2m_is_hostp2m(p2m));
            pfec[0] = PFEC_page_paged;
            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
            return gfn_x(INVALID_GFN);
        }
        if ( p2m_is_shared(p2mt) )
        {
            pfec[0] = PFEC_page_shared;
            return gfn_x(INVALID_GFN);
        }

        if ( page_order )
            *page_order = guest_walk_to_page_order(&gw);

        return gfn_x(gfn);
    }

    if ( missing & _PAGE_PRESENT )
        pfec[0] &= ~PFEC_page_present;

    if ( missing & _PAGE_INVALID_BITS ) 
        pfec[0] |= PFEC_reserved_bit;

    if ( missing & _PAGE_PKEY_BITS )
        pfec[0] |= PFEC_prot_key;

    if ( missing & _PAGE_PAGED )
        pfec[0] = PFEC_page_paged;

    if ( missing & _PAGE_SHARED )
        pfec[0] = PFEC_page_shared;

 out_tweak_pfec:
    /*
     * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
     * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
     */
    if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) )
        pfec[0] &= ~PFEC_insn_fetch;

    return gfn_x(INVALID_GFN);
}
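On success the walker returns a gfn and, optionally, the order of the mapping; on failure it returns INVALID_GFN and pfec[0] encodes the reason. A hedged caller sketch (names hypothetical) that rebuilds the full guest-physical address by reattaching the in-page offset of ga:

/* Hypothetical caller sketch: translate ga to a gpa.  A result of
 * INVALID_GFN means the translation failed and pfec[0] says why. */
static paddr_t ga_to_gpa_sketch(struct vcpu *v, struct p2m_domain *p2m,
                                unsigned long cr3, paddr_t ga, uint32_t *pfec)
{
    unsigned long gfn =
        hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(v, p2m, cr3, ga, pfec, NULL);

    if ( gfn == gfn_x(INVALID_GFN) )
        return 0;   /* sketch only: a real caller would propagate the error */
    return ((paddr_t)gfn << PAGE_SHIFT) | (ga & ~PAGE_MASK);
}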
Example no. 30
/*
 * The outcome of the mapping attempt is reported in op->status; on success
 * op->handle and op->dev_bus_addr are also filled in.
 *
 * op->host_addr is _either_ a host virtual address, or the address of the
 * pte to update, as indicated by the GNTMAP_contains_pte flag.
 */
static void
__gnttab_map_grant_ref(
    struct gnttab_map_grant_ref *op)
{
    struct domain *ld, *rd;
    struct vcpu   *led;
    int            handle;
    unsigned long  frame = 0;
    int            rc = GNTST_okay;
    unsigned int   cache_flags;
    struct active_grant_entry *act;
    struct grant_mapping *mt;
    grant_entry_t *sha;
    union grant_combo scombo, prev_scombo, new_scombo;

    /*
     * We bound the number of times we retry CMPXCHG on memory locations that
     * we share with a guest OS. The reason is that the guest can modify that
     * location at a higher rate than we can read-modify-CMPXCHG, so the guest
     * could cause us to livelock. There are a few cases where it is valid for
     * the guest to race our updates (e.g., to change the GTF_readonly flag),
     * so we allow a few retries before failing.
     */
    int retries = 0;

    led = current;
    ld = led->domain;

    if ( unlikely((op->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
    {
        gdprintk(XENLOG_INFO, "Bad flags in grant map op (%x).\n", op->flags);
        op->status = GNTST_bad_gntref;
        return;
    }

    if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) )
    {
        gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom);
        op->status = GNTST_bad_domain;
        return;
    }

    rc = xsm_grant_mapref(ld, rd, op->flags);
    if ( rc )
    {
        rcu_unlock_domain(rd);
        op->status = GNTST_permission_denied;
        return;
    }

    if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
    {
        rcu_unlock_domain(rd);
        gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle.\n");
        op->status = GNTST_no_device_space;
        return;
    }

    spin_lock(&rd->grant_table->lock);

    /* Bounds check on the grant ref */
    if ( unlikely(op->ref >= nr_grant_entries(rd->grant_table)) )
        PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);

    act = &active_entry(rd->grant_table, op->ref);
    sha = &shared_entry(rd->grant_table, op->ref);

    /* If already pinned, check the active domid and avoid refcnt overflow. */
    if ( act->pin &&
            ((act->domid != ld->domain_id) ||
             (act->pin & 0x80808080U) != 0) )
        PIN_FAIL(unlock_out, GNTST_general_error,
                 "Bad domain (%d != %d), or risk of counter overflow %08x\n",
                 act->domid, ld->domain_id, act->pin);

    if ( !act->pin ||
            (!(op->flags & GNTMAP_readonly) &&
             !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
    {
        scombo.word = *(u32 *)&sha->flags;

        /*
         * This loop attempts to set the access (reading/writing) flags
         * in the grant table entry.  It tries a cmpxchg on the field
         * up to five times, and then fails under the assumption that
         * the guest is misbehaving.
         */
        for ( ; ; )
        {
            /* If not already pinned, check the grant domid and type. */
            if ( !act->pin &&
                    (((scombo.shorts.flags & GTF_type_mask) !=
                      GTF_permit_access) ||
                     (scombo.shorts.domid != ld->domain_id)) )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Bad flags (%x) or dom (%d). (expected dom %d)\n",
                         scombo.shorts.flags, scombo.shorts.domid,
                         ld->domain_id);

            new_scombo = scombo;
            new_scombo.shorts.flags |= GTF_reading;

            if ( !(op->flags & GNTMAP_readonly) )
            {
                new_scombo.shorts.flags |= GTF_writing;
                if ( unlikely(scombo.shorts.flags & GTF_readonly) )
                    PIN_FAIL(unlock_out, GNTST_general_error,
                             "Attempt to write-pin a r/o grant entry.\n");
            }

            prev_scombo.word = cmpxchg((u32 *)&sha->flags,
                                       scombo.word, new_scombo.word);
            if ( likely(prev_scombo.word == scombo.word) )
                break;

            if ( retries++ == 4 )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Shared grant entry is unstable.\n");

            scombo = prev_scombo;
        }

        if ( !act->pin )
        {
            act->domid = scombo.shorts.domid;
            act->frame = gmfn_to_mfn(rd, sha->frame);
        }
    }

    if ( op->flags & GNTMAP_device_map )
        act->pin += (op->flags & GNTMAP_readonly) ?
                    GNTPIN_devr_inc : GNTPIN_devw_inc;
    if ( op->flags & GNTMAP_host_map )
        act->pin += (op->flags & GNTMAP_readonly) ?
                    GNTPIN_hstr_inc : GNTPIN_hstw_inc;

    frame = act->frame;

    cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );

    spin_unlock(&rd->grant_table->lock);

    if ( is_iomem_page(frame) )
    {
        if ( !iomem_access_permitted(rd, frame, frame) )
        {
            gdprintk(XENLOG_WARNING,
                     "Iomem mapping not permitted %lx (domain %d)\n",
                     frame, rd->domain_id);
            rc = GNTST_general_error;
            goto undo_out;
        }

        rc = create_grant_host_mapping(
                 op->host_addr, frame, op->flags, cache_flags);
        if ( rc != GNTST_okay )
            goto undo_out;
    }
    else
    {
        if ( unlikely(!mfn_valid(frame)) ||
                unlikely(!(gnttab_host_mapping_get_page_type(op, ld, rd) ?
                           get_page_and_type(mfn_to_page(frame), rd,
                                             PGT_writable_page) :
                           get_page(mfn_to_page(frame), rd))) )
        {
            if ( !rd->is_dying )
                gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n",
                         frame);
            rc = GNTST_general_error;
            goto undo_out;
        }

        if ( op->flags & GNTMAP_host_map )
        {
            rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
            if ( rc != GNTST_okay )
            {
                if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
                    put_page_type(mfn_to_page(frame));
                put_page(mfn_to_page(frame));
                goto undo_out;
            }

            if ( op->flags & GNTMAP_device_map )
            {
                (void)get_page(mfn_to_page(frame), rd);
                if ( !(op->flags & GNTMAP_readonly) )
                    get_page_type(mfn_to_page(frame), PGT_writable_page);
            }
        }
    }

    TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);

    mt = &maptrack_entry(ld->grant_table, handle);
    mt->domid = op->dom;
    mt->ref   = op->ref;
    mt->flags = op->flags;

    op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
    op->handle       = handle;
    op->status       = GNTST_okay;

    rcu_unlock_domain(rd);
    return;

undo_out:
    spin_lock(&rd->grant_table->lock);

    act = &active_entry(rd->grant_table, op->ref);
    sha = &shared_entry(rd->grant_table, op->ref);

    if ( op->flags & GNTMAP_device_map )
        act->pin -= (op->flags & GNTMAP_readonly) ?
                    GNTPIN_devr_inc : GNTPIN_devw_inc;
    if ( op->flags & GNTMAP_host_map )
        act->pin -= (op->flags & GNTMAP_readonly) ?
                    GNTPIN_hstr_inc : GNTPIN_hstw_inc;

    if ( !(op->flags & GNTMAP_readonly) &&
            !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
        gnttab_clear_flag(_GTF_writing, &sha->flags);

    if ( !act->pin )
        gnttab_clear_flag(_GTF_reading, &sha->flags);

unlock_out:
    spin_unlock(&rd->grant_table->lock);
    op->status = rc;
    put_maptrack_handle(ld->grant_table, handle);
    rcu_unlock_domain(rd);
}
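The part of this function worth calling out separately is the bounded cmpxchg loop: the shared flags word can be rewritten by the guest at any moment, so each update is attempted with cmpxchg and abandoned after a few failed races rather than looping forever. A generic, illustrative sketch of that pattern (not the grant-table code itself):

/* Illustrative sketch of the bounded-retry update above: set a bit in a
 * word shared with the guest, giving up after a few races so a hostile
 * guest cannot livelock us.  The retry limit mirrors the code above. */
static int set_shared_bit_bounded(u32 *shared, u32 bit)
{
    u32 old, cur = *shared;
    int retries = 0;

    for ( ; ; )
    {
        old = cmpxchg(shared, cur, cur | bit);
        if ( old == cur )
            return 0;          /* our update landed */
        if ( retries++ == 4 )
            return -EBUSY;     /* shared word is unstable */
        cur = old;
    }
}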