Exemplo n.º 1
0
int
set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = 0;
    p2m_access_t a;
    p2m_type_t ot;
    mfn_t omfn;
    unsigned long pg_type;

    if ( !paging_mode_translate(p2m->domain) )
        return 0;

    gfn_lock(p2m, gfn, 0);
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL);
    /* At the moment we only allow p2m change if gfn has already been made
     * sharable first */
    ASSERT(p2m_is_shared(ot));
    ASSERT(mfn_valid(omfn));
    /* Set the m2p entry to invalid only if there are no further type
     * refs to this page as shared */
    pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
    if ( (pg_type & PGT_count_mask) == 0
         || (pg_type & PGT_type_mask) != PGT_shared_page )
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);

    P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
    rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared, p2m->default_access);
    gfn_unlock(p2m, gfn, 0);
    if ( 0 == rc )
        gdprintk(XENLOG_ERR,
            "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
            mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)));
    return rc;
}
Exemplo n.º 2
0
static void
p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
                unsigned int page_order)
{
    unsigned long i;
    mfn_t mfn_return;
    p2m_type_t t;
    p2m_access_t a;

    if ( !paging_mode_translate(p2m->domain) )
    {
        if ( need_iommu(p2m->domain) )
            for ( i = 0; i < (1 << page_order); i++ )
                iommu_unmap_page(p2m->domain, mfn + i);
        return;
    }

    ASSERT(gfn_locked_by_me(p2m, gfn));
    P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);

    if ( mfn_valid(_mfn(mfn)) )
    {
        for ( i = 0; i < (1UL << page_order); i++ )
        {
            mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, 0, NULL);
            if ( !p2m_is_grant(t) && !p2m_is_shared(t) )
                set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
            ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
        }
    }
    set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid, p2m->default_access);
}
Exemplo n.º 3
0
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); 
    if ( unlikely(p2m_is_paging(p2mt)) )
    {
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        p2m_mem_paging_drop_page(p2m_get_hostp2m(d), gmfn);
        return 1;
    }
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                d->domain_id, gmfn);
        return 0;
    }
            
    page = mfn_to_page(mfn);
#ifdef CONFIG_X86
    /* If gmfn is shared, just drop the guest reference (which may or may not
     * free the page) */
    if(p2m_is_shared(p2mt))
    {
        put_page_and_type(page);
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        return 1;
    }

#endif /* CONFIG_X86 */
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);
            
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);

    return 1;
}
Exemplo n.º 4
0
/* Atomically look up a GFN and take a reference count on the backing page. */
struct page_info *get_page_from_gfn_p2m(
    struct domain *d, struct p2m_domain *p2m, unsigned long gfn,
    p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
{
    struct page_info *page = NULL;
    p2m_access_t _a;
    p2m_type_t _t;
    mfn_t mfn;

    /* Allow t or a to be NULL */
    t = t ?: &_t;
    a = a ?: &_a;

    if ( likely(!p2m_locked_by_me(p2m)) )
    {
        /* Fast path: look up and get out */
        p2m_read_lock(p2m);
        mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
             && mfn_valid(mfn)
             && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
        {
            page = mfn_to_page(mfn);
            if ( !get_page(page, d)
                 /* Page could be shared */
                 && !get_page(page, dom_cow) )
                page = NULL;
        }
        p2m_read_unlock(p2m);

        if ( page )
            return page;

        /* Error path: not a suitable GFN at all */
        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
            return NULL;
    }

    /* Slow path: take the write lock and do fixups */
    mfn = get_gfn_type_access(p2m, gfn, t, a, q, NULL);
    if ( p2m_is_ram(*t) && mfn_valid(mfn) )
    {
        page = mfn_to_page(mfn);
        if ( !get_page(page, d) )
            page = NULL;
    }
    put_gfn(d, gfn);

    return page;
}
Exemplo n.º 5
0
mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                    unsigned int *page_order, bool_t locked)
{
    mfn_t mfn;

    /* Unshare makes no sense withuot populate. */
    if ( q & P2M_UNSHARE )
        q |= P2M_ALLOC;

    if ( !p2m || !paging_mode_translate(p2m->domain) )
    {
        /* Not necessarily true, but for non-translated guests, we claim
         * it's the most generic kind of memory */
        *t = p2m_ram_rw;
        return _mfn(gfn);
    }

    if ( locked )
        /* Grab the lock here, don't release until put_gfn */
        gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);

    if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
    {
        ASSERT(!p2m_is_nestedp2m(p2m));
        /* Try to unshare. If we fail, communicate ENOMEM without
         * sleeping. */
        if ( mem_sharing_unshare_page(p2m->domain, gfn, 0) < 0 )
            (void)mem_sharing_notify_enomem(p2m->domain, gfn, 0);
        mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
    }

    if (unlikely((p2m_is_broken(*t))))
    {
        /* Return invalid_mfn to avoid caller's access */
        mfn = _mfn(INVALID_MFN);
        if ( q & P2M_ALLOC )
            domain_crash(p2m->domain);
    }

    return mfn;
}
Exemplo n.º 6
0
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    uint32_t missing;
    mfn_t top_mfn;
    void *top_map;
    p2m_type_t p2mt;
    walk_t gw;
    unsigned long top_gfn;
    struct page_info *top_page;

    /* Get the top-level table's MFN */
    top_gfn = cr3 >> PAGE_SHIFT;
    top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn,
                                     &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        ASSERT(p2m_is_hostp2m(p2m));
        pfec[0] = PFEC_page_paged;
        if ( top_page )
            put_page(top_page);
        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
        return INVALID_GFN;
    }
    if ( p2m_is_shared(p2mt) )
    {
        pfec[0] = PFEC_page_shared;
        if ( top_page )
            put_page(top_page);
        return INVALID_GFN;
    }
    if ( !top_page )
    {
        pfec[0] &= ~PFEC_page_present;
        return INVALID_GFN;
    }
    top_mfn = _mfn(page_to_mfn(top_page));

    /* Map the top-level table and call the tree-walker */
    ASSERT(mfn_valid(mfn_x(top_mfn)));
    top_map = map_domain_page(mfn_x(top_mfn));
#if GUEST_PAGING_LEVELS == 3
    top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
    missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map);
    unmap_domain_page(top_map);
    put_page(top_page);

    /* Interpret the answer */
    if ( missing == 0 )
    {
        gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
        struct page_info *page;
        page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt,
                                     NULL, P2M_ALLOC | P2M_UNSHARE);
        if ( page )
            put_page(page);
        if ( p2m_is_paging(p2mt) )
        {
            ASSERT(p2m_is_hostp2m(p2m));
            pfec[0] = PFEC_page_paged;
            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
            return INVALID_GFN;
        }
        if ( p2m_is_shared(p2mt) )
        {
            pfec[0] = PFEC_page_shared;
            return INVALID_GFN;
        }

        if ( page_order )
            *page_order = guest_walk_to_page_order(&gw);

        return gfn_x(gfn);
    }

    if ( missing & _PAGE_PRESENT )
        pfec[0] &= ~PFEC_page_present;

    if ( missing & _PAGE_INVALID_BITS ) 
        pfec[0] |= PFEC_reserved_bit;

    if ( missing & _PAGE_PAGED )
        pfec[0] = PFEC_page_paged;

    if ( missing & _PAGE_SHARED )
        pfec[0] = PFEC_page_shared;

    return INVALID_GFN;
}
Exemplo n.º 7
0
Arquivo: memory.c Projeto: blue236/xen
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); 
    if ( unlikely(p2m_is_paging(p2mt)) )
    {
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        put_gfn(d, gmfn);
        /* If the page hasn't yet been paged out, there is an
         * actual page that needs to be released. */
        if ( p2mt == p2m_ram_paging_out )
        {
            ASSERT(mfn_valid(mfn));
            page = mfn_to_page(mfn);
            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
                put_page(page);
        }
        p2m_mem_paging_drop_page(d, gmfn, p2mt);
        return 1;
    }
    if ( p2mt == p2m_mmio_direct )
    {
        clear_mmio_p2m_entry(d, gmfn, _mfn(mfn));
        put_gfn(d, gmfn);
        return 1;
    }
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                d->domain_id, gmfn);
        return 0;
    }
            
#ifdef CONFIG_X86
    if ( p2m_is_shared(p2mt) )
    {
        /* Unshare the page, bail out on error. We unshare because 
         * we might be the only one using this shared page, and we
         * need to trigger proper cleanup. Once done, this is 
         * like any other page. */
        if ( mem_sharing_unshare_page(d, gmfn, 0) )
        {
            put_gfn(d, gmfn);
            (void)mem_sharing_notify_enomem(d, gmfn, 0);
            return 0;
        }
        /* Maybe the mfn changed */
        mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt));
        ASSERT(!p2m_is_shared(p2mt));
    }
#endif /* CONFIG_X86 */

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    /*
     * With the lack of an IOMMU on some platforms, domains with DMA-capable
     * device must retrieve the same pfn when the hypercall populate_physmap
     * is called.
     *
     * For this purpose (and to match populate_physmap() behavior), the page
     * is kept allocated.
     */
    if ( !is_domain_direct_mapped(d) &&
         test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);
    put_gfn(d, gmfn);

    return 1;
}
Exemplo n.º 8
0
static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio;
    ioreq_t p = {
        .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
        .addr = addr,
        .size = size,
        .dir = dir,
        .df = df,
        .data = ram_gpa,
        .data_is_ptr = (p_data == NULL),
    };
    unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
    p2m_type_t p2mt;
    struct page_info *ram_page;
    int rc;

    /* Check for paged out page */
    ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        p2m_mem_paging_populate(curr->domain, ram_gfn);
        return X86EMUL_RETRY;
    }
    if ( p2m_is_shared(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_RETRY;
    }

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( !p.data_is_ptr && (dir == IOREQ_WRITE) )
    {
        memcpy(&p.data, p_data, size);
        p_data = NULL;
    }

    vio = &curr->arch.hvm_vcpu.hvm_io;

    if ( is_mmio && !p.data_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data, &vio->mmio_large_read[addr - pa],
                       size);
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( vio->io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        vio->io_state = HVMIO_none;
        if ( p_data == NULL )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_UNHANDLEABLE;
        }
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !p.data_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (vio->mmio_large_write_pa +
                       vio->mmio_large_write_bytes)) )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_RETRY;
        }
    default:
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( hvm_io_pending(curr) )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending?\n");
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    vio->io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
    vio->io_size = size;

    /*
     * When retrying a repeated string instruction, force exit to guest after
     * completion of the retried iteration to allow handling of interrupts.
     */
    if ( vio->mmio_retrying )
        *reps = 1;

    p.count = *reps;

    if ( dir == IOREQ_WRITE )
        hvmtrace_io_assist(is_mmio, &p);

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(&p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(&p);
    }
    else
    {
        rc = hvm_portio_intercept(&p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p.count;
        p.state = STATE_IORESP_READY;
        if ( !vio->mmio_retry )
        {
            hvm_io_assist(&p);
            vio->io_state = HVMIO_none;
        }
        else
            /* Defer hvm_io_assist() invocation to hvm_do_resume(). */
            vio->io_state = HVMIO_handle_mmio_awaiting_completion;
        break;
    case X86EMUL_UNHANDLEABLE:
        /* If there is no backing DM, just ignore accesses */
        if ( !hvm_has_dm(curr->domain) )
        {
            rc = X86EMUL_OKAY;
            vio->io_state = HVMIO_none;
        }
        else
        {
            rc = X86EMUL_RETRY;
            if ( !hvm_send_assist_req(&p) )
                vio->io_state = HVMIO_none;
            else if ( p_data == NULL )
                rc = X86EMUL_OKAY;
        }
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
    {
        if ( ram_page )
            put_page(ram_page);
        return rc;
    }

 finish_access:
    if ( dir == IOREQ_READ )
        hvmtrace_io_assist(is_mmio, &p);

    if ( p_data != NULL )
        memcpy(p_data, &vio->io_data, size);

    if ( is_mmio && !p.data_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                vio->mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <= sizeof(vio->mmio_large_read)) )
            {
                memcpy(&vio->mmio_large_read[bytes], p_data, size);
                vio->mmio_large_read_bytes += size;
            }
        }
    }

    if ( ram_page )
        put_page(ram_page);
    return X86EMUL_OKAY;
}

int hvmemul_do_pio(
    unsigned long port, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(0, port, reps, size, ram_gpa, dir, df, p_data);
}

static int hvmemul_do_mmio(
    paddr_t gpa, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(1, gpa, reps, size, ram_gpa, dir, df, p_data);
}

/*
 * Convert addr from linear to physical form, valid over the range
 * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
 * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
 * @pfec indicates the access checks to be performed during page-table walks.
 */
static int hvmemul_linear_to_phys(
    unsigned long addr,
    paddr_t *paddr,
    unsigned int bytes_per_rep,
    unsigned long *reps,
    uint32_t pfec,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct vcpu *curr = current;
    unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
    int reverse;

    /*
     * Clip repetitions to a sensible maximum. This avoids extensive looping in
     * this function while still amortising the cost of I/O trap-and-emulate.
     */
    *reps = min_t(unsigned long, *reps, 4096);

    /* With no paging it's easy: linear == physical. */
    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
    {
        *paddr = addr;
        return X86EMUL_OKAY;
    }

    /* Reverse mode if this is a backwards multi-iteration string operation. */
    reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);

    if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
    {
        /* Do page-straddling first iteration forwards via recursion. */
        paddr_t _paddr;
        unsigned long one_rep = 1;
        int rc = hvmemul_linear_to_phys(
            addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        pfn = _paddr >> PAGE_SHIFT;
    }
    else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
Exemplo n.º 9
0
static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    paddr_t value = ram_gpa;
    int value_is_ptr = (p_data == NULL);
    struct vcpu *curr = current;
    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
    ioreq_t *p = get_ioreq(curr);
    unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
    p2m_type_t p2mt;
    mfn_t ram_mfn;
    int rc;

    /* Check for paged out page */
    ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0);
    if ( p2m_is_paging(p2mt) )
    {
        p2m_mem_paging_populate(p2m, ram_gfn);
        return X86EMUL_RETRY;
    }
    if ( p2m_is_shared(p2mt) )
        return X86EMUL_RETRY;

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( (p_data != NULL) && (dir == IOREQ_WRITE) )
    {
        memcpy(&value, p_data, size);
        p_data = NULL;
    }

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
                return X86EMUL_OKAY;
        }
        else
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
                       size);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( curr->arch.hvm_vcpu.io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        curr->arch.hvm_vcpu.io_state = HVMIO_none;
        if ( p_data == NULL )
            return X86EMUL_UNHANDLEABLE;
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa +
                       curr->arch.hvm_vcpu.mmio_large_write_bytes)) )
            return X86EMUL_RETRY;
    default:
        return X86EMUL_UNHANDLEABLE;
    }

    if ( p->state != STATE_IOREQ_NONE )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
                 p->state);
        return X86EMUL_UNHANDLEABLE;
    }

    curr->arch.hvm_vcpu.io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
    curr->arch.hvm_vcpu.io_size = size;

    p->dir = dir;
    p->data_is_ptr = value_is_ptr;
    p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
    p->size = size;
    p->addr = addr;
    p->count = *reps;
    p->df = df;
    p->data = value;

    hvmtrace_io_assist(is_mmio, p);

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(p);
    }
    else
    {
        rc = hvm_portio_intercept(p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p->count;
        p->state = STATE_IORESP_READY;
        hvm_io_assist();
        curr->arch.hvm_vcpu.io_state = HVMIO_none;
        break;
    case X86EMUL_UNHANDLEABLE:
        rc = X86EMUL_RETRY;
        if ( !hvm_send_assist_req(curr) )
            curr->arch.hvm_vcpu.io_state = HVMIO_none;
        else if ( p_data == NULL )
            rc = X86EMUL_OKAY;
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
        return rc;

 finish_access:
    if ( p_data != NULL )
        memcpy(p_data, &curr->arch.hvm_vcpu.io_data, size);

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                curr->arch.hvm_vcpu.mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <
                  sizeof(curr->arch.hvm_vcpu.mmio_large_read)) )
            {
                memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
                       p_data, size);
                curr->arch.hvm_vcpu.mmio_large_read_bytes += size;
            }
        }
    }

    return X86EMUL_OKAY;
}
Exemplo n.º 10
0
long p2m_pt_audit_p2m(struct p2m_domain *p2m)
{
    unsigned long entry_count = 0, pmbad = 0;
    unsigned long mfn, gfn, m2pfn;
    int test_linear;
    struct domain *d = p2m->domain;

    ASSERT(p2m_locked_by_me(p2m));
    ASSERT(pod_locked_by_me(p2m));

    test_linear = ( (d == current->domain)
                    && !pagetable_is_null(current->arch.monitor_table) );
    if ( test_linear )
        flush_tlb_local();

    /* Audit part one: walk the domain's p2m table, checking the entries. */
    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        l2_pgentry_t *l2e;
        l1_pgentry_t *l1e;
        int i1, i2;

#if CONFIG_PAGING_LEVELS == 4
        l4_pgentry_t *l4e;
        l3_pgentry_t *l3e;
        int i4, i3;
        l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m))));
#else /* CONFIG_PAGING_LEVELS == 3 */
        l3_pgentry_t *l3e;
        int i3;
        l3e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m))));
#endif

        gfn = 0;
#if CONFIG_PAGING_LEVELS >= 4
        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
        {
            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
            {
                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
                continue;
            }
            l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
#endif
            for ( i3 = 0;
                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
                  i3++ )
            {
                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
                {
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                    continue;
                }

                /* check for 1GB super page */
                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
                {
                    mfn = l3e_get_pfn(l3e[i3]);
                    ASSERT(mfn_valid(_mfn(mfn)));
                    /* we have to cover 512x512 4K pages */
                    for ( i2 = 0; 
                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
                          i2++)
                    {
                        m2pfn = get_gpfn_from_mfn(mfn+i2);
                        if ( m2pfn != (gfn + i2) )
                        {
                            pmbad++;
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
                                       m2pfn);
                            BUG();
                        }
                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }
                }

                l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                {
                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                    {
                        if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
                                  == p2m_populate_on_demand ) )
                            entry_count+=SUPERPAGE_PAGES;
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }
                    
                    /* check for super page */
                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
                    {
                        mfn = l2e_get_pfn(l2e[i2]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
                        {
                            m2pfn = get_gpfn_from_mfn(mfn+i1);
                            /* Allow shared M2Ps */
                            if ( (m2pfn != (gfn + i1)) &&
                                 (m2pfn != SHARED_M2P_ENTRY) )
                            {
                                pmbad++;
                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                           " -> gfn %#lx\n", gfn+i1, mfn+i1,
                                           m2pfn);
                                BUG();
                            }
                        }
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));

                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                    {
                        p2m_type_t type;

                        type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                        {
                            if ( type == p2m_populate_on_demand )
                                entry_count++;
                            continue;
                        }
                        mfn = l1e_get_pfn(l1e[i1]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        m2pfn = get_gpfn_from_mfn(mfn);
                        if ( m2pfn != gfn &&
                             type != p2m_mmio_direct &&
                             !p2m_is_grant(type) &&
                             !p2m_is_shared(type) )
                        {
                            pmbad++;
                            printk("mismatch: gfn %#lx -> mfn %#lx"
                                   " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            BUG();
                        }
                    }
                    unmap_domain_page(l1e);
                }
                unmap_domain_page(l2e);
            }
#if CONFIG_PAGING_LEVELS >= 4
            unmap_domain_page(l3e);
        }
#endif

#if CONFIG_PAGING_LEVELS == 4
        unmap_domain_page(l4e);
#else /* CONFIG_PAGING_LEVELS == 3 */
        unmap_domain_page(l3e);
#endif

    }

    if ( entry_count != p2m->pod.entry_count )
    {
        printk("%s: refcounted entry count %ld, audit count %lu!\n",
               __func__,
               p2m->pod.entry_count,
               entry_count);
        BUG();
    }

    return pmbad;
}
Exemplo n.º 11
0
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    uint32_t missing;
    mfn_t top_mfn;
    void *top_map;
    p2m_type_t p2mt;
    walk_t gw;
    unsigned long top_gfn;
    struct page_info *top_page;

    /* Get the top-level table's MFN */
    top_gfn = cr3 >> PAGE_SHIFT;
    top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn,
                                     &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        ASSERT(p2m_is_hostp2m(p2m));
        pfec[0] = PFEC_page_paged;
        if ( top_page )
            put_page(top_page);
        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
        return gfn_x(INVALID_GFN);
    }
    if ( p2m_is_shared(p2mt) )
    {
        pfec[0] = PFEC_page_shared;
        if ( top_page )
            put_page(top_page);
        return gfn_x(INVALID_GFN);
    }
    if ( !top_page )
    {
        pfec[0] &= ~PFEC_page_present;
        goto out_tweak_pfec;
    }
    top_mfn = _mfn(page_to_mfn(top_page));

    /* Map the top-level table and call the tree-walker */
    ASSERT(mfn_valid(top_mfn));
    top_map = map_domain_page(top_mfn);
#if GUEST_PAGING_LEVELS == 3
    top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
    missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map);
    unmap_domain_page(top_map);
    put_page(top_page);

    /* Interpret the answer */
    if ( missing == 0 )
    {
        gfn_t gfn = guest_walk_to_gfn(&gw);
        struct page_info *page;
        page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt,
                                     NULL, P2M_ALLOC | P2M_UNSHARE);
        if ( page )
            put_page(page);
        if ( p2m_is_paging(p2mt) )
        {
            ASSERT(p2m_is_hostp2m(p2m));
            pfec[0] = PFEC_page_paged;
            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
            return gfn_x(INVALID_GFN);
        }
        if ( p2m_is_shared(p2mt) )
        {
            pfec[0] = PFEC_page_shared;
            return gfn_x(INVALID_GFN);
        }

        if ( page_order )
            *page_order = guest_walk_to_page_order(&gw);

        return gfn_x(gfn);
    }

    if ( missing & _PAGE_PRESENT )
        pfec[0] &= ~PFEC_page_present;

    if ( missing & _PAGE_INVALID_BITS ) 
        pfec[0] |= PFEC_reserved_bit;

    if ( missing & _PAGE_PKEY_BITS )
        pfec[0] |= PFEC_prot_key;

    if ( missing & _PAGE_PAGED )
        pfec[0] = PFEC_page_paged;

    if ( missing & _PAGE_SHARED )
        pfec[0] = PFEC_page_shared;

 out_tweak_pfec:
    /*
     * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
     * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
     */
    if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) )
        pfec[0] &= ~PFEC_insn_fetch;

    return gfn_x(INVALID_GFN);
}
Exemplo n.º 12
0
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); 
    if ( unlikely(p2m_is_paging(p2mt)) )
    {
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        put_gfn(d, gmfn);
        /* If the page hasn't yet been paged out, there is an
         * actual page that needs to be released. */
        if ( p2mt == p2m_ram_paging_out )
        {
            ASSERT(mfn_valid(mfn));
            page = mfn_to_page(mfn);
            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
                put_page(page);
        }
        p2m_mem_paging_drop_page(d, gmfn, p2mt);
        return 1;
    }
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                d->domain_id, gmfn);
        return 0;
    }
            
#ifdef CONFIG_X86_64
    if ( p2m_is_shared(p2mt) )
    {
        /* Unshare the page, bail out on error. We unshare because 
         * we might be the only one using this shared page, and we
         * need to trigger proper cleanup. Once done, this is 
         * like any other page. */
        if ( mem_sharing_unshare_page(d, gmfn, 0) )
        {
            put_gfn(d, gmfn);
            (void)mem_sharing_notify_enomem(d, gmfn, 0);
            return 0;
        }
        /* Maybe the mfn changed */
        mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt));
        ASSERT(!p2m_is_shared(p2mt));
    }
#endif /* CONFIG_X86_64 */

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);
            
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);
    put_gfn(d, gmfn);

    return 1;
}
Exemplo n.º 13
0
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                        unsigned long mfn, unsigned int page_order, 
                        p2m_type_t t)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    unsigned long i, ogfn;
    p2m_type_t ot;
    p2m_access_t a;
    mfn_t omfn;
    int pod_count = 0;
    int rc = 0;

    if ( !paging_mode_translate(d) )
    {
        if ( need_iommu(d) && t == p2m_ram_rw )
        {
            for ( i = 0; i < (1 << page_order); i++ )
            {
                rc = iommu_map_page(
                    d, mfn + i, mfn + i, IOMMUF_readable|IOMMUF_writable);
                if ( rc != 0 )
                {
                    while ( i-- > 0 )
                        iommu_unmap_page(d, mfn + i);
                    return rc;
                }
            }
        }
        return 0;
    }

    p2m_lock(p2m);

    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);

    /* First, remove m->p mappings for existing p->m mappings */
    for ( i = 0; i < (1UL << page_order); i++ )
    {
        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
        if ( p2m_is_shared(ot) )
        {
            /* Do an unshare to cleanly take care of all corner 
             * cases. */
            int rc;
            rc = mem_sharing_unshare_page(p2m->domain, gfn + i, 0);
            if ( rc )
            {
                p2m_unlock(p2m);
                /* NOTE: Should a guest domain bring this upon itself,
                 * there is not a whole lot we can do. We are buried
                 * deep in locks from most code paths by now. So, fail
                 * the call and don't try to sleep on a wait queue
                 * while placing the mem event.
                 *
                 * However, all current (changeset 3432abcf9380) code
                 * paths avoid this unsavoury situation. For now.
                 *
                 * Foreign domains are okay to place an event as they 
                 * won't go to sleep. */
                (void)mem_sharing_notify_enomem(p2m->domain, gfn + i, 0);
                return rc;
            }
            omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
            ASSERT(!p2m_is_shared(ot));
        }
        if ( p2m_is_grant(ot) )
        {
            /* Really shouldn't be unmapping grant maps this way */
            domain_crash(d);
            p2m_unlock(p2m);
            
            return -EINVAL;
        }
        else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
        {
            ASSERT(mfn_valid(omfn));
            set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
        }
        else if ( ot == p2m_populate_on_demand )
        {
            /* Count how man PoD entries we'll be replacing if successful */
            pod_count++;
        }
        else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
        {
            /* We're plugging a hole in the physmap where a paged out page was */
            atomic_dec(&d->paged_pages);
        }
    }

    /* Then, look for m->p mappings for this range and deal with them */
    for ( i = 0; i < (1UL << page_order); i++ )
    {
        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) == dom_cow )
        {
            /* This is no way to add a shared page to your physmap! */
            gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom %hu "
                        "physmap not allowed.\n", mfn+i, d->domain_id);
            p2m_unlock(p2m);
            return -EINVAL;
        }
        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
            continue;
        ogfn = mfn_to_gfn(d, _mfn(mfn+i));
        if ( (ogfn != INVALID_M2P_ENTRY) && (ogfn != gfn + i) )
        {
            /* This machine frame is already mapped at another physical
             * address */
            P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                      mfn + i, ogfn, gfn + i);
            omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL);
            if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
            {
                ASSERT(mfn_valid(omfn));
                P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                          ogfn , mfn_x(omfn));
                if ( mfn_x(omfn) == (mfn + i) )
                    p2m_remove_page(p2m, ogfn, mfn + i, 0);
            }
        }
    }

    /* Now, actually do the two-way mapping */
    if ( mfn_valid(_mfn(mfn)) ) 
    {
        if ( !set_p2m_entry(p2m, gfn, _mfn(mfn), page_order, t, p2m->default_access) )
        {
            rc = -EINVAL;
            goto out; /* Failed to update p2m, bail without updating m2p. */
        }
        if ( !p2m_is_grant(t) )
        {
            for ( i = 0; i < (1UL << page_order); i++ )
                set_gpfn_from_mfn(mfn+i, gfn+i);
        }
    }
    else
    {
        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
                 gfn, mfn);
        if ( !set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, 
                            p2m_invalid, p2m->default_access) )
            rc = -EINVAL;
        else
        {
            pod_lock(p2m);
            p2m->pod.entry_count -= pod_count;
            BUG_ON(p2m->pod.entry_count < 0);
            pod_unlock(p2m);
        }
    }

out:
    p2m_unlock(p2m);

    return rc;
}