Example #1
static void enable_hypercall_page(struct domain *d)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    uint8_t *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    /*
     * We set bit 31 in %eax (a reserved field in the Viridian hypercall
     * calling convention) to differentiate Xen and Viridian hypercalls.
     */
    *(u8  *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */
    *(u32 *)(p + 1) = 0x80000000;
    *(u8  *)(p + 5) = 0x0f; /* vmcall/vmmcall */
    *(u8  *)(p + 6) = 0x01;
    *(u8  *)(p + 7) = (cpu_has_vmx ? 0xc1 : 0xd9);
    *(u8  *)(p + 8) = 0xc3; /* ret */
    memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */
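
    /*
     * The start of the page now decodes as the following instruction
     * sequence (0f 01 d9 being the vmmcall encoding used when
     * cpu_has_vmx is false):
     *
     *   0d 00 00 00 80    or     $0x80000000,%eax
     *   0f 01 c1          vmcall
     *   c3                ret
     *   cc cc cc ...      int3 padding to the end of the page
     */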

    unmap_domain_page(p);

    put_page_and_type(page);
}
Example #2
static void initialize_apic_assist(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    uint8_t *p;

    /*
     * We don't yet make use of the APIC assist page but by setting
     * the CPUID3A_MSR_APIC_ACCESS bit in CPUID leaf 40000003 we are duty
     * bound to support the MSR. We therefore do just enough to keep Windows
     * happy.
     *
     * See http://msdn.microsoft.com/en-us/library/ff538657%28VS.85%29.aspx for
     * details of how Windows uses the page.
     */

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn,
                 page_to_mfn(page));
        return;
    }

    p = __map_domain_page(page);

    *(u32 *)p = 0;

    unmap_domain_page(p);

    put_page_and_type(page);
}
Example #3
static void initialize_apic_assist(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.msr.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    void *va;

    /*
     * See section 13.3.4.1 of the specification for details of this
     * enlightenment.
     */

    if ( !page )
        goto fail;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        goto fail;
    }

    va = __map_domain_page_global(page);
    if ( !va )
    {
        put_page_and_type(page);
        goto fail;
    }

    *(uint32_t *)va = 0;

    if ( viridian_feature_mask(v->domain) & HVMPV_apic_assist )
    {
        /*
         * If we overwrite an existing address here then something has
         * gone wrong and a domain page will leak. Instead crash the
         * domain to make the problem obvious.
         */
        if ( v->arch.hvm_vcpu.viridian.apic_assist.va )
            domain_crash(d);

        v->arch.hvm_vcpu.viridian.apic_assist.va = va;
        return;
    }

    unmap_domain_page_global(va);
    put_page_and_type(page);
    return;

 fail:
    gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn,
             page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
}
Example #4
File: vpmu.c  Project: Fantu/Xen
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    struct page_info *page;
    uint64_t gfn = params->val;

    if ( (vpmu_mode == XENPMU_MODE_OFF) ||
         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
        return -EINVAL;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return -EINVAL;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    v = d->vcpu[params->vcpu];
    vpmu = vcpu_vpmu(v);

    spin_lock(&vpmu->vpmu_lock);

    if ( v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -EEXIST;
    }

    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    if ( !v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -ENOMEM;
    }

    vpmu_initialise(v);

    spin_unlock(&vpmu->vpmu_lock);

    return 0;
}
Example #5
static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio;
    ioreq_t p = {
        .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
        .addr = addr,
        .size = size,
        .dir = dir,
        .df = df,
        .data = ram_gpa,
        .data_is_ptr = (p_data == NULL),
    };
    unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
    p2m_type_t p2mt;
    struct page_info *ram_page;
    int rc;

    /* Check for paged out page */
    ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        p2m_mem_paging_populate(curr->domain, ram_gfn);
        return X86EMUL_RETRY;
    }
    if ( p2m_is_shared(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_RETRY;
    }

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( !p.data_is_ptr && (dir == IOREQ_WRITE) )
    {
        memcpy(&p.data, p_data, size);
        p_data = NULL;
    }

    vio = &curr->arch.hvm_vcpu.hvm_io;

    if ( is_mmio && !p.data_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data, &vio->mmio_large_read[addr - pa],
                       size);
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( vio->io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        vio->io_state = HVMIO_none;
        if ( p_data == NULL )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_UNHANDLEABLE;
        }
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !p.data_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (vio->mmio_large_write_pa +
                       vio->mmio_large_write_bytes)) )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_RETRY;
        }
    default:
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( hvm_io_pending(curr) )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending?\n");
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    vio->io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
    vio->io_size = size;

    /*
     * When retrying a repeated string instruction, force exit to guest after
     * completion of the retried iteration to allow handling of interrupts.
     */
    if ( vio->mmio_retrying )
        *reps = 1;

    p.count = *reps;

    if ( dir == IOREQ_WRITE )
        hvmtrace_io_assist(is_mmio, &p);

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(&p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(&p);
    }
    else
    {
        rc = hvm_portio_intercept(&p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p.count;
        p.state = STATE_IORESP_READY;
        if ( !vio->mmio_retry )
        {
            hvm_io_assist(&p);
            vio->io_state = HVMIO_none;
        }
        else
            /* Defer hvm_io_assist() invocation to hvm_do_resume(). */
            vio->io_state = HVMIO_handle_mmio_awaiting_completion;
        break;
    case X86EMUL_UNHANDLEABLE:
        /* If there is no backing DM, just ignore accesses */
        if ( !hvm_has_dm(curr->domain) )
        {
            rc = X86EMUL_OKAY;
            vio->io_state = HVMIO_none;
        }
        else
        {
            rc = X86EMUL_RETRY;
            if ( !hvm_send_assist_req(&p) )
                vio->io_state = HVMIO_none;
            else if ( p_data == NULL )
                rc = X86EMUL_OKAY;
        }
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
    {
        if ( ram_page )
            put_page(ram_page);
        return rc;
    }

 finish_access:
    if ( dir == IOREQ_READ )
        hvmtrace_io_assist(is_mmio, &p);

    if ( p_data != NULL )
        memcpy(p_data, &vio->io_data, size);

    if ( is_mmio && !p.data_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                vio->mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <= sizeof(vio->mmio_large_read)) )
            {
                memcpy(&vio->mmio_large_read[bytes], p_data, size);
                vio->mmio_large_read_bytes += size;
            }
        }
    }

    if ( ram_page )
        put_page(ram_page);
    return X86EMUL_OKAY;
}

int hvmemul_do_pio(
    unsigned long port, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(0, port, reps, size, ram_gpa, dir, df, p_data);
}

static int hvmemul_do_mmio(
    paddr_t gpa, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(1, gpa, reps, size, ram_gpa, dir, df, p_data);
}

/*
 * Convert addr from linear to physical form, valid over the range
 * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
 * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
 * @pfec indicates the access checks to be performed during page-table walks.
 */
static int hvmemul_linear_to_phys(
    unsigned long addr,
    paddr_t *paddr,
    unsigned int bytes_per_rep,
    unsigned long *reps,
    uint32_t pfec,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct vcpu *curr = current;
    unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
    int reverse;

    /*
     * Clip repetitions to a sensible maximum. This avoids extensive looping in
     * this function while still amortising the cost of I/O trap-and-emulate.
     */
    *reps = min_t(unsigned long, *reps, 4096);

    /* With no paging it's easy: linear == physical. */
    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
    {
        *paddr = addr;
        return X86EMUL_OKAY;
    }

    /* Reverse mode if this is a backwards multi-iteration string operation. */
    reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);

    if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
    {
        /* Do page-straddling first iteration forwards via recursion. */
        paddr_t _paddr;
        unsigned long one_rep = 1;
        int rc = hvmemul_linear_to_phys(
            addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        pfn = _paddr >> PAGE_SHIFT;
    }
    else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
Example #6
static void update_reference_tsc(struct domain *d, bool_t initialize)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    HV_REFERENCE_TSC_PAGE *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    if ( initialize )
        clear_page(p);

    /*
     * This enlightenment must be disabled if the host TSC is not invariant.
     * However it is also disabled if vtsc is true (which means rdtsc is being
     * emulated). This generally happens when guest TSC freq and host TSC freq
     * don't match. The TscScale value could be adjusted to cope with this,
     * allowing vtsc to be turned off, but support for this is not yet present
     * in the hypervisor. Thus it is possible that migrating a Windows VM
     * between hosts of differing TSC frequencies may result in large
     * differences in guest performance.
     */
    if ( !host_tsc_is_safe() || d->arch.vtsc )
    {
        /*
         * The specification states that valid values of TscSequence range
         * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
         * this mechanism is no longer a reliable source of time and that
         * the VM should fall back to a different source.
         *
         * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate
         * the spec and rely on a value of 0 to indicate that this
         * enlightenment should no longer be used. These two kernel
         * versions are currently the only ones to make use of this
         * enlightenment, so just use 0 here.
         */
        p->TscSequence = 0;

        printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
               d->domain_id);
        goto out;
    }

    /*
     * The guest will calculate reference time according to the following
     * formula:
     *
     * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
     *
     * Windows uses a 100ns tick, so we need a scale which is the number
     * of 100ns intervals per cpu tick, shifted left by 64.
     */
    p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
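
    /*
     * Worked example (assuming, purely for illustration, a 2GHz
     * invariant TSC, i.e. tsc_khz == 2000000):
     *
     *   TscScale = ((10000 << 32) / 2000000) << 32
     *            = 21474836 << 32
     *            ~= 0.005 * 2^64
     *
     * so ((RDTSC() * TscScale) >> 64) ~= RDTSC() / 200, which maps
     * 2 * 10^9 cpu ticks per second onto 10^7 100ns intervals per second.
     */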

    p->TscSequence++;
    if ( p->TscSequence == 0xFFFFFFFF ||
         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
        p->TscSequence = 1;

 out:
    unmap_domain_page(p);

    put_page_and_type(page);
}
Example #7
static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    paddr_t value = ram_gpa;
    int value_is_ptr = (p_data == NULL);
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio;
    ioreq_t *p = get_ioreq(curr);
    unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
    p2m_type_t p2mt;
    struct page_info *ram_page;
    int rc;

    /* Check for paged out page */
    ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        p2m_mem_paging_populate(curr->domain, ram_gfn);
        return X86EMUL_RETRY;
    }
    if ( p2m_is_shared(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_RETRY;
    }

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( (p_data != NULL) && (dir == IOREQ_WRITE) )
    {
        memcpy(&value, p_data, size);
        p_data = NULL;
    }

    vio = &curr->arch.hvm_vcpu.hvm_io;

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data, &vio->mmio_large_read[addr - pa],
                       size);
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( vio->io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        vio->io_state = HVMIO_none;
        if ( p_data == NULL )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_UNHANDLEABLE;
        }
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (vio->mmio_large_write_pa +
                       vio->mmio_large_write_bytes)) )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_RETRY;
        }
    default:
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( p->state != STATE_IOREQ_NONE )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
                 p->state);
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    vio->io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
    vio->io_size = size;

    p->dir = dir;
    p->data_is_ptr = value_is_ptr;
    p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
    p->size = size;
    p->addr = addr;
    p->count = *reps;
    p->df = df;
    p->data = value;

    if ( dir == IOREQ_WRITE )
        hvmtrace_io_assist(is_mmio, p);

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(p);
    }
    else
    {
        rc = hvm_portio_intercept(p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p->count;
        p->state = STATE_IORESP_READY;
        hvm_io_assist();
        vio->io_state = HVMIO_none;
        break;
    case X86EMUL_UNHANDLEABLE:
        rc = X86EMUL_RETRY;
        if ( !hvm_send_assist_req(curr) )
            vio->io_state = HVMIO_none;
        else if ( p_data == NULL )
            rc = X86EMUL_OKAY;
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
    {
        if ( ram_page )
            put_page(ram_page);
        return rc;
    }

 finish_access:
    if ( dir == IOREQ_READ )
        hvmtrace_io_assist(is_mmio, p);

    if ( p_data != NULL )
        memcpy(p_data, &vio->io_data, size);

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                vio->mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <
                  sizeof(vio->mmio_large_read)) )
            {
                memcpy(&vio->mmio_large_read[addr - pa], p_data, size);
                vio->mmio_large_read_bytes += size;
            }
        }
    }

    if ( ram_page )
        put_page(ram_page);
    return X86EMUL_OKAY;
}