static void enable_hypercall_page(struct domain *d) { unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); uint8_t *p; if ( !page || !get_page_type(page, PGT_writable_page) ) { if ( page ) put_page(page); gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN)); return; } p = __map_domain_page(page); /* * We set the bit 31 in %eax (reserved field in the Viridian hypercall * calling convention) to differentiate Xen and Viridian hypercalls. */ *(u8 *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */ *(u32 *)(p + 1) = 0x80000000; *(u8 *)(p + 5) = 0x0f; /* vmcall/vmmcall */ *(u8 *)(p + 6) = 0x01; *(u8 *)(p + 7) = (cpu_has_vmx ? 0xc1 : 0xd9); *(u8 *)(p + 8) = 0xc3; /* ret */ memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */ unmap_domain_page(p); put_page_and_type(page); }
static void initialize_apic_assist(struct vcpu *v) { struct domain *d = v->domain; unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); uint8_t *p; /* * We don't yet make use of the APIC assist page but by setting * the CPUID3A_MSR_APIC_ACCESS bit in CPUID leaf 40000003 we are duty * bound to support the MSR. We therefore do just enough to keep windows * happy. * * See http://msdn.microsoft.com/en-us/library/ff538657%28VS.85%29.aspx for * details of how Windows uses the page. */ if ( !page || !get_page_type(page, PGT_writable_page) ) { if ( page ) put_page(page); gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, page_to_mfn(page)); return; } p = __map_domain_page(page); *(u32 *)p = 0; unmap_domain_page(p); put_page_and_type(page); }
/*
 * Map the per-vCPU APIC assist page and, if the enlightenment is enabled
 * for the domain, keep a permanent global mapping of it in
 * viridian.apic_assist.va.  On any failure the MSR write is logged and
 * otherwise ignored.
 */
static void initialize_apic_assist(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.msr.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    void *va;

    /*
     * See section 13.3.4.1 of the specification for details of this
     * enlightenment.
     */

    if ( !page )
        goto fail;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        goto fail;
    }

    /* Global mapping: the page must stay accessible outside this call. */
    va = __map_domain_page_global(page);
    if ( !va )
    {
        put_page_and_type(page);
        goto fail;
    }

    /* Zero the 'no EOI required' word before exposing the page. */
    *(uint32_t *)va = 0;

    if ( viridian_feature_mask(v->domain) & HVMPV_apic_assist )
    {
        /*
         * If we overwrite an existing address here then something has
         * gone wrong and a domain page will leak. Instead crash the
         * domain to make the problem obvious.
         */
        if ( v->arch.hvm_vcpu.viridian.apic_assist.va )
            domain_crash(d);

        /* The page ref, type ref and mapping are retained deliberately. */
        v->arch.hvm_vcpu.viridian.apic_assist.va = va;
        return;
    }

    /* Feature not enabled: drop the mapping and references again. */
    unmap_domain_page_global(va);
    put_page_and_type(page);
    return;

 fail:
    /* page may be NULL (bad GFN) or already released on the later paths. */
    gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
             gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
}
/*
 * Initialise PV PMU state for the vCPU identified by params->vcpu,
 * mapping the guest page at params->val as the shared xenpmu_data area.
 *
 * Returns 0 on success, -EINVAL on bad mode/vcpu/page, -EEXIST if the
 * vCPU is already initialised, -ENOMEM if the global mapping fails.
 */
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    struct page_info *page;
    uint64_t gfn = params->val;
    int ret;

    if ( vpmu_mode == XENPMU_MODE_OFF ||
         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
        return -EINVAL;

    if ( params->vcpu >= d->max_vcpus || d->vcpu[params->vcpu] == NULL )
        return -EINVAL;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( page == NULL )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    v = d->vcpu[params->vcpu];
    vpmu = vcpu_vpmu(v);

    spin_lock(&vpmu->vpmu_lock);

    /* Refuse a second initialisation of the same vCPU. */
    if ( v->arch.vpmu.xenpmu_data != NULL )
    {
        ret = -EEXIST;
        goto out_unlock;
    }

    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    if ( v->arch.vpmu.xenpmu_data == NULL )
    {
        ret = -ENOMEM;
        goto out_unlock;
    }

    vpmu_initialise(v);

    spin_unlock(&vpmu->vpmu_lock);
    return 0;

 out_unlock:
    spin_unlock(&vpmu->vpmu_lock);
    put_page_and_type(page);
    return ret;
}
/*
 * Release an allocation back to its owning size-class allocator.
 *
 * The tag returned by get_page_type() selects which free routine gets
 * the pointer.  NOTE(review): behaviour for ptr == NULL depends on
 * get_page_type() — confirm it tolerates NULL, as free(3) must.
 */
void free(void *ptr)
{
    tag page_type = get_page_type(ptr);

    switch (page_type)
    {
    case TYPE_MEDIUM:
        medium_free(ptr);
        return;
    case TYPE_SMALL:
        small_free(ptr);
        return;
    case TYPE_BIG:
        big_free(ptr);
        return;
    default:
        /*
         * Unknown tag: deliberate no-op (previously an implicit
         * fall-off).  An unrecognised tag may indicate heap metadata
         * corruption; consider logging here.
         */
        return;
    }
}
/*
 * (Re)populate the guest's reference TSC page.  If 'initialize' is set the
 * page is cleared first.  When the host TSC cannot be used reliably the
 * page's TscSequence is set to the value Windows treats as "don't use this
 * enlightenment".
 */
static void update_reference_tsc(struct domain *d, bool_t initialize)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    HV_REFERENCE_TSC_PAGE *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    if ( initialize )
        clear_page(p);

    /*
     * This enlightenment must be disabled if the host TSC is not invariant.
     * However it is also disabled if vtsc is true (which means rdtsc is being
     * emulated). This generally happens when guest TSC freq and host TSC freq
     * don't match. The TscScale value could be adjusted to cope with this,
     * allowing vtsc to be turned off, but support for this is not yet present
     * in the hypervisor. Thus it is possible that migrating a Windows VM
     * between hosts of differing TSC frequencies may result in large
     * differences in guest performance.
     */
    if ( !host_tsc_is_safe() || d->arch.vtsc )
    {
        /*
         * The specification states that valid values of TscSequence range
         * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
         * this mechanism is no longer a reliable source of time and that
         * the VM should fall back to a different source.
         *
         * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate
         * the spec. and rely on a value of 0 to indicate that this
         * enlightenment should no longer be used. These two kernel
         * versions are currently the only ones to make use of this
         * enlightenment, so just use 0 here.
         */
        p->TscSequence = 0;

        printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
               d->domain_id);
        goto out;
    }

    /*
     * The guest will calculate reference time according to the following
     * formula:
     *
     * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
     *
     * Windows uses a 100ns tick, so we need a scale which is cpu
     * ticks per 100ns shifted left by 64.
     */
    p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;

    /* Bump the sequence so the guest re-reads the (possibly new) scale. */
    p->TscSequence++;
    if ( p->TscSequence == 0xFFFFFFFF ||
         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
        p->TscSequence = 1;

 out:
    unmap_domain_page(p);
    put_page_and_type(page);
}
/*
 * Returns 0 if TLB flush / invalidate required by caller.
 * va will indicate the address to be invalidated.
 *
 * addr is _either_ a host virtual address, or the address of the pte to
 * update, as indicated by the GNTMAP_contains_pte flag.
 */
static void
__gnttab_map_grant_ref(
    struct gnttab_map_grant_ref *op)
{
    struct domain *ld, *rd;
    struct vcpu *led;
    int handle;
    unsigned long frame = 0;
    int rc = GNTST_okay;
    unsigned int cache_flags;
    struct active_grant_entry *act;
    struct grant_mapping *mt;
    grant_entry_t *sha;
    union grant_combo scombo, prev_scombo, new_scombo;

    /*
     * We bound the number of times we retry CMPXCHG on memory locations that
     * we share with a guest OS. The reason is that the guest can modify that
     * location at a higher rate than we can read-modify-CMPXCHG, so the guest
     * could cause us to livelock. There are a few cases where it is valid for
     * the guest to race our updates (e.g., to change the GTF_readonly flag),
     * so we allow a few retries before failing.
     */
    int retries = 0;

    led = current;
    ld = led->domain;

    /* At least one of device/host mapping must be requested. */
    if ( unlikely((op->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
    {
        gdprintk(XENLOG_INFO, "Bad flags in grant map op (%x).\n", op->flags);
        op->status = GNTST_bad_gntref;
        return;
    }

    if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) )
    {
        gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom);
        op->status = GNTST_bad_domain;
        return;
    }

    rc = xsm_grant_mapref(ld, rd, op->flags);
    if ( rc )
    {
        rcu_unlock_domain(rd);
        op->status = GNTST_permission_denied;
        return;
    }

    /* Reserve a maptrack slot before taking the grant table lock. */
    if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
    {
        rcu_unlock_domain(rd);
        gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle.\n");
        op->status = GNTST_no_device_space;
        return;
    }

    spin_lock(&rd->grant_table->lock);

    /* Bounds check on the grant ref */
    if ( unlikely(op->ref >= nr_grant_entries(rd->grant_table)))
        PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);

    act = &active_entry(rd->grant_table, op->ref);
    sha = &shared_entry(rd->grant_table, op->ref);

    /* If already pinned, check the active domid and avoid refcnt overflow. */
    if ( act->pin &&
         ((act->domid != ld->domain_id) ||
          (act->pin & 0x80808080U) != 0) )
        PIN_FAIL(unlock_out, GNTST_general_error,
                 "Bad domain (%d != %d), or risk of counter overflow %08x\n",
                 act->domid, ld->domain_id, act->pin);

    /*
     * Only touch the shared flags when this is the first pin, or when a
     * write pin is requested and no write pin exists yet.
     */
    if ( !act->pin ||
         (!(op->flags & GNTMAP_readonly) &&
          !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
    {
        scombo.word = *(u32 *)&sha->flags;

        /*
         * This loop attempts to set the access (reading/writing) flags
         * in the grant table entry. It tries a cmpxchg on the field
         * up to five times, and then fails under the assumption that
         * the guest is misbehaving.
         */
        for ( ; ; )
        {
            /* If not already pinned, check the grant domid and type. */
            if ( !act->pin &&
                 (((scombo.shorts.flags & GTF_type_mask) !=
                   GTF_permit_access) ||
                  (scombo.shorts.domid != ld->domain_id)) )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Bad flags (%x) or dom (%d). (expected dom %d)\n",
                         scombo.shorts.flags, scombo.shorts.domid,
                         ld->domain_id);

            new_scombo = scombo;
            new_scombo.shorts.flags |= GTF_reading;

            if ( !(op->flags & GNTMAP_readonly) )
            {
                new_scombo.shorts.flags |= GTF_writing;
                if ( unlikely(scombo.shorts.flags & GTF_readonly) )
                    PIN_FAIL(unlock_out, GNTST_general_error,
                             "Attempt to write-pin a r/o grant entry.\n");
            }

            prev_scombo.word = cmpxchg((u32 *)&sha->flags,
                                       scombo.word, new_scombo.word);
            if ( likely(prev_scombo.word == scombo.word) )
                break;

            if ( retries++ == 4 )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Shared grant entry is unstable.\n");

            /* Lost the race: retry against the value we actually saw. */
            scombo = prev_scombo;
        }

        if ( !act->pin )
        {
            act->domid = scombo.shorts.domid;
            act->frame = gmfn_to_mfn(rd, sha->frame);
        }
    }

    /* Account this mapping in the pin counters. */
    if ( op->flags & GNTMAP_device_map )
        act->pin += (op->flags & GNTMAP_readonly) ?
            GNTPIN_devr_inc : GNTPIN_devw_inc;
    if ( op->flags & GNTMAP_host_map )
        act->pin += (op->flags & GNTMAP_readonly) ?
            GNTPIN_hstr_inc : GNTPIN_hstw_inc;

    frame = act->frame;

    cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );

    spin_unlock(&rd->grant_table->lock);

    if ( is_iomem_page(frame) )
    {
        if ( !iomem_access_permitted(rd, frame, frame) )
        {
            gdprintk(XENLOG_WARNING,
                     "Iomem mapping not permitted %lx (domain %d)\n",
                     frame, rd->domain_id);
            rc = GNTST_general_error;
            goto undo_out;
        }

        rc = create_grant_host_mapping(
            op->host_addr, frame, op->flags, cache_flags);
        if ( rc != GNTST_okay )
            goto undo_out;
    }
    else
    {
        /* Take a general (and possibly writable-type) ref on the frame. */
        if ( unlikely(!mfn_valid(frame)) ||
             unlikely(!(gnttab_host_mapping_get_page_type(op, ld, rd) ?
                        get_page_and_type(mfn_to_page(frame), rd,
                                          PGT_writable_page) :
                        get_page(mfn_to_page(frame), rd))) )
        {
            if ( !rd->is_dying )
                gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n",
                         frame);
            rc = GNTST_general_error;
            goto undo_out;
        }

        if ( op->flags & GNTMAP_host_map )
        {
            rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
            if ( rc != GNTST_okay )
            {
                /* Drop the refs taken just above before undoing the pin. */
                if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
                    put_page_type(mfn_to_page(frame));
                put_page(mfn_to_page(frame));
                goto undo_out;
            }

            if ( op->flags & GNTMAP_device_map )
            {
                /* Extra refs so the device mapping can be dropped on its own. */
                (void)get_page(mfn_to_page(frame), rd);
                if ( !(op->flags & GNTMAP_readonly) )
                    get_page_type(mfn_to_page(frame), PGT_writable_page);
            }
        }
    }

    TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);

    /* Record the mapping so it can be found again at unmap time. */
    mt = &maptrack_entry(ld->grant_table, handle);
    mt->domid = op->dom;
    mt->ref = op->ref;
    mt->flags = op->flags;

    op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
    op->handle = handle;
    op->status = GNTST_okay;

    rcu_unlock_domain(rd);
    return;

 undo_out:
    /* Failure after the pin counters were bumped: roll them back. */
    spin_lock(&rd->grant_table->lock);

    act = &active_entry(rd->grant_table, op->ref);
    sha = &shared_entry(rd->grant_table, op->ref);

    if ( op->flags & GNTMAP_device_map )
        act->pin -= (op->flags & GNTMAP_readonly) ?
            GNTPIN_devr_inc : GNTPIN_devw_inc;
    if ( op->flags & GNTMAP_host_map )
        act->pin -= (op->flags & GNTMAP_readonly) ?
            GNTPIN_hstr_inc : GNTPIN_hstw_inc;

    /* Clear shared-entry flags that no remaining pin justifies. */
    if ( !(op->flags & GNTMAP_readonly) &&
         !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
        gnttab_clear_flag(_GTF_writing, &sha->flags);

    if ( !act->pin )
        gnttab_clear_flag(_GTF_reading, &sha->flags);

 unlock_out:
    spin_unlock(&rd->grant_table->lock);
    op->status = rc;
    put_maptrack_handle(ld->grant_table, handle);
    rcu_unlock_domain(rd);
}