static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
{
    struct evtchn *chn;
    struct domain *d = current->domain;
    struct vcpu   *v = d->vcpu[0];
    struct pirq   *info;
    int            port, pirq = bind->pirq;
    long           rc;

    if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
        return -EINVAL;

    if ( !is_hvm_domain(d) && !irq_access_permitted(d, pirq) )
        return -EPERM;

    spin_lock(&d->event_lock);

    if ( pirq_to_evtchn(d, pirq) != 0 )
        ERROR_EXIT(-EEXIST);

    if ( (port = get_free_port(d)) < 0 )
        ERROR_EXIT(port);

    chn = evtchn_from_port(d, port);

    info = pirq_get_info(d, pirq);
    if ( !info )
        ERROR_EXIT(-ENOMEM);
    info->evtchn = port;
    rc = (!is_hvm_domain(d)
          ? pirq_guest_bind(v, info,
                            !!(bind->flags & BIND_PIRQ__WILL_SHARE))
          : 0);
    if ( rc != 0 )
    {
        info->evtchn = 0;
        pirq_cleanup_check(info, d);
        goto out;
    }

    chn->state = ECS_PIRQ;
    chn->u.pirq.irq = pirq;
    link_pirq_port(port, chn, v);

    bind->port = port;

#ifdef CONFIG_X86
    if ( is_hvm_domain(d) && domain_pirq_to_irq(d, pirq) > 0 )
        map_domain_emuirq_pirq(d, pirq, IRQ_PT);
#endif

 out:
    spin_unlock(&d->event_lock);

    return rc;
}
static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
{
    struct evtchn *chn;
    struct domain *d = current->domain;
    struct vcpu   *v = d->vcpu[0];
    int            port, pirq = bind->pirq;
    long           rc;

    if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
        return -EINVAL;

    if ( !is_hvm_domain(d) && !irq_access_permitted(d, pirq) )
        return -EPERM;

    spin_lock(&d->event_lock);

    if ( d->pirq_to_evtchn[pirq] != 0 )
        ERROR_EXIT(-EEXIST);

    if ( (port = get_free_port(d)) < 0 )
        ERROR_EXIT(port);

    chn = evtchn_from_port(d, port);

    d->pirq_to_evtchn[pirq] = port;
    rc = (!is_hvm_domain(d)
          ? pirq_guest_bind(v, pirq,
                            !!(bind->flags & BIND_PIRQ__WILL_SHARE))
          : 0);
    if ( rc != 0 )
    {
        d->pirq_to_evtchn[pirq] = 0;
        goto out;
    }

    chn->state = ECS_PIRQ;
    chn->u.pirq.irq = pirq;
    link_pirq_port(port, chn, v);

    bind->port = port;

    if ( is_hvm_domain(d) && domain_pirq_to_irq(d, pirq) > 0 )
        map_domain_emuirq_pirq(d, pirq, IRQ_PT);

 out:
    spin_unlock(&d->event_lock);

    return rc;
}
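/*
 * Both evtchn_bind_pirq() variants above bail out through an ERROR_EXIT()
 * helper defined elsewhere in event_channel.c.  A minimal sketch of such a
 * macro is shown here for context: record the error in the local 'rc' and
 * jump to the common unlock path.  The exact logging call and message are
 * assumptions for illustration only.
 */
#define ERROR_EXIT(_errno)                                              \
    do {                                                                \
        gdprintk(XENLOG_WARNING, "EVTCHNOP failure: error %d\n",        \
                 (_errno));                                             \
        rc = (_errno);                                                  \
        goto out;                                                       \
    } while ( 0 )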
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
{
    u64 iommu_l2e;
    unsigned long flags;
    struct hvm_iommu *hd = domain_hvm_iommu(d);
    int iw = IOMMU_IO_WRITE_ENABLED;
    int ir = IOMMU_IO_READ_ENABLED;

    BUG_ON( !hd->root_table );

    spin_lock_irqsave(&hd->mapping_lock, flags);

    if ( is_hvm_domain(d) && !hd->p2m_synchronized )
        goto out;

    iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
    if ( iommu_l2e == 0 )
    {
        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
        spin_unlock_irqrestore(&hd->mapping_lock, flags);
        return -EFAULT;
    }
    set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);

 out:
    spin_unlock_irqrestore(&hd->mapping_lock, flags);
    return 0;
}
static int physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
{
    struct domain *d;
    int ret;

    ret = rcu_lock_target_domain_by_id(unmap->domid, &d);
    if ( ret )
        return ret;

    if ( is_hvm_domain(d) )
    {
        spin_lock(&d->event_lock);
        if ( domain_pirq_to_emuirq(d, unmap->pirq) != IRQ_UNBOUND )
            ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
        spin_unlock(&d->event_lock);
        if ( unmap->domid == DOMID_SELF || ret )
            goto free_domain;
    }

    ret = -EPERM;
    if ( !IS_PRIV_FOR(current->domain, d) )
        goto free_domain;

    spin_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);
    ret = unmap_domain_pirq(d, unmap->pirq);
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);

 free_domain:
    rcu_unlock_domain(d);
    return ret;
}
int switch_compat(struct domain *d)
{
    struct vcpu *v;
    int rc;

    if ( is_hvm_domain(d) || d->tot_pages != 0 )
        return -EACCES;
    if ( is_pv_32bit_domain(d) )
        return 0;

    d->arch.has_32bit_shinfo = 1;
    d->arch.is_32bit_pv = 1;

    for_each_vcpu( d, v )
    {
        if ( (rc = setup_compat_arg_xlat(v)) ||
             (rc = setup_compat_l4(v)) )
            goto undo_and_fail;
    }

    domain_set_alloc_bitsize(d);
    recalculate_cpuid_policy(d);

    d->arch.x87_fip_width = 4;

    return 0;

 undo_and_fail:
    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
    for_each_vcpu( d, v )
    {
        free_compat_arg_xlat(v);
        release_compat_l4(v);
    }

    return rc;
}
int physdev_unmap_pirq(domid_t domid, int pirq)
{
    struct domain *d;
    int ret;

    d = rcu_lock_domain_by_any_id(domid);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_unmap_domain_pirq(XSM_TARGET, d);
    if ( ret )
        goto free_domain;

    if ( is_hvm_domain(d) )
    {
        spin_lock(&d->event_lock);
        if ( domain_pirq_to_emuirq(d, pirq) != IRQ_UNBOUND )
            ret = unmap_domain_pirq_emuirq(d, pirq);
        spin_unlock(&d->event_lock);
        if ( domid == DOMID_SELF || ret )
            goto free_domain;
    }

    spin_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);
    ret = unmap_domain_pirq(d, pirq);
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);

 free_domain:
    rcu_unlock_domain(d);
    return ret;
}
/*
 * Currently all CPUs rendezvous at the MCE softirq handler, so there is no
 * need to consider the paging p2m type.
 * Currently only HVM guests using EPT paging mode are supported.
 * XXX the following situations are not yet handled:
 *     PoD, foreign mapped, granted, shared pages
 */
int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn)
{
    mfn_t r_mfn;
    p2m_type_t pt;
    int rc;

    /* Always trust dom0's MCE handler will prevent future access */
    if ( d == dom0 )
        return 0;

    if ( !mfn_valid(mfn_x(mfn)) )
        return -EINVAL;

    if ( !is_hvm_domain(d) || !paging_mode_hap(d) )
        return -ENOSYS;

    rc = -1;
    r_mfn = get_gfn_query(d, gfn, &pt);
    if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES )
    {
        ASSERT(mfn_x(r_mfn) == mfn_x(mfn));
        p2m_change_type(d, gfn, pt, p2m_ram_broken);
        rc = 0;
    }
    put_gfn(d, gfn);

    return rc;
}
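/*
 * Sketch of the P2M_UNMAP_TYPES mask consulted by unmmap_broken_page() above:
 * a bitmask of the ordinary RAM p2m types that may safely be remapped as
 * p2m_ram_broken.  The exact membership of the mask is an assumption made
 * here for illustration; it is defined alongside the MCE code.
 */
#define P2M_UNMAP_TYPES (p2m_to_mask(p2m_ram_rw) |          \
                         p2m_to_mask(p2m_ram_logdirty) |    \
                         p2m_to_mask(p2m_ram_ro))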
/*
 * For Intel MCE, broadcast the vMCE to all vcpus.
 * For AMD MCE, only inject the vMCE to vcpu0.
 *
 * @d:    domain into which the vMCE is injected
 * @vcpu: -1 (VMCE_INJECT_BROADCAST) broadcasts the vMCE to all vcpus;
 *        >= 0 injects the vMCE into that vcpu only
 */
int inject_vmce(struct domain *d, int vcpu)
{
    struct vcpu *v;
    int ret = -ESRCH;

    for_each_vcpu ( d, v )
    {
        if ( vcpu != VMCE_INJECT_BROADCAST && vcpu != v->vcpu_id )
            continue;

        if ( (is_hvm_domain(d) ||
              guest_has_trap_callback(d, v->vcpu_id, TRAP_machine_check)) &&
             !test_and_set_bool(v->mce_pending) )
        {
            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to d%d:v%d\n",
                       d->domain_id, v->vcpu_id);
            vcpu_kick(v);
            ret = 0;
        }
        else
        {
            mce_printk(MCE_QUIET, "Failed to inject vMCE to d%d:v%d\n",
                       d->domain_id, v->vcpu_id);
            ret = -EBUSY;
            break;
        }

        if ( vcpu != VMCE_INJECT_BROADCAST )
            break;
    }

    return ret;
}
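/*
 * Hypothetical caller sketch (not part of Xen): broadcast a vMCE to every
 * vcpu of a domain and log a warning if any vcpu could not take it.  The
 * wrapper name and the logging policy are assumptions for illustration only;
 * VMCE_INJECT_BROADCAST is the broadcast value documented above.
 */
static void example_broadcast_vmce(struct domain *d)
{
    if ( inject_vmce(d, VMCE_INJECT_BROADCAST) )
        printk(XENLOG_WARNING "d%d: vMCE not delivered to all vcpus\n",
               d->domain_id);
}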
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) ||
         a->extent_order > MAX_ORDER )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( i != a->nr_done && hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        if ( tb_init_done )
        {
            struct {
                u64 gfn;
                int d:16,order:16;
            } t;

            t.gfn = gmfn;
            t.d = a->domain->domain_id;
            t.order = a->extent_order;

            __trace_var(TRC_MEM_DECREASE_RESERVATION, 0, sizeof(t), &t);
        }

        /* See if populate-on-demand wants to handle this */
        if ( is_hvm_domain(a->domain)
             && p2m_pod_decrease_reservation(a->domain, gmfn, a->extent_order) )
            continue;

        /*
         * With the lack of an IOMMU on some ARM platforms, a domain with a
         * DMA-capable device must retrieve the same pfn when the hypercall
         * populate_physmap is called.
         */
        if ( is_domain_direct_mapped(a->domain) )
            continue;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    mcd_mem_dec_trap(a->domain, (a->nr_extents - a->nr_done));

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        if ( tb_init_done )
        {
            struct {
                u64 gfn;
                int d:16,order:16;
            } t;

            t.gfn = gmfn;
            t.d = a->domain->domain_id;
            t.order = a->extent_order;

            __trace_var(TRC_MEM_DECREASE_RESERVATION, 0, sizeof(t), &t);
        }

        /* See if populate-on-demand wants to handle this */
        if ( is_hvm_domain(a->domain)
             && p2m_pod_decrease_reservation(a->domain, gmfn, a->extent_order) )
            continue;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
    mcd_mem_upt_trap(a->domain);
}
int amd_iommu_sync_p2m(struct domain *d)
{
    unsigned long mfn, gfn, flags;
    u64 iommu_l2e;
    struct list_head *entry;
    struct page_info *page;
    struct hvm_iommu *hd;
    int iw = IOMMU_IO_WRITE_ENABLED;
    int ir = IOMMU_IO_READ_ENABLED;

    if ( !is_hvm_domain(d) )
        return 0;

    hd = domain_hvm_iommu(d);

    spin_lock_irqsave(&hd->mapping_lock, flags);

    if ( hd->p2m_synchronized )
        goto out;

    for ( entry = d->page_list.next; entry != &d->page_list;
          entry = entry->next )
    {
        page = list_entry(entry, struct page_info, list);
        mfn = page_to_mfn(page);
        gfn = get_gpfn_from_mfn(mfn);

        if ( gfn == INVALID_M2P_ENTRY )
            continue;

        iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);

        if ( iommu_l2e == 0 )
        {
            amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
            spin_unlock_irqrestore(&hd->mapping_lock, flags);
            return -EFAULT;
        }

        set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
    }

    hd->p2m_synchronized = 1;

 out:
    spin_unlock_irqrestore(&hd->mapping_lock, flags);
    return 0;
}
static void vmcb_dump(unsigned char ch)
{
    struct domain *d;
    struct vcpu *v;

    printk("*********** VMCB Areas **************\n");

    rcu_read_lock(&domlist_read_lock);

    for_each_domain ( d )
    {
        if ( !is_hvm_domain(d) )
            continue;
        printk("\n>>> Domain %d <<<\n", d->domain_id);
        for_each_vcpu ( d, v )
        {
            printk("\tVCPU %d\n", v->vcpu_id);
            svm_vmcb_dump("key_handler", v->arch.hvm_svm.vmcb);
        }
    }

    rcu_read_unlock(&domlist_read_lock);
}
static void vmcs_dump(unsigned char ch)
{
    struct domain *d;
    struct vcpu *v;

    printk("*********** VMCS Areas **************\n");

    rcu_read_lock(&domlist_read_lock);

    for_each_domain ( d )
    {
        if ( !is_hvm_domain(d) )
            continue;
        printk("\n>>> Domain %d <<<\n", d->domain_id);
        for_each_vcpu ( d, v )
        {
            printk("\tVCPU %d\n", v->vcpu_id);
            vmcs_dump_vcpu(v);
        }
    }

    rcu_read_unlock(&domlist_read_lock);
}
void send_guest_pirq(struct domain *d, const struct pirq *pirq)
{
    int port;
    struct evtchn *chn;

    /*
     * PV guests: It should not be possible to race with __evtchn_close(). The
     *     caller of this function must synchronise with pirq_guest_unbind().
     * HVM guests: Port is legitimately zero when the guest disables the
     *     emulated interrupt/evtchn.
     */
    if ( pirq == NULL || (port = pirq->evtchn) == 0 )
    {
        BUG_ON(!is_hvm_domain(d));
        return;
    }

    chn = evtchn_from_port(d, port);
    evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port);
}
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
{
    u64 iommu_l2e;
    unsigned long flags;
    struct amd_iommu *iommu;
    struct hvm_iommu *hd = domain_hvm_iommu(d);

    BUG_ON( !hd->root_table );

    spin_lock_irqsave(&hd->mapping_lock, flags);

    if ( is_hvm_domain(d) && !hd->p2m_synchronized )
    {
        spin_unlock_irqrestore(&hd->mapping_lock, flags);
        return 0;
    }

    iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);

    if ( iommu_l2e == 0 )
    {
        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
        spin_unlock_irqrestore(&hd->mapping_lock, flags);
        return -EFAULT;
    }

    /* mark PTE as 'page not present' */
    clear_iommu_l1e_present(iommu_l2e, gfn);
    spin_unlock_irqrestore(&hd->mapping_lock, flags);

    /* send INVALIDATE_IOMMU_PAGES command */
    for_each_amd_iommu ( iommu )
    {
        spin_lock_irqsave(&iommu->lock, flags);
        invalidate_iommu_page(iommu, (u64)gfn << PAGE_SHIFT, hd->domain_id);
        flush_command_buffer(iommu);
        spin_unlock_irqrestore(&iommu->lock, flags);
    }

    return 0;
}
static inline uint32_t get_capabilities(struct domain *d)
{
    uint32_t capabilities = 0;

    /*
     * At the moment only Intel HVM domains are supported. However, event
     * delivery could be extended to AMD and PV domains.
     */
    if ( !is_hvm_domain(d) || !cpu_has_vmx )
        return capabilities;

    capabilities = (1 << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
                   (1 << XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR) |
                   (1 << XEN_DOMCTL_MONITOR_EVENT_SOFTWARE_BREAKPOINT) |
                   (1 << XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST);

    /* Since we know this is on VMX, we can just call the hvm func */
    if ( hvm_is_singlestep_supported() )
        capabilities |= (1 << XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP);

    return capabilities;
}
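/*
 * Hypothetical caller sketch (not part of Xen): reject a monitor-event
 * subscription when the domain does not advertise the requested event type
 * in its capability bitmap.  The wrapper name, the 'mop_event' parameter and
 * the -EOPNOTSUPP policy are assumptions for illustration only.
 */
static int example_check_monitor_event(struct domain *d, unsigned int mop_event)
{
    uint32_t caps = get_capabilities(d);

    return (caps & (1U << mop_event)) ? 0 : -EOPNOTSUPP;
}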
int __init construct_dom0(struct domain *d, const module_t *image,
                          unsigned long image_headroom, module_t *initrd,
                          void *(*bootstrap_map)(const module_t *),
                          char *cmdline)
{
    /* Sanity! */
    BUG_ON(d->domain_id != 0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(d->vcpu[0]->is_initialised);

    process_pending_softirqs();

#ifdef CONFIG_SHADOW_PAGING
    if ( opt_dom0_shadow && !dom0_pvh )
    {
        opt_dom0_shadow = false;
        printk(XENLOG_WARNING "Shadow Dom0 requires PVH. Option ignored.\n");
    }
#endif

    return (is_hvm_domain(d) ? dom0_construct_pvh : dom0_construct_pv)
           (d, image, image_headroom, initrd, bootstrap_map, cmdline);
}
int physdev_map_pirq(domid_t domid, int type, int *index, int *pirq_p,
                     struct msi_info *msi)
{
    struct domain *d = current->domain;
    int pirq, irq, ret = 0;
    void *map_data = NULL;

    if ( domid == DOMID_SELF && is_hvm_domain(d) )
    {
        /*
         * Only makes sense for vector-based callback, else HVM-IRQ logic
         * calls back into itself and deadlocks on hvm_domain.irq_lock.
         */
        if ( !is_hvm_pv_evtchn_domain(d) )
            return -EINVAL;

        return physdev_hvm_map_pirq(d, type, index, pirq_p);
    }

    d = rcu_lock_domain_by_any_id(domid);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_map_domain_pirq(XSM_TARGET, d);
    if ( ret )
        goto free_domain;

    /* Verify or get irq. */
    switch ( type )
    {
    case MAP_PIRQ_TYPE_GSI:
        if ( *index < 0 || *index >= nr_irqs_gsi )
        {
            dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
                    d->domain_id, *index);
            ret = -EINVAL;
            goto free_domain;
        }

        irq = domain_pirq_to_irq(current->domain, *index);
        if ( irq <= 0 )
        {
            if ( is_hardware_domain(current->domain) )
                irq = *index;
            else
            {
                dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
                        d->domain_id);
                ret = -EINVAL;
                goto free_domain;
            }
        }
        break;

    case MAP_PIRQ_TYPE_MSI:
        if ( !msi->table_base )
            msi->entry_nr = 1;
        irq = *index;
        if ( irq == -1 )
    case MAP_PIRQ_TYPE_MULTI_MSI:
            irq = create_irq(NUMA_NO_NODE);

        if ( irq < nr_irqs_gsi || irq >= nr_irqs )
        {
            dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
                    d->domain_id);
            ret = -EINVAL;
            goto free_domain;
        }

        msi->irq = irq;
        map_data = msi;
        break;

    default:
        dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
                d->domain_id, type);
        ret = -EINVAL;
        goto free_domain;
    }

    spin_lock(&pcidevs_lock);
    /* Verify or get pirq. */
    spin_lock(&d->event_lock);
    pirq = domain_irq_to_pirq(d, irq);
    if ( *pirq_p < 0 )
    {
        if ( pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
                    d->domain_id, *index, *pirq_p, pirq);
            if ( pirq < 0 )
            {
                ret = -EBUSY;
                goto done;
            }
        }
        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
        {
            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
                ret = -EDOM;
            else if ( msi->entry_nr != 1 && !iommu_intremap )
                ret = -EOPNOTSUPP;
            else
            {
                while ( msi->entry_nr & (msi->entry_nr - 1) )
                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
                pirq = get_free_pirqs(d, msi->entry_nr);
                if ( pirq < 0 )
                {
                    while ( (msi->entry_nr >>= 1) > 1 )
                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
                            break;
                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
                            d->domain_id, msi->entry_nr << 1);
                    ret = pirq;
                }
            }
            if ( ret < 0 )
                goto done;
        }
        else
        {
            pirq = get_free_pirq(d, type);
            if ( pirq < 0 )
            {
                dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
                ret = pirq;
                goto done;
            }
        }
    }
    else
    {
        if ( pirq && pirq != *pirq_p )
        {
            dprintk(XENLOG_G_ERR, "dom%d: pirq %d conflicts with irq %d\n",
                    d->domain_id, *index, *pirq_p);
            ret = -EEXIST;
            goto done;
        }
        else
            pirq = *pirq_p;
    }

    ret = map_domain_pirq(d, pirq, irq, type, map_data);
    if ( ret == 0 )
        *pirq_p = pirq;

 done:
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
    if ( ret != 0 )
        switch ( type )
        {
        case MAP_PIRQ_TYPE_MSI:
            if ( *index == -1 )
        case MAP_PIRQ_TYPE_MULTI_MSI:
                destroy_irq(irq);
            break;
        }

 free_domain:
    rcu_unlock_domain(d);
    return ret;
}
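/*
 * Stand-alone illustration (not part of Xen) of the rounding idiom used in
 * the MAP_PIRQ_TYPE_MULTI_MSI path above: a requested MSI entry count is
 * rounded up to the next power of two by repeatedly adding its lowest set
 * bit until only one bit remains.  For example, 5 -> 6 -> 8 and 12 -> 16.
 */
static unsigned int example_round_up_pow2(unsigned int n)
{
    while ( n & (n - 1) )       /* more than one bit set? */
        n += n & -n;            /* add the lowest set bit */
    return n;
}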
static void update_domain_cpuid_info(struct domain *d,
                                     const xen_domctl_cpuid_t *ctl)
{
    switch ( ctl->input[0] )
    {
    case 0: {
        union {
            typeof(boot_cpu_data.x86_vendor_id) str;
            struct {
                uint32_t ebx, edx, ecx;
            } reg;
        } vendor_id = {
            .reg = {
                .ebx = ctl->ebx,
                .edx = ctl->edx,
                .ecx = ctl->ecx
            }
        };
        int old_vendor = d->arch.x86_vendor;

        d->arch.x86_vendor = get_cpu_vendor(vendor_id.str, gcv_guest);

        if ( is_hvm_domain(d) && (d->arch.x86_vendor != old_vendor) )
        {
            struct vcpu *v;

            for_each_vcpu( d, v )
                hvm_update_guest_vendor(v);
        }

        break;
    }

    case 1:
        d->arch.x86 = (ctl->eax >> 8) & 0xf;
        if ( d->arch.x86 == 0xf )
            d->arch.x86 += (ctl->eax >> 20) & 0xff;
        d->arch.x86_model = (ctl->eax >> 4) & 0xf;
        if ( d->arch.x86 >= 0x6 )
            d->arch.x86_model |= (ctl->eax >> 12) & 0xf0;

        if ( is_pv_domain(d) && ((levelling_caps & LCAP_1cd) == LCAP_1cd) )
        {
            uint64_t mask = cpuidmask_defaults._1cd;
            uint32_t ecx = ctl->ecx & pv_featureset[FEATURESET_1c];
            uint32_t edx = ctl->edx & pv_featureset[FEATURESET_1d];

            /*
             * Must expose hosts HTT and X2APIC value so a guest using native
             * CPUID can correctly interpret other leaves which cannot be
             * masked.
             */
            if ( cpu_has_x2apic )
                ecx |= cpufeat_mask(X86_FEATURE_X2APIC);
            if ( cpu_has_htt )
                edx |= cpufeat_mask(X86_FEATURE_HTT);

            switch ( boot_cpu_data.x86_vendor )
            {
            case X86_VENDOR_INTEL:
                /*
                 * Intel masking MSRs are documented as AND masks.
                 * Experimentally, they are applied after OSXSAVE and APIC
                 * are fast-forwarded from real hardware state.
                 */
                mask &= ((uint64_t)edx << 32) | ecx;

                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
                else
                    ecx = 0;
                edx = cpufeat_mask(X86_FEATURE_APIC);

                mask |= ((uint64_t)edx << 32) | ecx;
                break;

            case X86_VENDOR_AMD:
                mask &= ((uint64_t)ecx << 32) | edx;

                /*
                 * AMD masking MSRs are documented as overrides.
                 * Experimentally, fast-forwarding of the OSXSAVE and APIC
                 * bits from real hardware state only occurs if the MSR has
                 * the respective bits set.
                 */
                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
                else
                    ecx = 0;
                edx = cpufeat_mask(X86_FEATURE_APIC);

                mask |= ((uint64_t)ecx << 32) | edx;
                break;
            }

            d->arch.pv_domain.cpuidmasks->_1cd = mask;
        }
        break;

    case 6:
        if ( is_pv_domain(d) && ((levelling_caps & LCAP_6c) == LCAP_6c) )
        {
            uint64_t mask = cpuidmask_defaults._6c;

            if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
                mask &= (~0ULL << 32) | ctl->ecx;

            d->arch.pv_domain.cpuidmasks->_6c = mask;
        }
        break;

    case 7:
        if ( ctl->input[1] != 0 )
            break;

        if ( is_pv_domain(d) && ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) )
        {
            uint64_t mask = cpuidmask_defaults._7ab0;
            uint32_t eax = ctl->eax;
            uint32_t ebx = ctl->ebx & pv_featureset[FEATURESET_7b0];

            if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
                mask &= ((uint64_t)eax << 32) | ebx;

            d->arch.pv_domain.cpuidmasks->_7ab0 = mask;
        }
        break;

    case 0xd:
        if ( ctl->input[1] != 1 )
            break;

        if ( is_pv_domain(d) && ((levelling_caps & LCAP_Da1) == LCAP_Da1) )
        {
            uint64_t mask = cpuidmask_defaults.Da1;
            uint32_t eax = ctl->eax & pv_featureset[FEATURESET_Da1];

            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
                mask &= (~0ULL << 32) | eax;

            d->arch.pv_domain.cpuidmasks->Da1 = mask;
        }
        break;

    case 0x80000001:
        if ( is_pv_domain(d) && ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) )
        {
            uint64_t mask = cpuidmask_defaults.e1cd;
            uint32_t ecx = ctl->ecx & pv_featureset[FEATURESET_e1c];
            uint32_t edx = ctl->edx & pv_featureset[FEATURESET_e1d];

            /*
             * Must expose hosts CMP_LEGACY value so a guest using native
             * CPUID can correctly interpret other leaves which cannot be
             * masked.
             */
            if ( cpu_has_cmp_legacy )
                ecx |= cpufeat_mask(X86_FEATURE_CMP_LEGACY);

            /* If not emulating AMD, clear the duplicated features in e1d. */
            if ( d->arch.x86_vendor != X86_VENDOR_AMD )
                edx &= ~CPUID_COMMON_1D_FEATURES;

            switch ( boot_cpu_data.x86_vendor )
            {
            case X86_VENDOR_INTEL:
                mask &= ((uint64_t)edx << 32) | ecx;
                break;

            case X86_VENDOR_AMD:
                mask &= ((uint64_t)ecx << 32) | edx;

                /*
                 * Fast-forward bits - Must be set in the masking MSR for
                 * fast-forwarding to occur in hardware.
                 */
                ecx = 0;
                edx = cpufeat_mask(X86_FEATURE_APIC);

                mask |= ((uint64_t)ecx << 32) | edx;
                break;
            }

            d->arch.pv_domain.cpuidmasks->e1cd = mask;
        }
        break;
    }
}
unsigned long __init dom0_compute_nr_pages(
    struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
{
    nodeid_t node;
    unsigned long avail = 0, nr_pages, min_pages, max_pages;
    bool_t need_paging;

    for_each_node_mask ( node, dom0_nodes )
        avail += avail_domheap_pages_region(node, 0, 0) +
                 initial_images_nrpages(node);

    /* Reserve memory for further dom0 vcpu-struct allocations... */
    avail -= (d->max_vcpus - 1UL)
             << get_order_from_bytes(sizeof(struct vcpu));
    /* ...and compat_l4's, if needed. */
    if ( is_pv_32bit_domain(d) )
        avail -= d->max_vcpus - 1;

    /* Reserve memory for iommu_dom0_init() (rough estimate). */
    if ( iommu_enabled )
    {
        unsigned int s;

        for ( s = 9; s < BITS_PER_LONG; s += 9 )
            avail -= max_pdx >> s;
    }

    need_paging = is_hvm_domain(d) &&
        (!iommu_hap_pt_share || !paging_mode_hap(d));
    for ( ; ; need_paging = 0 )
    {
        nr_pages = dom0_nrpages;
        min_pages = dom0_min_nrpages;
        max_pages = dom0_max_nrpages;

        /*
         * If allocation isn't specified, reserve 1/16th of available memory
         * for things like DMA buffers. This reservation is clamped to a
         * maximum of 128MB.
         */
        if ( nr_pages == 0 )
            nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));

        /* Negative specification means "all memory - specified amount". */
        if ( (long)nr_pages  < 0 ) nr_pages  += avail;
        if ( (long)min_pages < 0 ) min_pages += avail;
        if ( (long)max_pages < 0 ) max_pages += avail;

        /* Clamp according to min/max limits and available memory. */
        nr_pages = max(nr_pages, min_pages);
        nr_pages = min(nr_pages, max_pages);
        nr_pages = min(nr_pages, avail);

        if ( !need_paging )
            break;

        /* Reserve memory for shadow or HAP. */
        avail -= dom0_paging_pages(d, nr_pages);
    }

    if ( is_pv_domain(d) &&
         (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&
         ((dom0_min_nrpages <= 0) || (nr_pages > min_pages)) )
    {
        /*
         * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M
         * note) require that there is enough virtual space beyond the initial
         * allocation to set up their initial page tables. This space is
         * roughly the same size as the p2m table, so make sure the initial
         * allocation doesn't consume more than about half the space that's
         * available between params.virt_base and the address space end.
         */
        unsigned long vstart, vend, end;
        size_t sizeof_long = is_pv_32bit_domain(d) ? sizeof(int) : sizeof(long);

        vstart = parms->virt_base;
        vend = round_pgup(parms->virt_kend);
        if ( !parms->unmapped_initrd )
            vend += round_pgup(initrd_len);
        end = vend + nr_pages * sizeof_long;

        if ( end > vstart )
            end += end - vstart;
        if ( end <= vstart ||
             (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) )
        {
            end = sizeof_long >= sizeof(end) ? 0 : 1UL << (8 * sizeof_long);
            nr_pages = (end - vend) / (2 * sizeof_long);
            if ( dom0_min_nrpages > 0 && nr_pages < min_pages )
                nr_pages = min_pages;
            printk("Dom0 memory clipped to %lu pages\n", nr_pages);
        }
    }

    d->max_pages = min_t(unsigned long, max_pages, UINT_MAX);

    return nr_pages;
}
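/*
 * Stand-alone illustration (not part of Xen) of the default-sizing rule used
 * in dom0_compute_nr_pages() above: with no explicit allocation, 1/16th of
 * available memory is reserved for things like DMA buffers, capped at 128MB.
 * The code above encodes that reservation as a negative page count and then
 * adds 'avail' back in; this sketch performs the equivalent arithmetic
 * directly, assuming 4KiB pages.
 */
static unsigned long example_default_dom0_pages(unsigned long avail)
{
    unsigned long cap = 128UL << (20 - 12);   /* 128MB in 4KiB pages */
    unsigned long reserve = (avail / 16 < cap) ? avail / 16 : cap;

    /* "Negative specification" means all memory minus the reservation. */
    return avail - reserve;
}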
static long __evtchn_close(struct domain *d1, int port1)
{
    struct domain *d2 = NULL;
    struct vcpu   *v;
    struct evtchn *chn1, *chn2;
    int            port2;
    long           rc = 0;

 again:
    spin_lock(&d1->event_lock);

    if ( !port_is_valid(d1, port1) )
    {
        rc = -EINVAL;
        goto out;
    }

    chn1 = evtchn_from_port(d1, port1);

    /* Guest cannot close a Xen-attached event channel. */
    if ( unlikely(consumer_is_xen(chn1)) )
    {
        rc = -EINVAL;
        goto out;
    }

    switch ( chn1->state )
    {
    case ECS_FREE:
    case ECS_RESERVED:
        rc = -EINVAL;
        goto out;

    case ECS_UNBOUND:
        break;

    case ECS_PIRQ: {
        struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq);

        if ( !pirq )
            break;
        if ( !is_hvm_domain(d1) )
            pirq_guest_unbind(d1, pirq);
        pirq->evtchn = 0;
        pirq_cleanup_check(pirq, d1);
        unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
#ifdef CONFIG_X86
        if ( is_hvm_domain(d1) && domain_pirq_to_irq(d1, pirq->pirq) > 0 )
            unmap_domain_pirq_emuirq(d1, pirq->pirq);
#endif
        break;
    }

    case ECS_VIRQ:
        for_each_vcpu ( d1, v )
        {
            if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
                continue;
            v->virq_to_evtchn[chn1->u.virq] = 0;
            spin_barrier(&v->virq_lock);
        }
        break;

    case ECS_IPI:
        break;

    case ECS_INTERDOMAIN:
        if ( d2 == NULL )
        {
            d2 = chn1->u.interdomain.remote_dom;

            /* If we unlock d1 then we could lose d2. Must get a reference. */
            if ( unlikely(!get_domain(d2)) )
                BUG();

            if ( d1 < d2 )
            {
                spin_lock(&d2->event_lock);
            }
            else if ( d1 != d2 )
            {
                spin_unlock(&d1->event_lock);
                spin_lock(&d2->event_lock);
                goto again;
            }
        }
        else if ( d2 != chn1->u.interdomain.remote_dom )
        {
            /*
             * We can only get here if the port was closed and re-bound after
             * unlocking d1 but before locking d2 above. We could retry but
             * it is easier to return the same error as if we had seen the
             * port in ECS_CLOSED. It must have passed through that state for
             * us to end up here, so it's a valid error to return.
             */
            rc = -EINVAL;
            goto out;
        }

        port2 = chn1->u.interdomain.remote_port;
        BUG_ON(!port_is_valid(d2, port2));

        chn2 = evtchn_from_port(d2, port2);
        BUG_ON(chn2->state != ECS_INTERDOMAIN);
        BUG_ON(chn2->u.interdomain.remote_dom != d1);

        chn2->state = ECS_UNBOUND;
        chn2->u.unbound.remote_domid = d1->domain_id;
        break;

    default:
        BUG();
    }

    /* Clear pending event to avoid unexpected behavior on re-bind. */
    clear_bit(port1, &shared_info(d1, evtchn_pending));

    /* Reset binding to vcpu0 when the channel is freed. */
    chn1->state          = ECS_FREE;
    chn1->notify_vcpu_id = 0;

    xsm_evtchn_close_post(chn1);

 out:
    if ( d2 != NULL )
    {
        if ( d1 != d2 )
            spin_unlock(&d2->event_lock);
        put_domain(d2);
    }

    spin_unlock(&d1->event_lock);

    return rc;
}
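/*
 * Stand-alone sketch (hypothetical helper, not Xen API) of the deadlock-
 * avoidance pattern the ECS_INTERDOMAIN branch above relies on: when two
 * peers must both be locked, always take the lower-addressed lock first.
 * If the lock already held is the higher one, drop it, take the lower one,
 * and retry from the top; the real code re-validates the channel state after
 * re-locking, which this sketch omits.
 */
static void example_lock_pair(spinlock_t *a, spinlock_t *b)
{
    bool have_b = false;

 again:
    spin_lock(a);

    if ( !have_b && a != b )
    {
        if ( a < b )
            spin_lock(b);        /* address order already respected */
        else
        {
            /* Taking b now would invert the order: back off and retry. */
            spin_unlock(a);
            spin_lock(b);
            have_b = true;
            goto again;          /* re-take a with b (the lower lock) held */
        }
        have_b = true;
    }

    /* ... critical section with both locks held (or a == b) ... */

    if ( a != b )
        spin_unlock(b);
    spin_unlock(a);
}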
int physdev_map_pirq(domid_t domid, int type, int *index, int *pirq_p,
                     struct msi_info *msi)
{
    struct domain *d = current->domain;
    int pirq, irq, ret = 0;
    void *map_data = NULL;

    if ( domid == DOMID_SELF && is_hvm_domain(d) )
    {
        /*
         * Only makes sense for vector-based callback, else HVM-IRQ logic
         * calls back into itself and deadlocks on hvm_domain.irq_lock.
         */
        if ( !is_hvm_pv_evtchn_domain(d) )
            return -EINVAL;

        return physdev_hvm_map_pirq(d, type, index, pirq_p);
    }

    d = rcu_lock_domain_by_any_id(domid);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_map_domain_pirq(XSM_TARGET, d);
    if ( ret )
        goto free_domain;

    /* Verify or get irq. */
    switch ( type )
    {
    case MAP_PIRQ_TYPE_GSI:
        if ( *index < 0 || *index >= nr_irqs_gsi )
        {
            dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
                    d->domain_id, *index);
            ret = -EINVAL;
            goto free_domain;
        }

        irq = domain_pirq_to_irq(current->domain, *index);
        if ( irq <= 0 )
        {
            if ( is_hardware_domain(current->domain) )
                irq = *index;
            else
            {
                dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
                        d->domain_id);
                ret = -EINVAL;
                goto free_domain;
            }
        }
        break;

    case MAP_PIRQ_TYPE_MSI:
        irq = *index;
        if ( irq == -1 )
            irq = create_irq(NUMA_NO_NODE);

        if ( irq < nr_irqs_gsi || irq >= nr_irqs )
        {
            dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
                    d->domain_id);
            ret = -EINVAL;
            goto free_domain;
        }

        msi->irq = irq;
        map_data = msi;
        break;

    default:
        dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
                d->domain_id, type);
        ret = -EINVAL;
        goto free_domain;
    }

    spin_lock(&pcidevs_lock);
    /* Verify or get pirq. */
    spin_lock(&d->event_lock);
    pirq = domain_irq_to_pirq(d, irq);
    if ( *pirq_p < 0 )
    {
        if ( pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
                    d->domain_id, *index, *pirq_p, pirq);
            if ( pirq < 0 )
            {
                ret = -EBUSY;
                goto done;
            }
        }
        else
        {
            pirq = get_free_pirq(d, type);
            if ( pirq < 0 )
            {
                dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
                ret = pirq;
                goto done;
            }
        }
    }
    else
    {
        if ( pirq && pirq != *pirq_p )
        {
            dprintk(XENLOG_G_ERR, "dom%d: pirq %d conflicts with irq %d\n",
                    d->domain_id, *index, *pirq_p);
            ret = -EEXIST;
            goto done;
        }
        else
            pirq = *pirq_p;
    }

    ret = map_domain_pirq(d, pirq, irq, type, map_data);
    if ( ret == 0 )
        *pirq_p = pirq;

 done:
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
        destroy_irq(irq);

 free_domain:
    rcu_unlock_domain(d);
    return ret;
}
/* Nested HVM on/off per domain */
bool_t nestedhvm_enabled(struct domain *d)
{
    return is_hvm_domain(d) && d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM];
}
static int physdev_map_pirq(struct physdev_map_pirq *map)
{
    struct domain *d;
    int pirq, irq, ret = 0;
    struct msi_info _msi;
    void *map_data = NULL;

    ret = rcu_lock_target_domain_by_id(map->domid, &d);
    if ( ret )
        return ret;

    if ( map->domid == DOMID_SELF && is_hvm_domain(d) )
    {
        ret = physdev_hvm_map_pirq(d, map);
        goto free_domain;
    }

    if ( !IS_PRIV_FOR(current->domain, d) )
    {
        ret = -EPERM;
        goto free_domain;
    }

    /* Verify or get irq. */
    switch ( map->type )
    {
    case MAP_PIRQ_TYPE_GSI:
        if ( map->index < 0 || map->index >= nr_irqs_gsi )
        {
            dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
                    d->domain_id, map->index);
            ret = -EINVAL;
            goto free_domain;
        }

        irq = domain_pirq_to_irq(current->domain, map->index);
        if ( irq <= 0 )
        {
            if ( IS_PRIV(current->domain) )
                irq = map->index;
            else
            {
                dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
                        d->domain_id);
                ret = -EINVAL;
                goto free_domain;
            }
        }
        break;

    case MAP_PIRQ_TYPE_MSI:
        irq = map->index;
        if ( irq == -1 )
            irq = create_irq();

        if ( irq < 0 || irq >= nr_irqs )
        {
            dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
                    d->domain_id);
            ret = -EINVAL;
            goto free_domain;
        }

        _msi.bus = map->bus;
        _msi.devfn = map->devfn;
        _msi.entry_nr = map->entry_nr;
        _msi.table_base = map->table_base;
        _msi.irq = irq;
        map_data = &_msi;
        break;

    default:
        dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
                d->domain_id, map->type);
        ret = -EINVAL;
        goto free_domain;
    }

    spin_lock(&pcidevs_lock);
    /* Verify or get pirq. */
    spin_lock(&d->event_lock);
    pirq = domain_irq_to_pirq(d, irq);
    if ( map->pirq < 0 )
    {
        if ( pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
                    d->domain_id, map->index, map->pirq, pirq);
            if ( pirq < 0 )
            {
                ret = -EBUSY;
                goto done;
            }
        }
        else
        {
            pirq = get_free_pirq(d, map->type, map->index);
            if ( pirq < 0 )
            {
                dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
                ret = pirq;
                goto done;
            }
        }
    }
    else
    {
        if ( pirq && pirq != map->pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: pirq %d conflicts with irq %d\n",
                    d->domain_id, map->index, map->pirq);
            ret = -EEXIST;
            goto done;
        }
        else
            pirq = map->pirq;
    }

    ret = map_domain_pirq(d, pirq, irq, map->type, map_data);
    if ( ret == 0 )
        map->pirq = pirq;

 done:
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
    if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
        destroy_irq(irq);

 free_domain:
    rcu_unlock_domain(d);
    return ret;
}