int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
                  unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
{
    struct domain *d = p2m->domain;
    unsigned long todo = 1ul << page_order;
    unsigned int order;
    int rc = 1;

    ASSERT(gfn_locked_by_me(p2m, gfn));

    while ( todo )
    {
        if ( hap_enabled(d) )
        {
            /*
             * Pick the largest order the hardware supports and for which
             * gfn, mfn and the remaining length are all suitably aligned.
             */
            if ( (((gfn | mfn_x(mfn) | todo) &
                   ((1ul << PAGE_ORDER_1G) - 1)) == 0) &&
                 hvm_hap_has_1gb(d) && opt_hap_1gb )
                order = PAGE_ORDER_1G;
            else if ( (((gfn | mfn_x(mfn) | todo) &
                        ((1ul << PAGE_ORDER_2M) - 1)) == 0) &&
                      hvm_hap_has_2mb(d) && opt_hap_2mb )
                order = PAGE_ORDER_2M;
            else
                order = PAGE_ORDER_4K;
        }
        else
            order = 0;

        if ( !p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma) )
            rc = 0;
        gfn += 1ul << order;
        if ( mfn_x(mfn) != INVALID_MFN )
            mfn = _mfn(mfn_x(mfn) + (1ul << order));
        todo -= 1ul << order;
    }

    return rc;
}
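#if 0 /* Detached illustration -- not part of the build. */
/*
 * The order selection above ORs gfn, mfn and the remaining frame count
 * together, so a single mask test proves all three are aligned to the
 * candidate superpage size at once.  A minimal standalone sketch of the
 * same arithmetic, assuming Xen's PAGE_ORDER_* values (4K = 0, 2M = 9,
 * 1G = 18); choose_order() is an illustrative name, not a Xen function.
 */
#include <stdio.h>

#define EX_PAGE_ORDER_4K  0
#define EX_PAGE_ORDER_2M  9
#define EX_PAGE_ORDER_1G  18

static unsigned int choose_order(unsigned long gfn, unsigned long mfn,
                                 unsigned long todo)
{
    unsigned long merged = gfn | mfn | todo;

    if ( (merged & ((1ul << EX_PAGE_ORDER_1G) - 1)) == 0 )
        return EX_PAGE_ORDER_1G;
    if ( (merged & ((1ul << EX_PAGE_ORDER_2M) - 1)) == 0 )
        return EX_PAGE_ORDER_2M;
    return EX_PAGE_ORDER_4K;
}

int main(void)
{
    /* Fully aligned 4GB region: eligible for 1GB mappings. */
    printf("%u\n", choose_order(0x0, 0x0, 1ul << 20));   /* 18 */
    /* mfn only 2MB-aligned: the OR trick demotes the order. */
    printf("%u\n", choose_order(0x0, 0x200, 1ul << 20)); /* 9 */
    /* Unaligned gfn: falls back to 4K. */
    printf("%u\n", choose_order(0x3, 0x3, 1ul << 20));   /* 0 */
    return 0;
}
#endif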
static void p2m_free_one(struct p2m_domain *p2m)
{
    if ( hap_enabled(p2m->domain) && cpu_has_vmx )
        ept_p2m_uninit(p2m);
    free_cpumask_var(p2m->dirty_cpumask);
    xfree(p2m);
}
// Allocate a new p2m table for a domain.
//
// The structure of the p2m table is that of a pagetable for xen (i.e. it is
// controlled by CONFIG_PAGING_LEVELS).
//
// Returns 0 for success or -errno.
//
int p2m_alloc_table(struct p2m_domain *p2m)
{
    mfn_t mfn = _mfn(INVALID_MFN);
    struct page_info *page, *p2m_top;
    unsigned int page_count = 0;
    unsigned long gfn = -1UL;
    struct domain *d = p2m->domain;

    p2m_lock(p2m);

    if ( !p2m_is_nestedp2m(p2m)
         && !page_list_empty(&d->page_list) )
    {
        P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
        p2m_unlock(p2m);
        return -EINVAL;
    }

    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        P2M_ERROR("p2m already allocated for this domain\n");
        p2m_unlock(p2m);
        return -EINVAL;
    }

    P2M_PRINTK("allocating p2m table\n");

    p2m_top = p2m_alloc_ptp(p2m, PGT_l4_page_table);
    if ( p2m_top == NULL )
    {
        p2m_unlock(p2m);
        return -ENOMEM;
    }

    p2m->phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));

    if ( hap_enabled(d) )
        iommu_share_p2m_table(d);

    P2M_PRINTK("populating p2m table\n");

    /* Initialise physmap tables for slot zero. Other code assumes this. */
    p2m->defer_nested_flush = 1;
    if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), PAGE_ORDER_4K,
                        p2m_invalid, p2m->default_access) )
        goto error;

    /* Copy all existing mappings from the page list and m2p */
    spin_lock(&p2m->domain->page_alloc_lock);
    page_list_for_each(page, &p2m->domain->page_list)
    {
        mfn = page_to_mfn(page);
        gfn = get_gpfn_from_mfn(mfn_x(mfn));
        /* Pages should not be shared that early */
        ASSERT(gfn != SHARED_M2P_ENTRY);
        page_count++;
        if ( gfn != INVALID_M2P_ENTRY
             && !set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                               p2m_ram_rw, p2m->default_access) )
            goto error_unlock;
    }
    spin_unlock(&p2m->domain->page_alloc_lock);
    p2m->defer_nested_flush = 0;

    P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
    p2m_unlock(p2m);
    return 0;

error_unlock:
    spin_unlock(&p2m->domain->page_alloc_lock);
 error:
    P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
               PRI_mfn "\n", gfn, mfn_x(mfn));
    p2m_unlock(p2m);
    return -ENOMEM;
}
/* Init the datastructures for later use by the p2m code */
static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
{
    int ret = 0;

    mm_rwlock_init(&p2m->lock);
    mm_lock_init(&p2m->pod.lock);
    INIT_LIST_HEAD(&p2m->np2m_list);
    INIT_PAGE_LIST_HEAD(&p2m->pages);
    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
    INIT_PAGE_LIST_HEAD(&p2m->pod.single);

    p2m->domain = d;
    p2m->default_access = p2m_access_rwx;
    p2m->np2m_base = P2M_BASE_EADDR;

    if ( hap_enabled(d) && cpu_has_vmx )
        ret = ept_p2m_init(p2m);
    else
        p2m_pt_init(p2m);

    return ret;
}
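#if 0 /* Hypothetical caller sketch -- not part of the build. */
/*
 * How p2m_initialise() and p2m_free_one() above would typically pair
 * up: allocate and zero the structure, allocate the dirty cpumask,
 * then initialise, unwinding on failure.  alloc_one_p2m() is an
 * illustrative name (Xen's own allocator is structured similarly);
 * xzalloc() and zalloc_cpumask_var() are real Xen primitives.
 */
static struct p2m_domain *alloc_one_p2m(struct domain *d)
{
    struct p2m_domain *p2m = xzalloc(struct p2m_domain);

    if ( p2m == NULL )
        return NULL;

    if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) )
        goto free_p2m;

    if ( p2m_initialise(d, p2m) )
        goto free_cpumask;

    return p2m;

 free_cpumask:
    free_cpumask_var(p2m->dirty_cpumask);
 free_p2m:
    xfree(p2m);
    return NULL;
}
#endif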
void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
                           uint32_t subleaf, struct cpuid_leaf *res)
{
    const struct domain *d = v->domain;

    ASSERT(is_viridian_domain(d));
    ASSERT(leaf >= 0x40000000 && leaf < 0x40000100);

    leaf -= 0x40000000;

    switch ( leaf )
    {
    case 0:
        res->a = 0x40000006; /* Maximum leaf */
        res->b = 0x7263694d; /* Magic numbers  */
        res->c = 0x666F736F;
        res->d = 0x76482074;
        break;

    case 1:
        res->a = 0x31237648; /* Version number */
        break;

    case 2:
        /*
         * Hypervisor information, but only if the guest has set its
         * own version number.
         */
        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
            break;
        res->a = 1; /* Build number */
        res->b = (xen_major_version() << 16) | xen_minor_version();
        res->c = 0; /* SP */
        res->d = 0; /* Service branch and number */
        break;

    case 3:
        /* Which hypervisor MSRs are available to the guest */
        res->a = (CPUID3A_MSR_APIC_ACCESS |
                  CPUID3A_MSR_HYPERCALL |
                  CPUID3A_MSR_VP_INDEX);
        if ( !(viridian_feature_mask(d) & HVMPV_no_freq) )
            res->a |= CPUID3A_MSR_FREQ;
        if ( viridian_feature_mask(d) & HVMPV_time_ref_count )
            res->a |= CPUID3A_MSR_TIME_REF_COUNT;
        if ( viridian_feature_mask(d) & HVMPV_reference_tsc )
            res->a |= CPUID3A_MSR_REFERENCE_TSC;
        break;

    case 4:
        /* Recommended hypercall usage. */
        if ( (d->arch.hvm_domain.viridian.guest_os_id.raw == 0) ||
             (d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
            break;
        res->a = CPUID4A_RELAX_TIMER_INT;
        if ( viridian_feature_mask(d) & HVMPV_hcall_remote_tlb_flush )
            res->a |= CPUID4A_HCALL_REMOTE_TLB_FLUSH;
        if ( !cpu_has_vmx_apic_reg_virt )
            res->a |= CPUID4A_MSR_BASED_APIC;
        res->b = 2047; /* long spin count */
        break;

    case 6:
        /* Detected and in use hardware features. */
        if ( cpu_has_vmx_virtualize_apic_accesses )
            res->a |= CPUID6A_APIC_OVERLAY;
        if ( cpu_has_vmx_msr_bitmap || (read_efer() & EFER_SVME) )
            res->a |= CPUID6A_MSR_BITMAPS;
        if ( hap_enabled(d) )
            res->a |= CPUID6A_NESTED_PAGING;
        break;
    }
}
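#if 0 /* Guest-side illustration -- not part of the build. */
/*
 * What the leaves above look like from inside the VM: the guest
 * executes CPUID with EAX = 0x40000000 + n.  Leaf 0 returns the
 * maximum leaf in EAX and the vendor signature in EBX:ECX:EDX; the
 * magic numbers 0x7263694d/0x666F736F/0x76482074 spell "Microsoft Hv"
 * in little-endian ASCII.  Standalone user-space sketch for x86-64.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
                  uint32_t *c, uint32_t *d)
{
    asm volatile ( "cpuid"
                   : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                   : "a" (leaf), "c" (0) );
}

int main(void)
{
    uint32_t a, b, c, d;
    char sig[13] = "";

    cpuid(0x40000000, &a, &b, &c, &d);
    memcpy(sig + 0, &b, 4);
    memcpy(sig + 4, &c, 4);
    memcpy(sig + 8, &d, 4);
    printf("max leaf %#x, signature \"%s\"\n", a, sig);
    return 0;
}
#endif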
// Returns 0 on error (out of memory)
static int
p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
              unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
{
    // XXX -- this might be able to be faster iff current->domain == d
    mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
    void *table = map_domain_page(mfn_x(table_mfn));
    unsigned long i, gfn_remainder = gfn;
    l1_pgentry_t *p2m_entry;
    l1_pgentry_t entry_content;
    l2_pgentry_t l2e_content;
    l3_pgentry_t l3e_content;
    int rv = 0;
    unsigned int iommu_pte_flags = (p2mt == p2m_ram_rw) ?
                                   IOMMUF_readable|IOMMUF_writable : 0;
    unsigned long old_mfn = 0;

    if ( tb_init_done )
    {
        struct {
            u64 gfn, mfn;
            int p2mt;
            int d:16,order:16;
        } t;

        t.gfn = gfn;
        t.mfn = mfn_x(mfn);
        t.p2mt = p2mt;
        t.d = p2m->domain->domain_id;
        t.order = page_order;

        __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
    }

#if CONFIG_PAGING_LEVELS >= 4
    if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
        goto out;
#endif
    /*
     * Try to allocate 1GB page table if this feature is supported.
     */
    if ( page_order == PAGE_ORDER_1G )
    {
        l1_pgentry_t old_entry = l1e_empty();
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L3_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        {
            /* We're replacing a non-SP page with a superpage.  Make sure to
             * handle freeing the table properly. */
            old_entry = *p2m_entry;
        }

        ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
        l3e_content = mfn_valid(mfn)
            ? l3e_from_pfn(mfn_x(mfn),
                           p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE)
            : l3e_empty();
        entry_content.l1 = l3e_content.l3;

        if ( entry_content.l1 != 0 )
        {
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
            old_mfn = l1e_get_pfn(*p2m_entry);
        }

        p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 3);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */

        /* Free old intermediate tables if necessary */
        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
            p2m_free_entry(p2m, &old_entry, page_order);
    }
    /*
     * When using PAE Xen, we only allow 33 bits of pseudo-physical
     * address in translated guests (i.e. 8 GBytes).  This restriction
     * comes from wanting to map the P2M table into the 16MB RO_MPT hole
     * in Xen's address space for translated PV guests.
     * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
     */
    else if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                              ((CONFIG_PAGING_LEVELS == 3)
                               ? (hap_enabled(p2m->domain) ? 4 : 8)
                               : L3_PAGETABLE_ENTRIES),
                              PGT_l2_page_table) )
        goto out;

    if ( page_order == PAGE_ORDER_4K )
    {
        if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn,
                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                             L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
            goto out;

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   0, L1_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);

        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct)
                            || p2m_is_paging(p2mt) )
            entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
                                             p2m_type_to_flags(p2mt, mfn));
        else
            entry_content = l1e_empty();

        if ( entry_content.l1 != 0 )
        {
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
            old_mfn = l1e_get_pfn(*p2m_entry);
        }
        /* level 1 entry */
        p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    }
    else if ( page_order == PAGE_ORDER_2M )
    {
        l1_pgentry_t old_entry = l1e_empty();
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L2_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);

        /* FIXME: Deal with 4k replaced by 2meg pages */
        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        {
            /* We're replacing a non-SP page with a superpage.  Make sure to
             * handle freeing the table properly. */
            old_entry = *p2m_entry;
        }

        ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
        if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
            l2e_content = l2e_from_pfn(mfn_x(mfn),
                                       p2m_type_to_flags(p2mt, mfn) |
                                       _PAGE_PSE);
        else
            l2e_content = l2e_empty();

        entry_content.l1 = l2e_content.l2;

        if ( entry_content.l1 != 0 )
        {
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
            old_mfn = l1e_get_pfn(*p2m_entry);
        }

        p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 2);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */

        /* Free old intermediate tables if necessary */
        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
            p2m_free_entry(p2m, &old_entry, page_order);
    }

    /* Track the highest gfn for which we have ever had a valid mapping */
    if ( p2mt != p2m_invalid
         && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
        p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;

    if ( iommu_enabled && need_iommu(p2m->domain) )
    {
        if ( iommu_hap_pt_share )
        {
            if ( old_mfn && (old_mfn != mfn_x(mfn)) )
                amd_iommu_flush_pages(p2m->domain, gfn, page_order);
        }
        else
        {
            if ( p2mt == p2m_ram_rw )
                for ( i = 0; i < (1UL << page_order); i++ )
                    iommu_map_page(p2m->domain, gfn+i, mfn_x(mfn)+i,
                                   IOMMUF_readable|IOMMUF_writable);
            else
                for ( i = 0; i < (1UL << page_order); i++ )
                    iommu_unmap_page(p2m->domain, gfn+i);
        }
    }

    /* Success */
    rv = 1;

out:
    unmap_domain_page(table);
    return rv;
}
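#if 0 /* Detached illustration -- not part of the build. */
/*
 * The walk above peels one 9-bit index off the gfn per level, using
 * the shifts passed to p2m_next_level()/p2m_find_entry():
 * L4_PAGETABLE_SHIFT - PAGE_SHIFT = 27, then 18, 9 and 0.  Minimal
 * standalone sketch of that decomposition, assuming x86-64's 512
 * entries (a 9-bit index) per level:
 */
#include <stdio.h>

int main(void)
{
    unsigned long gfn = 0x12345678ul;
    static const unsigned int shift[] = { 27, 18, 9, 0 };

    for ( unsigned int lvl = 0; lvl < 4; lvl++ )
        printf("L%u index: %lu\n", 4 - lvl,
               (gfn >> shift[lvl]) & 0x1ff);
    return 0;
}
#endif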
int cpuid_viridian_leaves(unsigned int leaf, unsigned int *eax,
                          unsigned int *ebx, unsigned int *ecx,
                          unsigned int *edx)
{
    struct domain *d = current->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    leaf -= 0x40000000;
    if ( leaf > 6 )
        return 0;

    *eax = *ebx = *ecx = *edx = 0;
    switch ( leaf )
    {
    case 0:
        *eax = 0x40000006; /* Maximum leaf */
        *ebx = 0x7263694d; /* Magic numbers  */
        *ecx = 0x666F736F;
        *edx = 0x76482074;
        break;
    case 1:
        *eax = 0x31237648; /* Version number */
        break;
    case 2:
        /* Hypervisor information, but only if the guest has set its
           own version number. */
        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
            break;
        *eax = 1; /* Build number */
        *ebx = (xen_major_version() << 16) | xen_minor_version();
        *ecx = 0; /* SP */
        *edx = 0; /* Service branch and number */
        break;
    case 3:
        /* Which hypervisor MSRs are available to the guest */
        *eax = (CPUID3A_MSR_APIC_ACCESS |
                CPUID3A_MSR_HYPERCALL |
                CPUID3A_MSR_VP_INDEX);
        break;
    case 4:
        /* Recommended hypercall usage. */
        if ( (d->arch.hvm_domain.viridian.guest_os_id.raw == 0) ||
             (d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
            break;
        *eax = CPUID4A_RELAX_TIMER_INT;
        if ( !cpu_has_vmx_apic_reg_virt )
            *eax |= CPUID4A_MSR_BASED_APIC;
        *ebx = 2047; /* long spin count */
        break;
    case 6:
        /* Detected and in use hardware features. */
        if ( cpu_has_vmx_virtualize_apic_accesses )
            *eax |= CPUID6A_APIC_OVERLAY;
        if ( cpu_has_vmx_msr_bitmap || (read_efer() & EFER_SVME) )
            *eax |= CPUID6A_MSR_BITMAPS;
        if ( hap_enabled(d) )
            *eax |= CPUID6A_NESTED_PAGING;
        break;
    }

    return 1;
}