/* Atomically look up a GFN and take a reference count on the backing page. */ struct page_info *get_page_from_gfn_p2m( struct domain *d, struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q) { struct page_info *page = NULL; p2m_access_t _a; p2m_type_t _t; mfn_t mfn; /* Allow t or a to be NULL */ t = t ?: &_t; a = a ?: &_a; if ( likely(!p2m_locked_by_me(p2m)) ) { /* Fast path: look up and get out */ p2m_read_lock(p2m); mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0); if ( (p2m_is_ram(*t) || p2m_is_grant(*t)) && mfn_valid(mfn) && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) ) { page = mfn_to_page(mfn); if ( !get_page(page, d) /* Page could be shared */ && !get_page(page, dom_cow) ) page = NULL; } p2m_read_unlock(p2m); if ( page ) return page; /* Error path: not a suitable GFN at all */ if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) ) return NULL; } /* Slow path: take the write lock and do fixups */ mfn = get_gfn_type_access(p2m, gfn, t, a, q, NULL); if ( p2m_is_ram(*t) && mfn_valid(mfn) ) { page = mfn_to_page(mfn); if ( !get_page(page, d) ) page = NULL; } put_gfn(d, gfn); return page; }
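/*
 * Usage sketch (hypothetical helper, not part of the code above): a caller of
 * get_page_from_gfn_p2m() is expected to hold the returned reference for as
 * long as it touches the frame and then drop it with put_page(). Only the
 * signature shown above and the host p2m accessor are assumed here.
 */
static int peek_gfn_type(struct domain *d, unsigned long gfn, p2m_type_t *t)
{
    struct page_info *pg;

    pg = get_page_from_gfn_p2m(d, p2m_get_hostp2m(d), gfn, t, NULL, P2M_ALLOC);
    if ( !pg )
        return -EINVAL;           /* no suitable backing page for this gfn */

    /* ... safe to inspect or copy the page contents here ... */

    put_page(pg);                 /* balance the reference taken above */
    return 0;
}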
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; unsigned long mfn; mfn = gmfn_to_mfn(d, gmfn); if ( unlikely(!mfn_valid(mfn)) ) { gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } page = mfn_to_page(mfn); if ( unlikely(!get_page(page, d)) ) { gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); return 1; }
/* * Currently all CPUs are rendezvoused at the MCE softirq handler, so there is no * need to consider paging p2m types * Currently only HVM guests with EPT paging mode are supported * XXX the following situations are not handled: * PoD, Foreign mapped, Granted, Shared */ int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn) { mfn_t r_mfn; p2m_type_t pt; int rc; /* Always trust dom0's MCE handler to prevent future access */ if ( d == dom0 ) return 0; if (!mfn_valid(mfn_x(mfn))) return -EINVAL; if ( !has_hvm_container_domain(d) || !paging_mode_hap(d) ) return -ENOSYS; rc = -1; r_mfn = get_gfn_query(d, gfn, &pt); if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES) { ASSERT(mfn_x(r_mfn) == mfn_x(mfn)); p2m_change_type(d, gfn, pt, p2m_ram_broken); rc = 0; } put_gfn(d, gfn); return rc; }
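/*
 * Note on the mask test above: p2m_to_mask() is assumed to be the usual
 * one-bit-per-type encoding, so P2M_UNMAP_TYPES is simply a bitmask of the
 * p2m types the MCE code is willing to convert to p2m_ram_broken. The
 * definitions below are illustrative only; the real mask lives in the MCE code:
 */
#define p2m_to_mask_sketch(t)  (1UL << (t))
#define UNMAP_TYPES_SKETCH     (p2m_to_mask_sketch(p2m_ram_rw) | \
                                p2m_to_mask_sketch(p2m_ram_logdirty))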
static void enable_hypercall_page(void) { struct domain *d = current->domain; unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn; unsigned long mfn = gmfn_to_mfn(d, gmfn); uint8_t *p; if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) { gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, mfn); return; } p = map_domain_page(mfn); /* * We set the bit 31 in %eax (reserved field in the Viridian hypercall * calling convention) to differentiate Xen and Viridian hypercalls. */ *(u8 *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */ *(u32 *)(p + 1) = 0x80000000; *(u8 *)(p + 5) = 0x0f; /* vmcall/vmmcall */ *(u8 *)(p + 6) = 0x01; *(u8 *)(p + 7) = ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ? 0xc1 : 0xd9); *(u8 *)(p + 8) = 0xc3; /* ret */ memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */ unmap_domain_page(p); put_page_and_type(mfn_to_page(mfn)); }
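/*
 * The nine bytes written above, laid out flat for reference (illustrative
 * array, not from the source; 0xc1 encodes vmcall, 0xd9 would be vmmcall on
 * AMD hardware):
 */
static const uint8_t viridian_hypercall_stub[] = {
    0x0d, 0x00, 0x00, 0x00, 0x80,   /* or   $0x80000000,%eax */
    0x0f, 0x01, 0xc1,               /* vmcall                */
    0xc3,                           /* ret                   */
};
/* The rest of the page is filled with 0xcc (int3) to trap stray jumps into it. */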
static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params) { struct vcpu *v; struct vpmu_struct *vpmu; uint64_t mfn; void *xenpmu_data; if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) ) return; v = d->vcpu[params->vcpu]; if ( v != current ) vcpu_pause(v); vpmu = vcpu_vpmu(v); spin_lock(&vpmu->vpmu_lock); vpmu_destroy(v); xenpmu_data = vpmu->xenpmu_data; vpmu->xenpmu_data = NULL; spin_unlock(&vpmu->vpmu_lock); if ( xenpmu_data ) { mfn = domain_page_map_to_mfn(xenpmu_data); ASSERT(mfn_valid(mfn)); unmap_domain_page_global(xenpmu_data); put_page_and_type(mfn_to_page(mfn)); } if ( v != current ) vcpu_unpause(v); }
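/*
 * pvpmu_finish() above tears down a mapping of the shape sketched below
 * (hypothetical setup-side helper; assumes get_page_and_type() and
 * map_domain_page_global() as used elsewhere in this code):
 */
static void *map_pmu_frame_global(struct domain *d, unsigned long mfn)
{
    if ( !mfn_valid(mfn) ||
         !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) )
        return NULL;
    /* Global mapping: stays valid outside the mapping vCPU's context and is
     * undone later with unmap_domain_page_global() + put_page_and_type(). */
    return map_domain_page_global(mfn);
}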
int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) { struct p2m_domain *p2m = p2m_get_hostp2m(d); int rc = 0; p2m_access_t a; p2m_type_t ot; mfn_t omfn; unsigned long pg_type; if ( !paging_mode_translate(p2m->domain) ) return 0; gfn_lock(p2m, gfn, 0); omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL); /* At the moment we only allow p2m change if gfn has already been made * sharable first */ ASSERT(p2m_is_shared(ot)); ASSERT(mfn_valid(omfn)); /* Set the m2p entry to invalid only if there are no further type * refs to this page as shared */ pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info)); if ( (pg_type & PGT_count_mask) == 0 || (pg_type & PGT_type_mask) != PGT_shared_page ) set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn)); rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared, p2m->default_access); gfn_unlock(p2m, gfn, 0); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot))); return rc; }
int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) { int rc = 0; p2m_access_t a; p2m_type_t ot; mfn_t omfn; struct p2m_domain *p2m = p2m_get_hostp2m(d); if ( !paging_mode_translate(d) ) return 0; gfn_lock(p2m, gfn, 0); omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL); if ( p2m_is_grant(ot) ) { p2m_unlock(p2m); domain_crash(d); return 0; } else if ( p2m_is_ram(ot) ) { ASSERT(mfn_valid(omfn)); set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); } P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn)); rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_mmio_direct, p2m->default_access); gfn_unlock(p2m, gfn, 0); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot))); return rc; }
static void p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, unsigned int page_order) { unsigned long i; mfn_t mfn_return; p2m_type_t t; p2m_access_t a; if ( !paging_mode_translate(p2m->domain) ) { if ( need_iommu(p2m->domain) ) for ( i = 0; i < (1 << page_order); i++ ) iommu_unmap_page(p2m->domain, mfn + i); return; } ASSERT(gfn_locked_by_me(p2m, gfn)); P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); if ( mfn_valid(_mfn(mfn)) ) { for ( i = 0; i < (1UL << page_order); i++ ) { mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, 0, NULL); if ( !p2m_is_grant(t) && !p2m_is_shared(t) ) set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); } } set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid, p2m->default_access); }
/* * pgd3val: this is the value of init_mm.pgd[3] in a PV guest. It is optional. * This is to assist debugging of modules in the guest. The kernel address * space seems to always be mapped, but modules are not necessarily * mapped in any arbitrary guest cr3 that we pick if pgd3val is 0. * Modules should always be addressable if we use cr3 from init_mm. * Since pgd3val is already a pgd value, cr3->pgd[3], we just need to * do 2-level lookups. * * NOTE: 4-level paging works for 32-bit PAE guests too because the cpu runs in IA-32e * mode. * Returns: mfn for the given (pv guest) vaddr */ static unsigned long dbg_pv_va2mfn(dbgva_t vaddr, struct domain *dp, uint64_t pgd3val) { l4_pgentry_t l4e, *l4t; l3_pgentry_t l3e, *l3t; l2_pgentry_t l2e, *l2t; l1_pgentry_t l1e, *l1t; unsigned long cr3 = (pgd3val ? pgd3val : dp->vcpu[0]->arch.cr3); unsigned long mfn = cr3 >> PAGE_SHIFT; DBGP2("vaddr:%lx domid:%d cr3:%lx pgd3:%lx\n", vaddr, dp->domain_id, cr3, pgd3val); if ( pgd3val == 0 ) { l4t = mfn_to_virt(mfn); l4e = l4t[l4_table_offset(vaddr)]; mfn = l4e_get_pfn(l4e); DBGP2("l4t:%p l4to:%lx l4e:%lx mfn:%lx\n", l4t, l4_table_offset(vaddr), l4e, mfn); if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) { DBGP1("l4 PAGE not present. vaddr:%lx cr3:%lx\n", vaddr, cr3); return INVALID_MFN; } l3t = mfn_to_virt(mfn); l3e = l3t[l3_table_offset(vaddr)]; mfn = l3e_get_pfn(l3e); DBGP2("l3t:%p l3to:%lx l3e:%lx mfn:%lx\n", l3t, l3_table_offset(vaddr), l3e, mfn); if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) { DBGP1("l3 PAGE not present. vaddr:%lx cr3:%lx\n", vaddr, cr3); return INVALID_MFN; } } l2t = mfn_to_virt(mfn); l2e = l2t[l2_table_offset(vaddr)]; mfn = l2e_get_pfn(l2e); DBGP2("l2t:%p l2to:%lx l2e:%lx mfn:%lx\n", l2t, l2_table_offset(vaddr), l2e, mfn); if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_flags(l2e) & _PAGE_PSE) ) { DBGP1("l2 PAGE not present. vaddr:%lx cr3:%lx\n", vaddr, cr3); return INVALID_MFN; } l1t = mfn_to_virt(mfn); l1e = l1t[l1_table_offset(vaddr)]; mfn = l1e_get_pfn(l1e); DBGP2("l1t:%p l1to:%lx l1e:%lx mfn:%lx\n", l1t, l1_table_offset(vaddr), l1e, mfn); return mfn_valid(mfn) ? mfn : INVALID_MFN; }
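/*
 * For reference, the walk above consumes the virtual address in 9-bit slices
 * on top of the 12-bit page offset (x86-64, 4K pages). A minimal sketch of
 * the index extraction that the l?_table_offset() macros perform:
 */
static inline unsigned int pt_index(unsigned long vaddr, unsigned int level)
{
    /* level 1 = L1 (4K entries) ... level 4 = L4; each level covers 9 bits */
    return (vaddr >> (PAGE_SHIFT + 9 * (level - 1))) & 0x1ff;
}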
/** * p2m_mem_paging_evict - Mark a guest page as paged-out * @d: guest domain * @gfn: guest page to evict * * Returns 0 for success or negative errno values if eviction is not possible. * * p2m_mem_paging_evict() is called by the pager and will free a guest page and * release it back to Xen. If the following conditions are met the page can be * freed: * - the gfn is backed by a mfn * - the gfn was nominated * - the mfn has still exactly one user and has no special meaning * * After successful nomination some other process could have mapped the page. In * this case eviction can not be done. If the gfn was populated before the pager * could evict it, eviction can not be done either. In this case the gfn is * still backed by a mfn. */ int p2m_mem_paging_evict(struct domain *d, unsigned long gfn) { struct page_info *page; p2m_type_t p2mt; p2m_access_t a; mfn_t mfn; struct p2m_domain *p2m = p2m_get_hostp2m(d); int ret = -EBUSY; gfn_lock(p2m, gfn, 0); /* Get mfn */ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL); if ( unlikely(!mfn_valid(mfn)) ) goto out; /* Allow only nominated pages */ if ( p2mt != p2m_ram_paging_out ) goto out; /* Get the page so it doesn't get modified under Xen's feet */ page = mfn_to_page(mfn); if ( unlikely(!get_page(page, d)) ) goto out; /* Check page count and type once more */ if ( (page->count_info & (PGC_count_mask | PGC_allocated)) != (2 | PGC_allocated) ) goto out_put; if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) goto out_put; /* Decrement guest domain's ref count of the page */ if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); /* Remove mapping from p2m table */ set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_ram_paged, a); /* Clear content before returning the page to Xen */ scrub_one_page(page); /* Track number of paged gfns */ atomic_inc(&d->paged_pages); ret = 0; out_put: /* Put the page back so it gets freed */ put_page(page); out: gfn_unlock(p2m, gfn, 0); return ret; }
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); p2m_mem_paging_drop_page(p2m_get_hostp2m(d), gmfn); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } page = mfn_to_page(mfn); #ifdef CONFIG_X86 /* If gmfn is shared, just drop the guest reference (which may or may not * free the page) */ if(p2m_is_shared(p2mt)) { put_page_and_type(page); guest_physmap_remove_page(d, gmfn, mfn, 0); return 1; } #endif /* CONFIG_X86 */ if ( unlikely(!get_page(page, d)) ) { gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); return 1; }
int p2m_set_altp2m_mem_access(struct domain *d, struct p2m_domain *hp2m, struct p2m_domain *ap2m, p2m_access_t a, gfn_t gfn) { mfn_t mfn; p2m_type_t t; p2m_access_t old_a; unsigned int page_order; unsigned long gfn_l = gfn_x(gfn); int rc; mfn = ap2m->get_entry(ap2m, gfn_l, &t, &old_a, 0, NULL, NULL); /* Check host p2m if no valid entry in alternate */ if ( !mfn_valid(mfn) ) { mfn = __get_gfn_type_access(hp2m, gfn_l, &t, &old_a, P2M_ALLOC | P2M_UNSHARE, &page_order, 0); rc = -ESRCH; if ( !mfn_valid(mfn) || t != p2m_ram_rw ) return rc; /* If this is a superpage, copy that first */ if ( page_order != PAGE_ORDER_4K ) { unsigned long mask = ~((1UL << page_order) - 1); unsigned long gfn2_l = gfn_l & mask; mfn_t mfn2 = _mfn(mfn_x(mfn) & mask); rc = ap2m->set_entry(ap2m, gfn2_l, mfn2, page_order, t, old_a, 1); if ( rc ) return rc; } } return ap2m->set_entry(ap2m, gfn_l, mfn, PAGE_ORDER_4K, t, a, (current->domain != d)); }
void __init discard_initial_modules(void) { struct bootmodules *mi = &bootinfo.modules; int i; for ( i = 0; i < mi->nr_mods; i++ ) { paddr_t s = mi->module[i].start; paddr_t e = s + PAGE_ALIGN(mi->module[i].size); if ( mi->module[i].kind == BOOTMOD_XEN ) continue; if ( !mfn_valid(paddr_to_pfn(s)) || !mfn_valid(paddr_to_pfn(e))) continue; dt_unreserved_regions(s, e, init_domheap_pages, 0); } mi->nr_mods = 0; remove_early_mappings(); }
static struct mcinfo_bank *mca_init_bank(enum mca_source who, struct mc_info *mi, int bank) { struct mcinfo_bank *mib; uint64_t addr=0, misc = 0; if (!mi) return NULL; mib = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_bank)); if (!mib) { mi->flags |= MCINFO_FLAGS_UNCOMPLETE; return NULL; } memset(mib, 0, sizeof (struct mcinfo_bank)); mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank)); mib->common.type = MC_TYPE_BANK; mib->common.size = sizeof (struct mcinfo_bank); mib->mc_bank = bank; addr = misc = 0; if (mib->mc_status & MCi_STATUS_MISCV) mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank)); if (mib->mc_status & MCi_STATUS_ADDRV) { mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank)); if (mfn_valid(paddr_to_pfn(mib->mc_addr))) { struct domain *d; d = maddr_get_owner(mib->mc_addr); if (d != NULL && (who == MCA_POLLER || who == MCA_CMCI_HANDLER)) mib->mc_domid = d->domain_id; } } if (who == MCA_CMCI_HANDLER) { mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank); rdtscll(mib->mc_tsc); } return mib; }
static void mca_init_bank(enum mca_source who, struct mc_info *mi, int bank) { struct mcinfo_bank *mib; if (!mi) return; mib = x86_mcinfo_reserve(mi, sizeof(*mib)); if (!mib) { mi->flags |= MCINFO_FLAGS_UNCOMPLETE; return; } mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank)); mib->common.type = MC_TYPE_BANK; mib->common.size = sizeof (struct mcinfo_bank); mib->mc_bank = bank; if (mib->mc_status & MCi_STATUS_MISCV) mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank)); if (mib->mc_status & MCi_STATUS_ADDRV) mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank)); if ((mib->mc_status & MCi_STATUS_MISCV) && (mib->mc_status & MCi_STATUS_ADDRV) && (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) && (who == MCA_POLLER || who == MCA_CMCI_HANDLER) && (mfn_valid(paddr_to_pfn(mib->mc_addr)))) { struct domain *d; d = maddr_get_owner(mib->mc_addr); if (d) mib->mc_domid = d->domain_id; } if (who == MCA_CMCI_HANDLER) { mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank); rdtscll(mib->mc_tsc); } }
/* Put any references on the single 4K page referenced by pte. TODO: * Handle superpages, for now we only take special references for leaf * pages (specifically foreign ones, which can't be super mapped today). */ static void p2m_put_l3_page(const lpae_t pte) { ASSERT(p2m_valid(pte)); /* TODO: Handle other p2m types * * It's safe to do the put_page here because page_alloc will * flush the TLBs if the page is reallocated before the end of * this loop. */ if ( p2m_is_foreign(pte.p2m.type) ) { unsigned long mfn = pte.p2m.base; ASSERT(mfn_valid(mfn)); put_page(mfn_to_page(mfn)); } }
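/*
 * The put_page() above releases a reference taken when the foreign frame was
 * inserted into the p2m. A minimal sketch of that insertion-side idiom
 * (hypothetical helper; 'fd' is the foreign domain owning the frame):
 */
static int take_foreign_page_ref(struct domain *fd, unsigned long mfn)
{
    if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), fd) )
        return -EINVAL;
    /* Dropped again by p2m_put_l3_page() when the p2m entry is removed. */
    return 0;
}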
/** * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out * @d: guest domain * @gfn: guest page to nominate * * Returns 0 for success or negative errno values if gfn is not pageable. * * p2m_mem_paging_nominate() is called by the pager and checks if a guest page * can be paged out. If the following conditions are met the p2mt will be * changed: * - the gfn is backed by a mfn * - the p2mt of the gfn is pageable * - the mfn is not used for IO * - the mfn has exactly one user and has no special meaning * * Once the p2mt is changed the page is readonly for the guest. On success the * pager can write the page contents to disk and later evict the page. */ int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn) { struct page_info *page; struct p2m_domain *p2m = p2m_get_hostp2m(d); p2m_type_t p2mt; p2m_access_t a; mfn_t mfn; int ret = -EBUSY; gfn_lock(p2m, gfn, 0); mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL); /* Check if mfn is valid */ if ( !mfn_valid(mfn) ) goto out; /* Check p2m type */ if ( !p2m_is_pageable(p2mt) ) goto out; /* Check for io memory page */ if ( is_iomem_page(mfn_x(mfn)) ) goto out; /* Check page count and type */ page = mfn_to_page(mfn); if ( (page->count_info & (PGC_count_mask | PGC_allocated)) != (1 | PGC_allocated) ) goto out; if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) goto out; /* Fix p2m entry */ set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a); ret = 0; out: gfn_unlock(p2m, gfn, 0); return ret; }
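/*
 * Taking p2m_mem_paging_nominate() and p2m_mem_paging_evict() together, the
 * paging life cycle of a gfn looks roughly like the table below (illustrative
 * summary only; the state names are the p2m types used above):
 */
static const struct {
    p2m_type_t from, to;
    const char *step;
} mem_paging_steps[] = {
    { p2m_ram_rw,         p2m_ram_paging_out, "nominate: gfn becomes read-only"       },
    { p2m_ram_paging_out, p2m_ram_paged,      "evict: frame freed, contents on disk"  },
    { p2m_ram_paged,      p2m_ram_paging_in,  "populate: guest touched the gfn"       },
    { p2m_ram_paging_in,  p2m_ram_rw,         "resume: pager restored the contents"   },
};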
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, unsigned long gva, uint32_t *pfec) { unsigned long cr3; uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; /* Get the top-level table's MFN */ cr3 = v->arch.hvm_vcpu.guest_cr[3]; top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt); if ( !p2m_is_ram(p2mt) ) { pfec[0] &= ~PFEC_page_present; return INVALID_GFN; } /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(mfn_x(top_mfn))); top_map = map_domain_page(mfn_x(top_mfn)); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, gva, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); /* Interpret the answer */ if ( missing == 0 ) return gfn_x(guest_l1e_get_gfn(gw.l1e)); if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; return INVALID_GFN; }
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return INVALID_GFN; } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; return INVALID_GFN; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(mfn_x(top_mfn))); top_map = map_domain_page(mfn_x(top_mfn)); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_l1e_get_gfn(gw.l1e); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return INVALID_GFN; } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; return INVALID_GFN; }
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); put_gfn(d, gmfn); /* If the page hasn't yet been paged out, there is an * actual page that needs to be released. */ if ( p2mt == p2m_ram_paging_out ) { ASSERT(mfn_valid(mfn)); page = mfn_to_page(mfn); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); } p2m_mem_paging_drop_page(d, gmfn, p2mt); return 1; } if ( p2mt == p2m_mmio_direct ) { clear_mmio_p2m_entry(d, gmfn, _mfn(mfn)); put_gfn(d, gmfn); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } #ifdef CONFIG_X86 if ( p2m_is_shared(p2mt) ) { /* Unshare the page, bail out on error. We unshare because * we might be the only one using this shared page, and we * need to trigger proper cleanup. Once done, this is * like any other page. */ if ( mem_sharing_unshare_page(d, gmfn, 0) ) { put_gfn(d, gmfn); (void)mem_sharing_notify_enomem(d, gmfn, 0); return 0; } /* Maybe the mfn changed */ mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt)); ASSERT(!p2m_is_shared(p2mt)); } #endif /* CONFIG_X86 */ page = mfn_to_page(mfn); if ( unlikely(!get_page(page, d)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); /* * With the lack of an IOMMU on some platforms, domains with DMA-capable * device must retrieve the same pfn when the hypercall populate_physmap * is called. * * For this purpose (and to match populate_physmap() behavior), the page * is kept allocated. */ if ( !is_domain_direct_mapped(d) && test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); put_gfn(d, gmfn); return 1; }
static void populate_physmap(struct memop_args *a) { struct page_info *page; unsigned int i, j; xen_pfn_t gpfn, mfn; struct domain *d = a->domain; if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) ) return; if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER : max_order(current->domain)) ) return; for ( i = a->nr_done; i < a->nr_extents; i++ ) { if ( i != a->nr_done && hypercall_preempt_check() ) { a->preempted = 1; goto out; } if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) goto out; if ( a->memflags & MEMF_populate_on_demand ) { if ( guest_physmap_mark_populate_on_demand(d, gpfn, a->extent_order) < 0 ) goto out; } else { if ( is_domain_direct_mapped(d) ) { mfn = gpfn; for ( j = 0; j < (1U << a->extent_order); j++, mfn++ ) { if ( !mfn_valid(mfn) ) { gdprintk(XENLOG_INFO, "Invalid mfn %#"PRI_xen_pfn"\n", mfn); goto out; } page = mfn_to_page(mfn); if ( !get_page(page, d) ) { gdprintk(XENLOG_INFO, "mfn %#"PRI_xen_pfn" doesn't belong to d%d\n", mfn, d->domain_id); goto out; } put_page(page); } mfn = gpfn; page = mfn_to_page(mfn); } else { page = alloc_domheap_pages(d, a->extent_order, a->memflags); if ( unlikely(!page) ) { if ( !opt_tmem || a->extent_order ) gdprintk(XENLOG_INFO, "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n", a->extent_order, d->domain_id, a->memflags, i, a->nr_extents); goto out; } mfn = page_to_mfn(page); } guest_physmap_add_page(d, gpfn, mfn, a->extent_order); if ( !paging_mode_translate(d) ) { for ( j = 0; j < (1U << a->extent_order); j++ ) set_gpfn_from_mfn(mfn + j, gpfn + j); /* Inform the domain of the new page's machine address. */ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) goto out; } } } out: a->nr_done = i; }
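/*
 * Guest-side sketch of the request that ends up in populate_physmap() above
 * (assumes the public XENMEM interface and a Linux-style HYPERVISOR_memory_op
 * wrapper; the gpfn value is arbitrary):
 */
static int ask_for_one_page(void)
{
    xen_pfn_t gpfn = 0x100000;               /* where we want the new page */
    struct xen_memory_reservation res = {
        .nr_extents   = 1,
        .extent_order = 0,                   /* single 4K extent           */
        .domid        = DOMID_SELF,
    };

    set_xen_guest_handle(res.extent_start, &gpfn);
    /* Returns the number of extents populated (1 on full success). */
    return HYPERVISOR_memory_op(XENMEM_populate_physmap, &res);
}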
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr, p2m_type_t t) { paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT; /* xn and write bit will be defined in the switch */ lpae_t e = (lpae_t) { .p2m.af = 1, .p2m.sh = LPAE_SH_OUTER, .p2m.read = 1, .p2m.mattr = mattr, .p2m.table = 1, .p2m.valid = 1, .p2m.type = t, }; BUILD_BUG_ON(p2m_max_real_type > (1 << 4)); switch (t) { case p2m_ram_rw: e.p2m.xn = 0; e.p2m.write = 1; break; case p2m_ram_ro: e.p2m.xn = 0; e.p2m.write = 0; break; case p2m_map_foreign: case p2m_grant_map_rw: case p2m_mmio_direct: e.p2m.xn = 1; e.p2m.write = 1; break; case p2m_grant_map_ro: case p2m_invalid: e.p2m.xn = 1; e.p2m.write = 0; break; case p2m_max_real_type: BUG(); break; } ASSERT(!(pa & ~PAGE_MASK)); ASSERT(!(pa & ~PADDR_MASK)); e.bits |= pa; return e; } /* Allocate a new page table page and hook it in via the given entry */ static int p2m_create_table(struct domain *d, lpae_t *entry) { struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page; void *p; lpae_t pte; BUG_ON(entry->p2m.valid); page = alloc_domheap_page(NULL, 0); if ( page == NULL ) return -ENOMEM; page_list_add(page, &p2m->pages); p = __map_domain_page(page); clear_page(p); unmap_domain_page(p); pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid); write_pte(entry, pte); return 0; } enum p2m_operation { INSERT, ALLOCATE, REMOVE, RELINQUISH, CACHEFLUSH, }; static int apply_p2m_changes(struct domain *d, enum p2m_operation op, paddr_t start_gpaddr, paddr_t end_gpaddr, paddr_t maddr, int mattr, p2m_type_t t) { int rc; struct p2m_domain *p2m = &d->arch.p2m; lpae_t *first = NULL, *second = NULL, *third = NULL; paddr_t addr; unsigned long cur_first_page = ~0, cur_first_offset = ~0, cur_second_offset = ~0; unsigned long count = 0; unsigned int flush = 0; bool_t populate = (op == INSERT || op == ALLOCATE); lpae_t pte; spin_lock(&p2m->lock); if ( d != current->domain ) p2m_load_VTTBR(d); addr = start_gpaddr; while ( addr < end_gpaddr ) { if ( cur_first_page != p2m_first_level_index(addr) ) { if ( first ) unmap_domain_page(first); first = p2m_map_first(p2m, addr); if ( !first ) { rc = -EINVAL; goto out; } cur_first_page = p2m_first_level_index(addr); } if ( !first[first_table_offset(addr)].p2m.valid ) { if ( !populate ) { addr = (addr + FIRST_SIZE) & FIRST_MASK; continue; } rc = p2m_create_table(d, &first[first_table_offset(addr)]); if ( rc < 0 ) { printk("p2m_populate_ram: L1 failed\n"); goto out; } } BUG_ON(!first[first_table_offset(addr)].p2m.valid); if ( cur_first_offset != first_table_offset(addr) ) { if (second) unmap_domain_page(second); second = map_domain_page(first[first_table_offset(addr)].p2m.base); cur_first_offset = first_table_offset(addr); } /* else: second already valid */ if ( !second[second_table_offset(addr)].p2m.valid ) { if ( !populate ) { addr = (addr + SECOND_SIZE) & SECOND_MASK; continue; } rc = p2m_create_table(d, &second[second_table_offset(addr)]); if ( rc < 0 ) { printk("p2m_populate_ram: L2 failed\n"); goto out; } } BUG_ON(!second[second_table_offset(addr)].p2m.valid); if ( cur_second_offset != second_table_offset(addr) ) { /* map third level */ if (third) unmap_domain_page(third); third = map_domain_page(second[second_table_offset(addr)].p2m.base); cur_second_offset = second_table_offset(addr); } pte = third[third_table_offset(addr)]; flush |= pte.p2m.valid; /* TODO: Handle other p2m type * * It's safe to do the put_page here because page_alloc will * flush the TLBs if the page is reallocated before the end of * this loop. 
*/ if ( pte.p2m.valid && p2m_is_foreign(pte.p2m.type) ) { unsigned long mfn = pte.p2m.base; ASSERT(mfn_valid(mfn)); put_page(mfn_to_page(mfn)); } /* Allocate a new RAM page and attach */ switch (op) { case ALLOCATE: { struct page_info *page; ASSERT(!pte.p2m.valid); rc = -ENOMEM; page = alloc_domheap_page(d, 0); if ( page == NULL ) { printk("p2m_populate_ram: failed to allocate page\n"); goto out; } pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t); write_pte(&third[third_table_offset(addr)], pte); } break; case INSERT: { pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t); write_pte(&third[third_table_offset(addr)], pte); maddr += PAGE_SIZE; } break; case RELINQUISH: case REMOVE: { if ( !pte.p2m.valid ) { count++; break; } count += 0x10; memset(&pte, 0x00, sizeof(pte)); write_pte(&third[third_table_offset(addr)], pte); count++; } break; case CACHEFLUSH: { if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) ) break; flush_page_to_ram(pte.p2m.base); } break; } /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */ if ( op == RELINQUISH && count >= 0x2000 ) { if ( hypercall_preempt_check() ) { p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT; rc = -EAGAIN; goto out; } count = 0; } /* Got the next page */ addr += PAGE_SIZE; } if ( flush ) { /* At the beginning of the function, Xen is updating VTTBR * with the domain where the mappings are created. In this * case it's only necessary to flush TLBs on every CPUs with * the current VMID (our domain). */ flush_tlb(); } if ( op == ALLOCATE || op == INSERT ) { unsigned long sgfn = paddr_to_pfn(start_gpaddr); unsigned long egfn = paddr_to_pfn(end_gpaddr); p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn); p2m->lowest_mapped_gfn = MIN(p2m->lowest_mapped_gfn, sgfn); } rc = 0; out: if (third) unmap_domain_page(third); if (second) unmap_domain_page(second); if (first) unmap_domain_page(first); if ( d != current->domain ) p2m_load_VTTBR(current->domain); spin_unlock(&p2m->lock); return rc; }
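/*
 * The printk tags in apply_p2m_changes() suggest the ALLOCATE path is driven
 * by a RAM-populating wrapper of roughly this shape (a sketch only, reusing
 * the argument order of apply_p2m_changes() above):
 */
static int p2m_populate_ram_sketch(struct domain *d, paddr_t start, paddr_t end)
{
    /* maddr is ignored for ALLOCATE: fresh domheap pages are used instead. */
    return apply_p2m_changes(d, ALLOCATE, start, end, 0, MATTR_MEM, p2m_ram_rw);
}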
static mfn_t p2m_pt_get_entry(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q, unsigned int *page_order) { mfn_t mfn; paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; l2_pgentry_t *l2e; l1_pgentry_t *l1e; unsigned long l1e_flags; p2m_type_t l1t; ASSERT(paging_mode_translate(p2m->domain)); /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ *t = p2m_mmio_dm; /* Not implemented except with EPT */ *a = p2m_access_rwx; if ( gfn > p2m->max_mapped_pfn ) /* This pfn is higher than the highest the p2m map currently holds */ return _mfn(INVALID_MFN); mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); { l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); l4e += l4_table_offset(addr); if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) { unmap_domain_page(l4e); return _mfn(INVALID_MFN); } mfn = _mfn(l4e_get_pfn(*l4e)); unmap_domain_page(l4e); } { l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); l3e += l3_table_offset(addr); pod_retry_l3: if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) { if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) ) goto pod_retry_l3; gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__); } else *t = p2m_populate_on_demand; } unmap_domain_page(l3e); return _mfn(INVALID_MFN); } else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) ) { mfn = _mfn(l3e_get_pfn(*l3e) + l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)); *t = p2m_flags_to_type(l3e_get_flags(*l3e)); unmap_domain_page(l3e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_1G; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l3e_get_pfn(*l3e)); unmap_domain_page(l3e); } l2e = map_domain_page(mfn_x(mfn)); l2e += l2_table_offset(addr); pod_retry_l2: if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) { /* PoD: Try to populate a 2-meg chunk */ if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) ) goto pod_retry_l2; } else *t = p2m_populate_on_demand; } unmap_domain_page(l2e); return _mfn(INVALID_MFN); } else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) ) { mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); *t = p2m_flags_to_type(l2e_get_flags(*l2e)); unmap_domain_page(l2e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_2M; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l2e_get_pfn(*l2e)); unmap_domain_page(l2e); l1e = map_domain_page(mfn_x(mfn)); l1e += l1_table_offset(addr); pod_retry_l1: l1e_flags = l1e_get_flags(*l1e); l1t = p2m_flags_to_type(l1e_flags); if ( ((l1e_flags & _PAGE_PRESENT) == 0) && (!p2m_is_paging(l1t)) ) { /* PoD: Try to populate */ if ( l1t == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) ) goto pod_retry_l1; } else *t = p2m_populate_on_demand; } unmap_domain_page(l1e); return _mfn(INVALID_MFN); } mfn = _mfn(l1e_get_pfn(*l1e)); *t = l1t; unmap_domain_page(l1e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t)); if ( page_order ) *page_order = PAGE_ORDER_4K; return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN); }
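/*
 * Worked note on the superpage arithmetic above: inside a 1GB mapping the
 * returned MFN is the superpage base plus the low 18 bits of the gfn
 * (512 * l2 index + l1 index); inside a 2MB mapping it is the base plus the
 * low 9 bits. An equivalent sketch of the 1GB case:
 */
static inline unsigned long mfn_within_1g(unsigned long base_mfn,
                                          unsigned long gfn)
{
    return base_mfn + (gfn & ((1UL << 18) - 1));
}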
/* * If mem_access is in use it might have been the reason why get_page_from_gva * failed to fetch the page, as it uses the MMU for the permission checking. * Only in these cases we do a software-based type check and fetch the page if * we indeed found a conflicting mem_access setting. */ struct page_info* p2m_mem_access_check_and_get_page(vaddr_t gva, unsigned long flag, const struct vcpu *v) { long rc; paddr_t ipa; gfn_t gfn; mfn_t mfn; xenmem_access_t xma; p2m_type_t t; struct page_info *page = NULL; struct p2m_domain *p2m = &v->domain->arch.p2m; rc = gva_to_ipa(gva, &ipa, flag); if ( rc < 0 ) goto err; gfn = _gfn(paddr_to_pfn(ipa)); /* * We do this first as this is faster in the default case when no * permission is set on the page. */ rc = __p2m_get_mem_access(v->domain, gfn, &xma); if ( rc < 0 ) goto err; /* Let's check if mem_access limited the access. */ switch ( xma ) { default: case XENMEM_access_rwx: case XENMEM_access_rw: /* * If mem_access contains no rw perm restrictions at all then the original * fault was correct. */ goto err; case XENMEM_access_n2rwx: case XENMEM_access_n: case XENMEM_access_x: /* * If no r/w is permitted by mem_access, this was a fault caused by mem_access. */ break; case XENMEM_access_wx: case XENMEM_access_w: /* * If this was a read then it was because of mem_access, but if it was * a write then the original get_page_from_gva fault was correct. */ if ( flag == GV2M_READ ) break; else goto err; case XENMEM_access_rx2rw: case XENMEM_access_rx: case XENMEM_access_r: /* * If this was a write then it was because of mem_access, but if it was * a read then the original get_page_from_gva fault was correct. */ if ( flag == GV2M_WRITE ) break; else goto err; } /* * We had a mem_access permission limiting the access, but the page type * could also be limiting, so we need to check that as well. */ mfn = p2m_get_entry(p2m, gfn, &t, NULL, NULL); if ( mfn_eq(mfn, INVALID_MFN) ) goto err; if ( !mfn_valid(mfn) ) goto err; /* * Base type doesn't allow r/w */ if ( t != p2m_ram_rw ) goto err; page = mfn_to_page(mfn_x(mfn)); if ( unlikely(!get_page(page, v->domain)) ) page = NULL; err: return page; }
long p2m_pt_audit_p2m(struct p2m_domain *p2m) { unsigned long entry_count = 0, pmbad = 0; unsigned long mfn, gfn, m2pfn; int test_linear; struct domain *d = p2m->domain; ASSERT(p2m_locked_by_me(p2m)); ASSERT(pod_locked_by_me(p2m)); test_linear = ( (d == current->domain) && !pagetable_is_null(current->arch.monitor_table) ); if ( test_linear ) flush_tlb_local(); /* Audit part one: walk the domain's p2m table, checking the entries. */ if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 ) { l2_pgentry_t *l2e; l1_pgentry_t *l1e; int i1, i2; #if CONFIG_PAGING_LEVELS == 4 l4_pgentry_t *l4e; l3_pgentry_t *l3e; int i4, i3; l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); #else /* CONFIG_PAGING_LEVELS == 3 */ l3_pgentry_t *l3e; int i3; l3e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); #endif gfn = 0; #if CONFIG_PAGING_LEVELS >= 4 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) { if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) { gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); #endif for ( i3 = 0; i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); i3++ ) { if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) { gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } /* check for 1GB super page */ if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE ) { mfn = l3e_get_pfn(l3e[i3]); ASSERT(mfn_valid(_mfn(mfn))); /* we have to cover 512x512 4K pages */ for ( i2 = 0; i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES); i2++) { m2pfn = get_gpfn_from_mfn(mfn+i2); if ( m2pfn != (gfn + i2) ) { pmbad++; P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn+i2, mfn+i2, m2pfn); BUG(); } gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } } l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) { if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) { if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) && ( p2m_flags_to_type(l2e_get_flags(l2e[i2])) == p2m_populate_on_demand ) ) entry_count+=SUPERPAGE_PAGES; gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } /* check for super page */ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE ) { mfn = l2e_get_pfn(l2e[i2]); ASSERT(mfn_valid(_mfn(mfn))); for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++) { m2pfn = get_gpfn_from_mfn(mfn+i1); /* Allow shared M2Ps */ if ( (m2pfn != (gfn + i1)) && (m2pfn != SHARED_M2P_ENTRY) ) { pmbad++; P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn+i1, mfn+i1, m2pfn); BUG(); } } gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) { p2m_type_t type; type = p2m_flags_to_type(l1e_get_flags(l1e[i1])); if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) { if ( type == p2m_populate_on_demand ) entry_count++; continue; } mfn = l1e_get_pfn(l1e[i1]); ASSERT(mfn_valid(_mfn(mfn))); m2pfn = get_gpfn_from_mfn(mfn); if ( m2pfn != gfn && type != p2m_mmio_direct && !p2m_is_grant(type) && !p2m_is_shared(type) ) { pmbad++; printk("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn, mfn, m2pfn); P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn, mfn, m2pfn); BUG(); } } unmap_domain_page(l1e); } unmap_domain_page(l2e); } #if CONFIG_PAGING_LEVELS >= 4 unmap_domain_page(l3e); } #endif #if CONFIG_PAGING_LEVELS == 4 unmap_domain_page(l4e); #else /* CONFIG_PAGING_LEVELS == 3 */ unmap_domain_page(l3e); #endif } if ( entry_count != p2m->pod.entry_count ) { 
printk("%s: refcounted entry count %ld, audit count %lu!\n", __func__, p2m->pod.entry_count, entry_count); BUG(); } return pmbad; }
static mfn_t p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q, unsigned int *page_order) { mfn_t mfn; paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; l2_pgentry_t *l2e; l1_pgentry_t *l1e; unsigned long l1e_flags; p2m_type_t l1t; ASSERT(paging_mode_translate(p2m->domain)); /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ *t = p2m_mmio_dm; /* Not implemented except with EPT */ *a = p2m_access_rwx; if ( gfn > p2m->max_mapped_pfn ) /* This pfn is higher than the highest the p2m map currently holds */ return _mfn(INVALID_MFN); /* Use the fast path with the linear mapping if we can */ if ( p2m == p2m_get_hostp2m(current->domain) ) return p2m_gfn_to_mfn_current(p2m, gfn, t, a, q, page_order); mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); #if CONFIG_PAGING_LEVELS >= 4 { l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); l4e += l4_table_offset(addr); if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) { unmap_domain_page(l4e); return _mfn(INVALID_MFN); } mfn = _mfn(l4e_get_pfn(*l4e)); unmap_domain_page(l4e); } #endif { l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); #if CONFIG_PAGING_LEVELS == 3 /* On PAE hosts the p2m has eight l3 entries, not four (see * shadow_set_p2m_entry()) so we can't use l3_table_offset. * Instead, just count the number of l3es from zero. It's safe * to do this because we already checked that the gfn is within * the bounds of the p2m. */ l3e += (addr >> L3_PAGETABLE_SHIFT); #else l3e += l3_table_offset(addr); #endif pod_retry_l3: if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) { if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) ) goto pod_retry_l3; gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__); } else *t = p2m_populate_on_demand; } unmap_domain_page(l3e); return _mfn(INVALID_MFN); } else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) ) { mfn = _mfn(l3e_get_pfn(*l3e) + l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)); *t = p2m_flags_to_type(l3e_get_flags(*l3e)); unmap_domain_page(l3e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_1G; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l3e_get_pfn(*l3e)); unmap_domain_page(l3e); } l2e = map_domain_page(mfn_x(mfn)); l2e += l2_table_offset(addr); pod_retry_l2: if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) { /* PoD: Try to populate a 2-meg chunk */ if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) ) goto pod_retry_l2; } else *t = p2m_populate_on_demand; } unmap_domain_page(l2e); return _mfn(INVALID_MFN); } else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) ) { mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); *t = p2m_flags_to_type(l2e_get_flags(*l2e)); unmap_domain_page(l2e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_2M; return (p2m_is_valid(*t)) ? 
mfn : _mfn(INVALID_MFN); } mfn = _mfn(l2e_get_pfn(*l2e)); unmap_domain_page(l2e); l1e = map_domain_page(mfn_x(mfn)); l1e += l1_table_offset(addr); pod_retry_l1: l1e_flags = l1e_get_flags(*l1e); l1t = p2m_flags_to_type(l1e_flags); if ( ((l1e_flags & _PAGE_PRESENT) == 0) && (!p2m_is_paging(l1t)) ) { /* PoD: Try to populate */ if ( l1t == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) ) goto pod_retry_l1; } else *t = p2m_populate_on_demand; } unmap_domain_page(l1e); return _mfn(INVALID_MFN); } mfn = _mfn(l1e_get_pfn(*l1e)); *t = l1t; unmap_domain_page(l1e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t)); if ( page_order ) *page_order = PAGE_ORDER_4K; return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN); }
/* Read the current domain's p2m table (through the linear mapping). */ static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q, unsigned int *page_order) { mfn_t mfn = _mfn(INVALID_MFN); p2m_type_t p2mt = p2m_mmio_dm; paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ l1_pgentry_t l1e = l1e_empty(), *p2m_entry; l2_pgentry_t l2e = l2e_empty(); int ret; #if CONFIG_PAGING_LEVELS >= 4 l3_pgentry_t l3e = l3e_empty(); #endif ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); #if CONFIG_PAGING_LEVELS >= 4 /* * Read & process L3 */ p2m_entry = (l1_pgentry_t *) &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START) + l3_linear_offset(addr)]; pod_retry_l3: ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e)); if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) { if ( (l3e_get_flags(l3e) & _PAGE_PSE) && (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) ) { /* The read has succeeded, so we know that mapping exists */ if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) ) goto pod_retry_l3; p2mt = p2m_invalid; gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__); goto out; } else { p2mt = p2m_populate_on_demand; goto out; } } goto pod_retry_l2; } if ( l3e_get_flags(l3e) & _PAGE_PSE ) { p2mt = p2m_flags_to_type(l3e_get_flags(l3e)); ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt)); if (p2m_is_valid(p2mt) ) mfn = _mfn(l3e_get_pfn(l3e) + l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)); else p2mt = p2m_mmio_dm; if ( page_order ) *page_order = PAGE_ORDER_1G; goto out; } #endif /* * Read & process L2 */ p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)]; pod_retry_l2: ret = __copy_from_user(&l2e, p2m_entry, sizeof(l2e)); if ( ret != 0 || !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) { if( (l2e_get_flags(l2e) & _PAGE_PSE) && ( p2m_flags_to_type(l2e_get_flags(l2e)) == p2m_populate_on_demand ) ) { /* The read has succeeded, so we know that the mapping * exits at this point. */ if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) ) goto pod_retry_l2; /* Allocate failed. */ p2mt = p2m_invalid; printk("%s: Allocate failed!\n", __func__); goto out; } else { p2mt = p2m_populate_on_demand; goto out; } } goto pod_retry_l1; } if (l2e_get_flags(l2e) & _PAGE_PSE) { p2mt = p2m_flags_to_type(l2e_get_flags(l2e)); ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt)); if ( p2m_is_valid(p2mt) ) mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr)); else p2mt = p2m_mmio_dm; if ( page_order ) *page_order = PAGE_ORDER_2M; goto out; } /* * Read and process L1 */ /* Need to __copy_from_user because the p2m is sparse and this * part might not exist */ pod_retry_l1: p2m_entry = &phys_to_machine_mapping[gfn]; ret = __copy_from_user(&l1e, p2m_entry, sizeof(l1e)); if ( ret == 0 ) { unsigned long l1e_mfn = l1e_get_pfn(l1e); p2mt = p2m_flags_to_type(l1e_get_flags(l1e)); ASSERT( mfn_valid(_mfn(l1e_mfn)) || !p2m_is_ram(p2mt) || p2m_is_paging(p2mt) ); if ( p2mt == p2m_populate_on_demand ) { /* The read has succeeded, so we know that the mapping * exits at this point. 
*/ if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) ) goto pod_retry_l1; /* Allocate failed. */ p2mt = p2m_invalid; goto out; } else { p2mt = p2m_populate_on_demand; goto out; } } if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) ) mfn = _mfn(l1e_mfn); else /* XXX see above */ p2mt = p2m_mmio_dm; } if ( page_order ) *page_order = PAGE_ORDER_4K; out: *t = p2mt; return mfn; }
// Returns 0 on error (out of memory) static int p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma) { // XXX -- this might be able to be faster iff current->domain == d mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); void *table =map_domain_page(mfn_x(table_mfn)); unsigned long i, gfn_remainder = gfn; l1_pgentry_t *p2m_entry; l1_pgentry_t entry_content; l2_pgentry_t l2e_content; l3_pgentry_t l3e_content; int rv=0; unsigned int iommu_pte_flags = (p2mt == p2m_ram_rw) ? IOMMUF_readable|IOMMUF_writable: 0; unsigned long old_mfn = 0; if ( tb_init_done ) { struct { u64 gfn, mfn; int p2mt; int d:16,order:16; } t; t.gfn = gfn; t.mfn = mfn_x(mfn); t.p2mt = p2mt; t.d = p2m->domain->domain_id; t.order = page_order; __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t); } #if CONFIG_PAGING_LEVELS >= 4 if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L4_PAGETABLE_SHIFT - PAGE_SHIFT, L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) goto out; #endif /* * Try to allocate 1GB page table if this feature is supported. */ if ( page_order == PAGE_ORDER_1G ) { l1_pgentry_t old_entry = l1e_empty(); p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, L3_PAGETABLE_ENTRIES); ASSERT(p2m_entry); if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { /* We're replacing a non-SP page with a superpage. Make sure to * handle freeing the table properly. */ old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); l3e_content = mfn_valid(mfn) ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE) : l3e_empty(); entry_content.l1 = l3e_content.l3; if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 3); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ /* Free old intermediate tables if necessary */ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) p2m_free_entry(p2m, &old_entry, page_order); } /* * When using PAE Xen, we only allow 33 bits of pseudo-physical * address in translated guests (i.e. 8 GBytes). This restriction * comes from wanting to map the P2M table into the 16MB RO_MPT hole * in Xen's address space for translated PV guests. * When using AMD's NPT on PAE Xen, we are restricted to 4GB. */ else if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, ((CONFIG_PAGING_LEVELS == 3) ? (hap_enabled(p2m->domain) ? 
4 : 8) : L3_PAGETABLE_ENTRIES), PGT_l2_page_table) ) goto out; if ( page_order == PAGE_ORDER_4K ) { if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) goto out; p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 0, L1_PAGETABLE_ENTRIES); ASSERT(p2m_entry); if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) || p2m_is_paging(p2mt) ) entry_content = p2m_l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn)); else entry_content = l1e_empty(); if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } /* level 1 entry */ p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ } else if ( page_order == PAGE_ORDER_2M ) { l1_pgentry_t old_entry = l1e_empty(); p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES); ASSERT(p2m_entry); /* FIXME: Deal with 4k replaced by 2meg pages */ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { /* We're replacing a non-SP page with a superpage. Make sure to * handle freeing the table properly. */ old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); if ( mfn_valid(mfn) || p2m_is_magic(p2mt) ) l2e_content = l2e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE); else l2e_content = l2e_empty(); entry_content.l1 = l2e_content.l2; if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 2); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ /* Free old intermediate tables if necessary */ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) p2m_free_entry(p2m, &old_entry, page_order); } /* Track the highest gfn for which we have ever had a valid mapping */ if ( p2mt != p2m_invalid && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) ) p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1; if ( iommu_enabled && need_iommu(p2m->domain) ) { if ( iommu_hap_pt_share ) { if ( old_mfn && (old_mfn != mfn_x(mfn)) ) amd_iommu_flush_pages(p2m->domain, gfn, page_order); } else { if ( p2mt == p2m_ram_rw ) for ( i = 0; i < (1UL << page_order); i++ ) iommu_map_page(p2m->domain, gfn+i, mfn_x(mfn)+i, IOMMUF_readable|IOMMUF_writable); else for ( int i = 0; i < (1UL << page_order); i++ ) iommu_unmap_page(p2m->domain, gfn+i); } } /* Success */ rv = 1; out: unmap_domain_page(table); return rv; }
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return gfn_x(INVALID_GFN); } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return gfn_x(INVALID_GFN); } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; goto out_tweak_pfec; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(top_mfn)); top_map = map_domain_page(top_mfn); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_walk_to_gfn(&gw); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return gfn_x(INVALID_GFN); } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return gfn_x(INVALID_GFN); } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PKEY_BITS ) pfec[0] |= PFEC_prot_key; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; out_tweak_pfec: /* * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS: * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled. */ if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) ) pfec[0] &= ~PFEC_insn_fetch; return gfn_x(INVALID_GFN); }
/* * Returns 0 if TLB flush / invalidate required by caller. * va will indicate the address to be invalidated. * * addr is _either_ a host virtual address, or the address of the pte to * update, as indicated by the GNTMAP_contains_pte flag. */ static void __gnttab_map_grant_ref( struct gnttab_map_grant_ref *op) { struct domain *ld, *rd; struct vcpu *led; int handle; unsigned long frame = 0; int rc = GNTST_okay; unsigned int cache_flags; struct active_grant_entry *act; struct grant_mapping *mt; grant_entry_t *sha; union grant_combo scombo, prev_scombo, new_scombo; /* * We bound the number of times we retry CMPXCHG on memory locations that * we share with a guest OS. The reason is that the guest can modify that * location at a higher rate than we can read-modify-CMPXCHG, so the guest * could cause us to livelock. There are a few cases where it is valid for * the guest to race our updates (e.g., to change the GTF_readonly flag), * so we allow a few retries before failing. */ int retries = 0; led = current; ld = led->domain; if ( unlikely((op->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) ) { gdprintk(XENLOG_INFO, "Bad flags in grant map op (%x).\n", op->flags); op->status = GNTST_bad_gntref; return; } if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) ) { gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom); op->status = GNTST_bad_domain; return; } rc = xsm_grant_mapref(ld, rd, op->flags); if ( rc ) { rcu_unlock_domain(rd); op->status = GNTST_permission_denied; return; } if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) { rcu_unlock_domain(rd); gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle.\n"); op->status = GNTST_no_device_space; return; } spin_lock(&rd->grant_table->lock); /* Bounds check on the grant ref */ if ( unlikely(op->ref >= nr_grant_entries(rd->grant_table))) PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref); act = &active_entry(rd->grant_table, op->ref); sha = &shared_entry(rd->grant_table, op->ref); /* If already pinned, check the active domid and avoid refcnt overflow. */ if ( act->pin && ((act->domid != ld->domain_id) || (act->pin & 0x80808080U) != 0) ) PIN_FAIL(unlock_out, GNTST_general_error, "Bad domain (%d != %d), or risk of counter overflow %08x\n", act->domid, ld->domain_id, act->pin); if ( !act->pin || (!(op->flags & GNTMAP_readonly) && !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) ) { scombo.word = *(u32 *)&sha->flags; /* * This loop attempts to set the access (reading/writing) flags * in the grant table entry. It tries a cmpxchg on the field * up to five times, and then fails under the assumption that * the guest is misbehaving. */ for ( ; ; ) { /* If not already pinned, check the grant domid and type. */ if ( !act->pin && (((scombo.shorts.flags & GTF_type_mask) != GTF_permit_access) || (scombo.shorts.domid != ld->domain_id)) ) PIN_FAIL(unlock_out, GNTST_general_error, "Bad flags (%x) or dom (%d). 
(expected dom %d)\n", scombo.shorts.flags, scombo.shorts.domid, ld->domain_id); new_scombo = scombo; new_scombo.shorts.flags |= GTF_reading; if ( !(op->flags & GNTMAP_readonly) ) { new_scombo.shorts.flags |= GTF_writing; if ( unlikely(scombo.shorts.flags & GTF_readonly) ) PIN_FAIL(unlock_out, GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); } prev_scombo.word = cmpxchg((u32 *)&sha->flags, scombo.word, new_scombo.word); if ( likely(prev_scombo.word == scombo.word) ) break; if ( retries++ == 4 ) PIN_FAIL(unlock_out, GNTST_general_error, "Shared grant entry is unstable.\n"); scombo = prev_scombo; } if ( !act->pin ) { act->domid = scombo.shorts.domid; act->frame = gmfn_to_mfn(rd, sha->frame); } } if ( op->flags & GNTMAP_device_map ) act->pin += (op->flags & GNTMAP_readonly) ? GNTPIN_devr_inc : GNTPIN_devw_inc; if ( op->flags & GNTMAP_host_map ) act->pin += (op->flags & GNTMAP_readonly) ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; frame = act->frame; cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); spin_unlock(&rd->grant_table->lock); if ( is_iomem_page(frame) ) { if ( !iomem_access_permitted(rd, frame, frame) ) { gdprintk(XENLOG_WARNING, "Iomem mapping not permitted %lx (domain %d)\n", frame, rd->domain_id); rc = GNTST_general_error; goto undo_out; } rc = create_grant_host_mapping( op->host_addr, frame, op->flags, cache_flags); if ( rc != GNTST_okay ) goto undo_out; } else { if ( unlikely(!mfn_valid(frame)) || unlikely(!(gnttab_host_mapping_get_page_type(op, ld, rd) ? get_page_and_type(mfn_to_page(frame), rd, PGT_writable_page) : get_page(mfn_to_page(frame), rd))) ) { if ( !rd->is_dying ) gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n", frame); rc = GNTST_general_error; goto undo_out; } if ( op->flags & GNTMAP_host_map ) { rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0); if ( rc != GNTST_okay ) { if ( gnttab_host_mapping_get_page_type(op, ld, rd) ) put_page_type(mfn_to_page(frame)); put_page(mfn_to_page(frame)); goto undo_out; } if ( op->flags & GNTMAP_device_map ) { (void)get_page(mfn_to_page(frame), rd); if ( !(op->flags & GNTMAP_readonly) ) get_page_type(mfn_to_page(frame), PGT_writable_page); } } } TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom); mt = &maptrack_entry(ld->grant_table, handle); mt->domid = op->dom; mt->ref = op->ref; mt->flags = op->flags; op->dev_bus_addr = (u64)frame << PAGE_SHIFT; op->handle = handle; op->status = GNTST_okay; rcu_unlock_domain(rd); return; undo_out: spin_lock(&rd->grant_table->lock); act = &active_entry(rd->grant_table, op->ref); sha = &shared_entry(rd->grant_table, op->ref); if ( op->flags & GNTMAP_device_map ) act->pin -= (op->flags & GNTMAP_readonly) ? GNTPIN_devr_inc : GNTPIN_devw_inc; if ( op->flags & GNTMAP_host_map ) act->pin -= (op->flags & GNTMAP_readonly) ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; if ( !(op->flags & GNTMAP_readonly) && !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) gnttab_clear_flag(_GTF_writing, &sha->flags); if ( !act->pin ) gnttab_clear_flag(_GTF_reading, &sha->flags); unlock_out: spin_unlock(&rd->grant_table->lock); op->status = rc; put_maptrack_handle(ld->grant_table, handle); rcu_unlock_domain(rd); }
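/*
 * Driver-side sketch of the request serviced by __gnttab_map_grant_ref()
 * above (assumes the public grant-table ABI and a Linux-style
 * HYPERVISOR_grant_table_op wrapper; error handling kept minimal):
 */
static int map_remote_grant(domid_t remote_dom, grant_ref_t ref,
                            unsigned long host_addr, int readonly,
                            grant_handle_t *handle)
{
    struct gnttab_map_grant_ref op = {
        .host_addr = host_addr,
        .flags     = GNTMAP_host_map | (readonly ? GNTMAP_readonly : 0),
        .ref       = ref,
        .dom       = remote_dom,
    };

    if ( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) )
        return -EFAULT;              /* hypercall itself failed           */
    if ( op.status != GNTST_okay )
        return op.status;            /* grant-level error (see GNTST_*)   */

    *handle = op.handle;             /* needed later to unmap the grant   */
    return 0;
}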