int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) { struct p2m_domain *p2m = p2m_get_hostp2m(d); int rc = 0; p2m_access_t a; p2m_type_t ot; mfn_t omfn; unsigned long pg_type; if ( !paging_mode_translate(p2m->domain) ) return 0; gfn_lock(p2m, gfn, 0); omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL); /* At the moment we only allow p2m change if gfn has already been made * sharable first */ ASSERT(p2m_is_shared(ot)); ASSERT(mfn_valid(omfn)); /* Set the m2p entry to invalid only if there are no further type * refs to this page as shared */ pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info)); if ( (pg_type & PGT_count_mask) == 0 || (pg_type & PGT_type_mask) != PGT_shared_page ) set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn)); rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared, p2m->default_access); gfn_unlock(p2m, gfn, 0); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot))); return rc; }
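/*
 * Illustration only: a minimal, self-contained sketch of the predicate used
 * above to decide whether the M2P entry for the old MFN can be invalidated.
 * The bit layout below (type in the top bits, reference count in the low
 * bits) is an assumption chosen for the demo and does not reproduce Xen's
 * real PGT_* constants; only the shape of the test matches the code above.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_PGT_COUNT_MASK  0x0000ffffUL          /* low bits: type refs   */
#define DEMO_PGT_TYPE_MASK   0xf0000000UL          /* top bits: page type   */
#define DEMO_PGT_SHARED_PAGE 0x50000000UL          /* hypothetical encoding */

/* True when no outstanding shared-type references keep the M2P entry alive. */
static bool m2p_can_be_invalidated(unsigned long pg_type)
{
    return (pg_type & DEMO_PGT_COUNT_MASK) == 0 ||
           (pg_type & DEMO_PGT_TYPE_MASK) != DEMO_PGT_SHARED_PAGE;
}

int main(void)
{
    /* Shared page with two remaining type refs: M2P entry must stay. */
    printf("%d\n", m2p_can_be_invalidated(DEMO_PGT_SHARED_PAGE | 2)); /* 0 */
    /* Shared page with no refs left: safe to invalidate. */
    printf("%d\n", m2p_can_be_invalidated(DEMO_PGT_SHARED_PAGE | 0)); /* 1 */
    /* Page already re-typed to something else: also safe. */
    printf("%d\n", m2p_can_be_invalidated(0x10000000UL | 3));         /* 1 */
    return 0;
}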
int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) { int rc = 0; p2m_access_t a; p2m_type_t ot; mfn_t omfn; struct p2m_domain *p2m = p2m_get_hostp2m(d); if ( !paging_mode_translate(d) ) return 0; gfn_lock(p2m, gfn, 0); omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL); if ( p2m_is_grant(ot) ) { p2m_unlock(p2m); domain_crash(d); return 0; } else if ( p2m_is_ram(ot) ) { ASSERT(mfn_valid(omfn)); set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); } P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn)); rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_mmio_direct, p2m->default_access); gfn_unlock(p2m, gfn, 0); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot))); return rc; }
int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma) { struct domain *d = p2m->domain; unsigned long todo = 1ul << page_order; unsigned int order; int rc = 1; ASSERT(gfn_locked_by_me(p2m, gfn)); while ( todo ) { if ( hap_enabled(d) ) order = ( (((gfn | mfn_x(mfn) | todo) & ((1ul << PAGE_ORDER_1G) - 1)) == 0) && hvm_hap_has_1gb(d) && opt_hap_1gb ) ? PAGE_ORDER_1G : ((((gfn | mfn_x(mfn) | todo) & ((1ul << PAGE_ORDER_2M) - 1)) == 0) && hvm_hap_has_2mb(d) && opt_hap_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K; else order = 0; if ( !p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma) ) rc = 0; gfn += 1ul << order; if ( mfn_x(mfn) != INVALID_MFN ) mfn = _mfn(mfn_x(mfn) + (1ul << order)); todo -= 1ul << order; } return rc; }
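/*
 * Illustration only: the order-selection logic above, pulled out into a
 * standalone helper so the alignment test is easier to read. The 1GB/2MB
 * feature checks (hvm_hap_has_1gb(), opt_hap_1gb, ...) are replaced by two
 * plain booleans; the PAGE_ORDER_* values match the usual x86 definitions.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_PAGE_ORDER_4K 0
#define DEMO_PAGE_ORDER_2M 9
#define DEMO_PAGE_ORDER_1G 18

/*
 * A mapping of a given order is usable only if the gfn, the mfn and the
 * number of remaining pages are all multiples of (1 << order).
 */
static unsigned int choose_order(unsigned long gfn, unsigned long mfn,
                                 unsigned long todo,
                                 bool have_1gb, bool have_2mb)
{
    unsigned long bits = gfn | mfn | todo;

    if ( have_1gb && (bits & ((1UL << DEMO_PAGE_ORDER_1G) - 1)) == 0 )
        return DEMO_PAGE_ORDER_1G;
    if ( have_2mb && (bits & ((1UL << DEMO_PAGE_ORDER_2M) - 1)) == 0 )
        return DEMO_PAGE_ORDER_2M;
    return DEMO_PAGE_ORDER_4K;
}

int main(void)
{
    /* Everything 1GB-aligned: a single 1GB entry can be used. */
    printf("%u\n", choose_order(0x40000, 0x80000, 0x40000, true, true)); /* 18 */
    /* Only 2MB-aligned (todo is one 2MB chunk): fall back to 2MB. */
    printf("%u\n", choose_order(0x200, 0x400, 0x200, true, true));       /* 9 */
    /* Misaligned gfn: only 4KB mappings are possible. */
    printf("%u\n", choose_order(0x201, 0x400, 0x200, true, true));       /* 0 */
    return 0;
}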
/*
 * Currently all CPUs rendezvous in the MCE softirq handler, so there is no
 * need to consider the paging p2m types.
 * Currently only HVM guests with EPT paging mode are supported.
 * XXX the following situations are not yet handled:
 *     PoD, foreign mapped, granted, shared pages.
 */
int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn)
{
    mfn_t r_mfn;
    p2m_type_t pt;
    int rc;

    /* Always trust that dom0's MCE handler will prevent future accesses */
    if ( d == dom0 )
        return 0;

    if ( !mfn_valid(mfn_x(mfn)) )
        return -EINVAL;

    if ( !has_hvm_container_domain(d) || !paging_mode_hap(d) )
        return -ENOSYS;

    rc = -1;
    r_mfn = get_gfn_query(d, gfn, &pt);
    if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES )
    {
        ASSERT(mfn_x(r_mfn) == mfn_x(mfn));
        p2m_change_type(d, gfn, pt, p2m_ram_broken);
        rc = 0;
    }
    put_gfn(d, gfn);

    return rc;
}
LOCAL int mcd_event_enable(struct domain *d, mfn_t ring_mfn, mfn_t shared_mfn)
{
    int rc = 0;

    /* Map ring and shared pages */
    d->mcd_event.ring_page = map_domain_page(mfn_x(ring_mfn));
    if ( d->mcd_event.ring_page == NULL )
        goto err;

    /* Only one shared page is mapped by default */
    d->mcd_event.num_shared_page = 1;
    d->mcd_event.shared_page[0] = map_domain_page(mfn_x(shared_mfn));
    if ( d->mcd_event.shared_page[0] == NULL )
        goto err_ring;

    /*
     * TODO: decide whether a dedicated event channel is needed, or whether
     * ring notifications alone are sufficient given the delay before a ring
     * notification is received.
     */

    /* Allocate event channel */
    rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                         current->domain->domain_id);
    if ( rc < 0 )
        goto err_shared;

    /*
     * The shared page's data area is used as a buffer, so stash the port
     * number there to avoid future conflicts.
     */
    memcpy(((mcd_event_shared_page_t *)d->mcd_event.shared_page[0])->data,
           &rc, sizeof(int));
    d->mcd_event.xen_port = rc;

    /* Prepare ring buffer */
    FRONT_RING_INIT(&d->mcd_event.front_ring,
                    (mcd_event_sring_t *)d->mcd_event.ring_page,
                    PAGE_SIZE);

    mcd_event_ring_lock_init(d);

    init_mcdctl();

    return 0;

 err_shared:
    unmap_domain_page(d->mcd_event.shared_page[0]);
    d->mcd_event.shared_page[0] = NULL;
 err_ring:
    unmap_domain_page(d->mcd_event.ring_page);
    d->mcd_event.ring_page = NULL;
 err:
    return 1;
}
static void p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, unsigned int page_order) { unsigned long i; mfn_t mfn_return; p2m_type_t t; p2m_access_t a; if ( !paging_mode_translate(p2m->domain) ) { if ( need_iommu(p2m->domain) ) for ( i = 0; i < (1 << page_order); i++ ) iommu_unmap_page(p2m->domain, mfn + i); return; } ASSERT(gfn_locked_by_me(p2m, gfn)); P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); if ( mfn_valid(_mfn(mfn)) ) { for ( i = 0; i < (1UL << page_order); i++ ) { mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, 0, NULL); if ( !p2m_is_grant(t) && !p2m_is_shared(t) ) set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); } } set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid, p2m->default_access); }
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
{
    int rc = 0;
    mfn_t mfn;
    p2m_access_t a;
    p2m_type_t t;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( !paging_mode_translate(d) )
        return 0;

    gfn_lock(p2m, gfn, 0);
    mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL);

    /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
    if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
    {
        gdprintk(XENLOG_ERR,
                 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
        goto out;
    }
    rc = set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K,
                       p2m_invalid, p2m->default_access);

 out:
    gfn_unlock(p2m, gfn, 0);
    return rc;
}
/* Returns: mfn for the given (hvm guest) vaddr */ static mfn_t dbg_hvm_va2mfn(dbgva_t vaddr, struct domain *dp, int toaddr, gfn_t *gfn) { mfn_t mfn; uint32_t pfec = PFEC_page_present; p2m_type_t gfntype; DBGP2("vaddr:%lx domid:%d\n", vaddr, dp->domain_id); *gfn = _gfn(paging_gva_to_gfn(dp->vcpu[0], vaddr, &pfec)); if ( gfn_eq(*gfn, INVALID_GFN) ) { DBGP2("kdb:bad gfn from gva_to_gfn\n"); return INVALID_MFN; } mfn = get_gfn(dp, gfn_x(*gfn), &gfntype); if ( p2m_is_readonly(gfntype) && toaddr ) { DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype); mfn = INVALID_MFN; } else DBGP2("X: vaddr:%lx domid:%d mfn:%#"PRI_mfn"\n", vaddr, dp->domain_id, mfn_x(mfn)); if ( mfn_eq(mfn, INVALID_MFN) ) { put_gfn(dp, gfn_x(*gfn)); *gfn = INVALID_GFN; } return mfn; }
static void enable_hypercall_page(struct domain *d) { unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); uint8_t *p; if ( !page || !get_page_type(page, PGT_writable_page) ) { if ( page ) put_page(page); gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN)); return; } p = __map_domain_page(page); /* * We set the bit 31 in %eax (reserved field in the Viridian hypercall * calling convention) to differentiate Xen and Viridian hypercalls. */ *(u8 *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */ *(u32 *)(p + 1) = 0x80000000; *(u8 *)(p + 5) = 0x0f; /* vmcall/vmmcall */ *(u8 *)(p + 6) = 0x01; *(u8 *)(p + 7) = (cpu_has_vmx ? 0xc1 : 0xd9); *(u8 *)(p + 8) = 0xc3; /* ret */ memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */ unmap_domain_page(p); put_page_and_type(page); }
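/*
 * Illustration only: the hypercall stub emitted above, reproduced as a byte
 * array with the corresponding disassembly, so the encoding is easier to
 * audit. The vmcall/vmmcall choice is shown for the Intel case; nothing here
 * is executed, the bytes are only printed.
 */
#include <stdio.h>

int main(void)
{
    static const unsigned char stub[] = {
        0x0d, 0x00, 0x00, 0x00, 0x80, /* or   $0x80000000, %eax           */
        0x0f, 0x01, 0xc1,             /* vmcall (0xd9 -> vmmcall on AMD)  */
        0xc3,                         /* ret                              */
        /* the rest of the page is filled with 0xcc (int3) */
    };
    unsigned int i;

    for ( i = 0; i < sizeof(stub); i++ )
        printf("%02x ", stub[i]);
    printf("\n");
    return 0;
}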
/* Returns: mfn for the given (hvm guest) vaddr */ static unsigned long dbg_hvm_va2mfn(dbgva_t vaddr, struct domain *dp, int toaddr, unsigned long *gfn) { unsigned long mfn; uint32_t pfec = PFEC_page_present; p2m_type_t gfntype; DBGP2("vaddr:%lx domid:%d\n", vaddr, dp->domain_id); *gfn = paging_gva_to_gfn(dp->vcpu[0], vaddr, &pfec); if ( *gfn == INVALID_GFN ) { DBGP2("kdb:bad gfn from gva_to_gfn\n"); return INVALID_MFN; } mfn = mfn_x(get_gfn(dp, *gfn, &gfntype)); if ( p2m_is_readonly(gfntype) && toaddr ) { DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype); return INVALID_MFN; } DBGP2("X: vaddr:%lx domid:%d mfn:%lx\n", vaddr, dp->domain_id, mfn); return mfn; }
/* * Mark (via clearing the U flag) as needing P2M type re-calculation all valid * present entries at the targeted level for the passed in GFN range, which is * guaranteed to not cross a page (table) boundary at that level. */ static int p2m_pt_set_recalc_range(struct p2m_domain *p2m, unsigned int level, unsigned long first_gfn, unsigned long last_gfn) { void *table; unsigned long gfn_remainder = first_gfn, remainder; unsigned int i; l1_pgentry_t *pent, *plast; int err = 0; table = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); for ( i = 4; i-- > level; ) { remainder = gfn_remainder; pent = p2m_find_entry(table, &remainder, first_gfn, i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER); if ( !pent ) { err = -EINVAL; goto out; } if ( !(l1e_get_flags(*pent) & _PAGE_PRESENT) ) goto out; err = p2m_next_level(p2m, &table, &gfn_remainder, first_gfn, i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER, pgt[i - 1], 1); if ( err ) goto out; } remainder = gfn_remainder + (last_gfn - first_gfn); pent = p2m_find_entry(table, &gfn_remainder, first_gfn, i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER); plast = p2m_find_entry(table, &remainder, last_gfn, i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER); if ( pent && plast ) for ( ; pent <= plast; ++pent ) { l1_pgentry_t e = *pent; if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) ) { set_recalc(l1, e); p2m->write_p2m_entry(p2m, first_gfn, pent, e, level); } first_gfn += 1UL << (i * PAGETABLE_ORDER); } else err = -EIO; out: unmap_domain_page(table); return err; }
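/*
 * Illustration only: the function above requires that [first_gfn, last_gfn]
 * not cross a page-table boundary at the given level. This standalone sketch
 * shows one way a caller can clamp a larger range accordingly, assuming
 * 9-bit page-table levels (PAGETABLE_ORDER == 9); the helper name is made up
 * for the demo.
 */
#include <stdio.h>

#define DEMO_PAGETABLE_ORDER 9

/* Last GFN that still lies in the same level-'level' table as first_gfn. */
static unsigned long clamp_to_table(unsigned int level,
                                    unsigned long first_gfn,
                                    unsigned long last_gfn)
{
    unsigned long mask = (1UL << (level * DEMO_PAGETABLE_ORDER)) - 1;
    unsigned long end = first_gfn | mask;

    return end < last_gfn ? end : last_gfn;
}

int main(void)
{
    /* A range spanning two L1 tables gets cut at the first table's end. */
    printf("%#lx\n", clamp_to_table(1, 0x1f0, 0x310)); /* 0x1ff */
    /* A range already inside one table is left alone. */
    printf("%#lx\n", clamp_to_table(1, 0x200, 0x2ff)); /* 0x2ff */
    return 0;
}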
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); p2m_mem_paging_drop_page(p2m_get_hostp2m(d), gmfn); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } page = mfn_to_page(mfn); #ifdef CONFIG_X86 /* If gmfn is shared, just drop the guest reference (which may or may not * free the page) */ if(p2m_is_shared(p2mt)) { put_page_and_type(page); guest_physmap_remove_page(d, gmfn, mfn, 0); return 1; } #endif /* CONFIG_X86 */ if ( unlikely(!get_page(page, d)) ) { gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); return 1; }
static int __init pvh_setup_vmx_realmode_helpers(struct domain *d) { p2m_type_t p2mt; uint32_t rc, *ident_pt; mfn_t mfn; paddr_t gaddr; struct vcpu *v = d->vcpu[0]; /* * Steal some space from the last RAM region below 4GB and use it to * store the real-mode TSS. It needs to be aligned to 128 so that the * TSS structure (which accounts for the first 104b) doesn't cross * a page boundary. */ if ( !pvh_steal_ram(d, HVM_VM86_TSS_SIZE, 128, GB(4), &gaddr) ) { if ( hvm_copy_to_guest_phys(gaddr, NULL, HVM_VM86_TSS_SIZE, v) != HVMCOPY_okay ) printk("Unable to zero VM86 TSS area\n"); d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZED] = VM86_TSS_UPDATED | ((uint64_t)HVM_VM86_TSS_SIZE << 32) | gaddr; if ( pvh_add_mem_range(d, gaddr, gaddr + HVM_VM86_TSS_SIZE, E820_RESERVED) ) printk("Unable to set VM86 TSS as reserved in the memory map\n"); } else printk("Unable to allocate VM86 TSS area\n"); /* Steal some more RAM for the identity page tables. */ if ( pvh_steal_ram(d, PAGE_SIZE, PAGE_SIZE, GB(4), &gaddr) ) { printk("Unable to find memory to stash the identity page tables\n"); return -ENOMEM; } /* * Identity-map page table is required for running with CR0.PG=0 * when using Intel EPT. Create a 32-bit non-PAE page directory of * superpages. */ ident_pt = map_domain_gfn(p2m_get_hostp2m(d), _gfn(PFN_DOWN(gaddr)), &mfn, &p2mt, 0, &rc); if ( ident_pt == NULL ) { printk("Unable to map identity page tables\n"); return -ENOMEM; } write_32bit_pse_identmap(ident_pt); unmap_domain_page(ident_pt); put_page(mfn_to_page(mfn_x(mfn))); d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] = gaddr; if ( pvh_add_mem_range(d, gaddr, gaddr + PAGE_SIZE, E820_RESERVED) ) printk("Unable to set identity page tables as reserved in the memory map\n"); return 0; }
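/*
 * Illustration only: a quick check of the alignment argument in the comment
 * above - if the real-mode TSS base is 128-byte aligned, the first 104 bytes
 * (the architectural TSS) can never straddle a 4KB page boundary, because a
 * 128-byte-aligned offset within a page leaves at least 128 bytes before the
 * next page starts.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned long base;

    for ( base = 0; base < (1UL << 20); base += 128 )
    {
        unsigned long room = 4096 - (base & 4095);

        assert(room >= 128);             /* hence the 104-byte TSS fits */
        assert((base & ~4095UL) == ((base + 104 - 1) & ~4095UL));
    }
    printf("no 128-byte-aligned TSS crosses a page boundary\n");
    return 0;
}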
// Allocate a new p2m table for a domain.
//
// The structure of the p2m table is that of a pagetable for xen (i.e. it is
// controlled by CONFIG_PAGING_LEVELS).
//
// Returns 0 for success or -errno.
//
int p2m_alloc_table(struct p2m_domain *p2m)
{
    struct page_info *p2m_top;
    struct domain *d = p2m->domain;

    p2m_lock(p2m);

    if ( !p2m_is_nestedp2m(p2m) && !page_list_empty(&d->page_list) )
    {
        P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
        p2m_unlock(p2m);
        return -EINVAL;
    }

    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        P2M_ERROR("p2m already allocated for this domain\n");
        p2m_unlock(p2m);
        return -EINVAL;
    }

    P2M_PRINTK("allocating p2m table\n");

    p2m_top = p2m_alloc_ptp(p2m, PGT_l4_page_table);
    if ( p2m_top == NULL )
    {
        p2m_unlock(p2m);
        return -ENOMEM;
    }

    p2m->phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));

    if ( hap_enabled(d) )
        iommu_share_p2m_table(d);

    P2M_PRINTK("populating p2m table\n");

    /* Initialise physmap tables for slot zero. Other code assumes this. */
    p2m->defer_nested_flush = 1;
    if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), PAGE_ORDER_4K,
                        p2m_invalid, p2m->default_access) )
        goto error;
    p2m->defer_nested_flush = 0;

    P2M_PRINTK("p2m table initialised\n");
    p2m_unlock(p2m);
    return 0;

 error:
    P2M_PRINTK("failed to initialise p2m table for slot zero\n");
    p2m_unlock(p2m);
    return -ENOMEM;
}
static int mem_event_enable(struct domain *d, mfn_t ring_mfn, mfn_t shared_mfn) { int rc; /* Map ring and shared pages */ d->mem_event.ring_page = map_domain_page(mfn_x(ring_mfn)); if ( d->mem_event.ring_page == NULL ) goto err; d->mem_event.shared_page = map_domain_page(mfn_x(shared_mfn)); if ( d->mem_event.shared_page == NULL ) goto err_ring; /* Allocate event channel */ rc = alloc_unbound_xen_event_channel(d->vcpu[0], current->domain->domain_id); if ( rc < 0 ) goto err_shared; ((mem_event_shared_page_t *)d->mem_event.shared_page)->port = rc; d->mem_event.xen_port = rc; /* Prepare ring buffer */ FRONT_RING_INIT(&d->mem_event.front_ring, (mem_event_sring_t *)d->mem_event.ring_page, PAGE_SIZE); mem_event_ring_lock_init(d); /* Wake any VCPUs paused for memory events */ mem_event_unpause_vcpus(d); return 0; err_shared: unmap_domain_page(d->mem_event.shared_page); d->mem_event.shared_page = NULL; err_ring: unmap_domain_page(d->mem_event.ring_page); d->mem_event.ring_page = NULL; err: return 1; }
/* * Set access type for a region of gfns. * If gfn == INVALID_GFN, sets the default access type. */ long p2m_set_mem_access(struct domain *d, gfn_t gfn, uint32_t nr, uint32_t start, uint32_t mask, xenmem_access_t access, unsigned int altp2m_idx) { struct p2m_domain *p2m = p2m_get_hostp2m(d), *ap2m = NULL; p2m_access_t a; unsigned long gfn_l; long rc = 0; /* altp2m view 0 is treated as the hostp2m */ if ( altp2m_idx ) { if ( altp2m_idx >= MAX_ALTP2M || d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) ) return -EINVAL; ap2m = d->arch.altp2m_p2m[altp2m_idx]; } if ( !xenmem_access_to_p2m_access(p2m, access, &a) ) return -EINVAL; /* If request to set default access. */ if ( gfn_eq(gfn, INVALID_GFN) ) { p2m->default_access = a; return 0; } p2m_lock(p2m); if ( ap2m ) p2m_lock(ap2m); for ( gfn_l = gfn_x(gfn) + start; nr > start; ++gfn_l ) { rc = set_mem_access(d, p2m, ap2m, a, _gfn(gfn_l)); if ( rc ) break; /* Check for continuation if it's not the last iteration. */ if ( nr > ++start && !(start & mask) && hypercall_preempt_check() ) { rc = start; break; } } if ( ap2m ) p2m_unlock(ap2m); p2m_unlock(p2m); return rc; }
static void initialize_apic_assist(struct vcpu *v) { struct domain *d = v->domain; unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.msr.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); void *va; /* * See section 13.3.4.1 of the specification for details of this * enlightenment. */ if ( !page ) goto fail; if ( !get_page_type(page, PGT_writable_page) ) { put_page(page); goto fail; } va = __map_domain_page_global(page); if ( !va ) { put_page_and_type(page); goto fail; } *(uint32_t *)va = 0; if ( viridian_feature_mask(v->domain) & HVMPV_apic_assist ) { /* * If we overwrite an existing address here then something has * gone wrong and a domain page will leak. Instead crash the * domain to make the problem obvious. */ if ( v->arch.hvm_vcpu.viridian.apic_assist.va ) domain_crash(d); v->arch.hvm_vcpu.viridian.apic_assist.va = va; return; } unmap_domain_page_global(va); put_page_and_type(page); return; fail: gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN)); }
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, unsigned long gva, uint32_t *pfec) { unsigned long cr3; uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; /* Get the top-level table's MFN */ cr3 = v->arch.hvm_vcpu.guest_cr[3]; top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt); if ( !p2m_is_ram(p2mt) ) { pfec[0] &= ~PFEC_page_present; return INVALID_GFN; } /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(mfn_x(top_mfn))); top_map = map_domain_page(mfn_x(top_mfn)); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, gva, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); /* Interpret the answer */ if ( missing == 0 ) return gfn_x(guest_l1e_get_gfn(gw.l1e)); if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; return INVALID_GFN; }
static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
{
    unsigned long flags;
#ifdef __x86_64__
    /*
     * AMD IOMMU: when the p2m table is shared with the IOMMU, bits 9-11 are
     * used by the IOMMU hardware to encode the next IO page level and bits
     * 59-62 are used for IOMMU flags, so we cannot use those bits to store
     * p2m types.
     */
    flags = (unsigned long)(t & 0x7f) << 12;
#else
    flags = (t & 0x7UL) << 9;
#endif
#ifndef __x86_64__
    /* 32-bit builds don't support a lot of the p2m types */
    BUG_ON(t > p2m_populate_on_demand);
#endif

    switch ( t )
    {
    case p2m_invalid:
    case p2m_mmio_dm:
    case p2m_populate_on_demand:
    case p2m_ram_paging_out:
    case p2m_ram_paged:
    case p2m_ram_paging_in:
    default:
        return flags | _PAGE_NX_BIT;
    case p2m_grant_map_ro:
        return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
    case p2m_ram_ro:
    case p2m_ram_logdirty:
    case p2m_ram_shared:
        return flags | P2M_BASE_FLAGS;
    case p2m_ram_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    case p2m_grant_map_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
    case p2m_mmio_direct:
        if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
            flags |= _PAGE_RW;
        return flags | P2M_BASE_FLAGS | _PAGE_PCD;
    }
}
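/*
 * Illustration only: a round-trip of the type encoding used above on 64-bit
 * builds - the p2m type is stashed in bits 12-18 of the software flags word
 * and recovered with the obvious inverse. This is a standalone sketch; it
 * does not model how those flag bits are later packed into the hardware PTE.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long demo_type_to_flags(unsigned int t)
{
    return (unsigned long)(t & 0x7f) << 12;
}

static unsigned int demo_flags_to_type(unsigned long flags)
{
    return (flags >> 12) & 0x7f;
}

int main(void)
{
    unsigned int t;

    /* Every 7-bit type value survives the round trip. */
    for ( t = 0; t < 0x80; t++ )
        assert(demo_flags_to_type(demo_type_to_flags(t) | 0xfff) == t);

    printf("type round-trip ok; low 12 bits are free for PTE flags\n");
    return 0;
}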
LOCAL int mcd_event_add_shared_page(struct domain *d, mfn_t shared_mfn)
{
    int curr_num = d->mcd_event.num_shared_page;

    if ( curr_num < 0 || curr_num >= MAX_SHARED_PAGES )
    {
        printk("invalid shared page count %d\n", curr_num);
        return 1;
    }

    d->mcd_event.shared_page[curr_num] = map_domain_page(mfn_x(shared_mfn));
    if ( d->mcd_event.shared_page[curr_num] == NULL )
    {
        printk("failed to map shared page %d\n", curr_num);
        return 1;
    }

    d->mcd_event.num_shared_page++;

    return 0;
}
/** * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out * @d: guest domain * @gfn: guest page to nominate * * Returns 0 for success or negative errno values if gfn is not pageable. * * p2m_mem_paging_nominate() is called by the pager and checks if a guest page * can be paged out. If the following conditions are met the p2mt will be * changed: * - the gfn is backed by a mfn * - the p2mt of the gfn is pageable * - the mfn is not used for IO * - the mfn has exactly one user and has no special meaning * * Once the p2mt is changed the page is readonly for the guest. On success the * pager can write the page contents to disk and later evict the page. */ int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn) { struct page_info *page; struct p2m_domain *p2m = p2m_get_hostp2m(d); p2m_type_t p2mt; p2m_access_t a; mfn_t mfn; int ret = -EBUSY; gfn_lock(p2m, gfn, 0); mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL); /* Check if mfn is valid */ if ( !mfn_valid(mfn) ) goto out; /* Check p2m type */ if ( !p2m_is_pageable(p2mt) ) goto out; /* Check for io memory page */ if ( is_iomem_page(mfn_x(mfn)) ) goto out; /* Check page count and type */ page = mfn_to_page(mfn); if ( (page->count_info & (PGC_count_mask | PGC_allocated)) != (1 | PGC_allocated) ) goto out; if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) goto out; /* Fix p2m entry */ set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a); ret = 0; out: gfn_unlock(p2m, gfn, 0); return ret; }
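/*
 * Illustration only: the "exactly one user, no special meaning" test above,
 * restated with demo constants. The PGC_*/PGT_* values here are stand-ins
 * chosen for the sketch, not Xen's real bit assignments; the point is that
 * the page must be allocated and hold exactly one reference, and must have
 * no outstanding type references.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_PGC_COUNT_MASK 0x00ffffffUL   /* low bits: general refcount  */
#define DEMO_PGC_ALLOCATED  0x80000000UL   /* demo 'allocated' flag       */
#define DEMO_PGT_COUNT_MASK 0x0000ffffUL   /* low bits: type refcount     */

static bool pageable(unsigned long count_info, unsigned long type_info)
{
    if ( (count_info & (DEMO_PGC_COUNT_MASK | DEMO_PGC_ALLOCATED)) !=
         (1 | DEMO_PGC_ALLOCATED) )
        return false;                      /* extra refs or not allocated */
    return (type_info & DEMO_PGT_COUNT_MASK) == 0;    /* no type refs */
}

int main(void)
{
    printf("%d\n", pageable(DEMO_PGC_ALLOCATED | 1, 0)); /* 1: nominate ok */
    printf("%d\n", pageable(DEMO_PGC_ALLOCATED | 2, 0)); /* 0: extra ref   */
    printf("%d\n", pageable(DEMO_PGC_ALLOCATED | 1, 1)); /* 0: typed page  */
    return 0;
}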
static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
                                       unsigned int level)
{
    unsigned long flags;

    /*
     * AMD IOMMU: when the p2m table is shared with the IOMMU, bits 9-11 are
     * used by the IOMMU hardware to encode the next IO page level and bits
     * 59-62 are used for IOMMU flags, so we cannot use those bits to store
     * p2m types.
     */
    flags = (unsigned long)(t & 0x7f) << 12;

    switch ( t )
    {
    case p2m_invalid:
    case p2m_mmio_dm:
    case p2m_populate_on_demand:
    case p2m_ram_paging_out:
    case p2m_ram_paged:
    case p2m_ram_paging_in:
    default:
        return flags | _PAGE_NX_BIT;
    case p2m_grant_map_ro:
    case p2m_mmio_write_dm:
        return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
    case p2m_ram_ro:
    case p2m_ram_logdirty:
    case p2m_ram_shared:
        return flags | P2M_BASE_FLAGS;
    case p2m_ram_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    case p2m_grant_map_rw:
    case p2m_map_foreign:
        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
    case p2m_mmio_direct:
        if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
            flags |= _PAGE_RW;
        else
            ASSERT(!level);
        return flags | P2M_BASE_FLAGS | _PAGE_PCD;
    }
}
int p2m_set_altp2m_mem_access(struct domain *d, struct p2m_domain *hp2m, struct p2m_domain *ap2m, p2m_access_t a, gfn_t gfn) { mfn_t mfn; p2m_type_t t; p2m_access_t old_a; unsigned int page_order; unsigned long gfn_l = gfn_x(gfn); int rc; mfn = ap2m->get_entry(ap2m, gfn_l, &t, &old_a, 0, NULL, NULL); /* Check host p2m if no valid entry in alternate */ if ( !mfn_valid(mfn) ) { mfn = __get_gfn_type_access(hp2m, gfn_l, &t, &old_a, P2M_ALLOC | P2M_UNSHARE, &page_order, 0); rc = -ESRCH; if ( !mfn_valid(mfn) || t != p2m_ram_rw ) return rc; /* If this is a superpage, copy that first */ if ( page_order != PAGE_ORDER_4K ) { unsigned long mask = ~((1UL << page_order) - 1); unsigned long gfn2_l = gfn_l & mask; mfn_t mfn2 = _mfn(mfn_x(mfn) & mask); rc = ap2m->set_entry(ap2m, gfn2_l, mfn2, page_order, t, old_a, 1); if ( rc ) return rc; } } return ap2m->set_entry(ap2m, gfn_l, mfn, PAGE_ORDER_4K, t, a, (current->domain != d)); }
static struct page_info *hap_alloc_p2m_page(struct domain *d) { struct page_info *pg; hap_lock(d); pg = hap_alloc(d); #if CONFIG_PAGING_LEVELS == 3 /* Under PAE mode, top-level P2M table should be allocated below 4GB space * because the size of h_cr3 is only 32-bit. We use alloc_domheap_pages to * force this requirement, and exchange the guaranteed 32-bit-clean * page for the one we just hap_alloc()ed. */ if ( d->arch.paging.hap.p2m_pages == 0 && mfn_x(page_to_mfn(pg)) >= (1UL << (32 - PAGE_SHIFT)) ) { free_domheap_page(pg); pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32)); if ( likely(pg != NULL) ) { void *p = hap_map_domain_page(page_to_mfn(pg)); clear_page(p); hap_unmap_domain_page(p); } } #endif if ( likely(pg != NULL) ) { d->arch.paging.hap.total_pages--; d->arch.paging.hap.p2m_pages++; page_set_owner(pg, d); pg->count_info = 1; } hap_unlock(d); return pg; }
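/*
 * Illustration only: the "must be below 4GB" test used above for the PAE
 * top-level p2m page, written out as a standalone check. With 4KB pages,
 * an MFN is addressable by a 32-bit h_cr3 exactly when it is below
 * 1 << (32 - PAGE_SHIFT).
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12

static bool mfn_fits_32bit_cr3(unsigned long mfn)
{
    return mfn < (1UL << (32 - DEMO_PAGE_SHIFT));
}

int main(void)
{
    printf("%d\n", mfn_fits_32bit_cr3(0x000fffff)); /* 1: just below 4GB  */
    printf("%d\n", mfn_fits_32bit_cr3(0x00100000)); /* 0: at the 4GB line */
    return 0;
}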
/* Returns: 0 for success, -errno for failure */ static int p2m_next_level(struct p2m_domain *p2m, void **table, unsigned long *gfn_remainder, unsigned long gfn, u32 shift, u32 max, unsigned long type, bool_t unmap) { l1_pgentry_t *l1_entry; l1_pgentry_t *p2m_entry; l1_pgentry_t new_entry; void *next; int i; if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, shift, max)) ) return -ENOENT; /* PoD/paging: Not present doesn't imply empty. */ if ( !l1e_get_flags(*p2m_entry) ) { struct page_info *pg; pg = p2m_alloc_ptp(p2m, type); if ( pg == NULL ) return -ENOMEM; new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), P2M_BASE_FLAGS | _PAGE_RW); switch ( type ) { case PGT_l3_page_table: p2m_add_iommu_flags(&new_entry, 3, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 4); break; case PGT_l2_page_table: p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 3); break; case PGT_l1_page_table: p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 2); break; default: BUG(); break; } } ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE)); /* split 1GB pages into 2MB pages */ if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { unsigned long flags, pfn; struct page_info *pg; pg = p2m_alloc_ptp(p2m, PGT_l2_page_table); if ( pg == NULL ) return -ENOMEM; flags = l1e_get_flags(*p2m_entry); pfn = l1e_get_pfn(*p2m_entry); l1_entry = __map_domain_page(pg); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags); p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 2); } unmap_domain_page(l1_entry); new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), P2M_BASE_FLAGS | _PAGE_RW); /* disable PSE */ p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 3); } /* split single 2MB large page into 4KB page in P2M table */ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { unsigned long flags, pfn; struct page_info *pg; pg = p2m_alloc_ptp(p2m, PGT_l1_page_table); if ( pg == NULL ) return -ENOMEM; /* New splintered mappings inherit the flags of the old superpage, * with a little reorganisation for the _PAGE_PSE_PAT bit. */ flags = l1e_get_flags(*p2m_entry); pfn = l1e_get_pfn(*p2m_entry); if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */ else flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */ l1_entry = __map_domain_page(pg); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) { new_entry = l1e_from_pfn(pfn + i, flags); p2m_add_iommu_flags(&new_entry, 0, 0); p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 1); } unmap_domain_page(l1_entry); new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), P2M_BASE_FLAGS | _PAGE_RW); p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 2); } next = map_domain_page(_mfn(l1e_get_pfn(*p2m_entry))); if ( unmap ) unmap_domain_page(*table); *table = next; return 0; }
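/*
 * Illustration only: the _PAGE_PSE_PAT fix-up performed above when a 2MB
 * superpage is split into 4KB entries. In a 2MB mapping the PAT bit sits at
 * PTE bit 12 (bit 0 of the extracted pfn); in a 4KB mapping PAT occupies the
 * same bit position as _PAGE_PSE. The flag constant below is a simplified
 * stand-in for Xen's headers.
 */
#include <stdio.h>

#define DEMO_PAGE_PSE 0x080UL   /* bit 7: PSE in an L2 entry, PAT in an L1 */

static void splinter_flags(unsigned long *pfn, unsigned long *flags)
{
    if ( *pfn & 1 )          /* _PAGE_PSE_PAT was set in the superpage */
        *pfn -= 1;           /* clear it; _PAGE_PSE now acts as _PAGE_PAT */
    else
        *flags &= ~DEMO_PAGE_PSE;   /* no PAT: drop PSE entirely */
}

int main(void)
{
    unsigned long pfn, flags;

    pfn = 0x200; flags = DEMO_PAGE_PSE | 0x63;      /* superpage, PAT clear */
    splinter_flags(&pfn, &flags);
    printf("pfn=%#lx flags=%#lx\n", pfn, flags);    /* pfn=0x200 flags=0x63 */

    pfn = 0x201; flags = DEMO_PAGE_PSE | 0x63;      /* superpage, PAT set   */
    splinter_flags(&pfn, &flags);
    printf("pfn=%#lx flags=%#lx\n", pfn, flags);    /* pfn=0x200, PSE kept  */
    return 0;
}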
static void update_reference_tsc(struct domain *d, bool_t initialize)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    HV_REFERENCE_TSC_PAGE *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    if ( initialize )
        clear_page(p);

    /*
     * This enlightenment must be disabled if the host TSC is not invariant.
     * However it is also disabled if vtsc is true (which means rdtsc is
     * being emulated). This generally happens when guest TSC freq and host
     * TSC freq don't match. The TscScale value could be adjusted to cope
     * with this, allowing vtsc to be turned off, but support for this is
     * not yet present in the hypervisor. Thus it is possible that migrating
     * a Windows VM between hosts of differing TSC frequencies may result in
     * large differences in guest performance.
     */
    if ( !host_tsc_is_safe() || d->arch.vtsc )
    {
        /*
         * The specification states that valid values of TscSequence range
         * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
         * this mechanism is no longer a reliable source of time and that
         * the VM should fall back to a different source.
         *
         * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually
         * violate the spec. and rely on a value of 0 to indicate that this
         * enlightenment should no longer be used. These two kernel
         * versions are currently the only ones to make use of this
         * enlightenment, so just use 0 here.
         */
        p->TscSequence = 0;

        printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
               d->domain_id);
        goto out;
    }

    /*
     * The guest will calculate reference time according to the following
     * formula:
     *
     * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
     *
     * Windows uses a 100ns tick, so we need a scale which is cpu
     * ticks per 100ns shifted left by 64.
     */
    p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;

    p->TscSequence++;
    if ( p->TscSequence == 0xFFFFFFFF ||
         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
        p->TscSequence = 1;

 out:
    unmap_domain_page(p);
    put_page_and_type(page);
}
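/*
 * Illustration only: a worked example of the TscScale formula above, using
 * 128-bit arithmetic (the GCC/Clang __int128 extension). For a 2.4GHz TSC
 * (tsc_khz = 2400000), one second of TSC ticks should convert to roughly
 * 10,000,000 ticks of the 100ns reference clock.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t tsc_khz = 2400000;                            /* 2.4 GHz */
    uint64_t scale = ((10000ull << 32) / tsc_khz) << 32;   /* as in the code */
    uint64_t tsc = tsc_khz * 1000;                         /* one second */
    uint64_t ref = (uint64_t)(((unsigned __int128)tsc * scale) >> 64);

    /* Expect ~10,000,000 (one second in 100ns units), minus rounding. */
    printf("TscScale=%#llx ref_ticks=%llu\n",
           (unsigned long long)scale, (unsigned long long)ref);
    return 0;
}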
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); put_gfn(d, gmfn); /* If the page hasn't yet been paged out, there is an * actual page that needs to be released. */ if ( p2mt == p2m_ram_paging_out ) { ASSERT(mfn_valid(mfn)); page = mfn_to_page(mfn); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); } p2m_mem_paging_drop_page(d, gmfn, p2mt); return 1; } if ( p2mt == p2m_mmio_direct ) { clear_mmio_p2m_entry(d, gmfn, _mfn(mfn)); put_gfn(d, gmfn); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } #ifdef CONFIG_X86 if ( p2m_is_shared(p2mt) ) { /* Unshare the page, bail out on error. We unshare because * we might be the only one using this shared page, and we * need to trigger proper cleanup. Once done, this is * like any other page. */ if ( mem_sharing_unshare_page(d, gmfn, 0) ) { put_gfn(d, gmfn); (void)mem_sharing_notify_enomem(d, gmfn, 0); return 0; } /* Maybe the mfn changed */ mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt)); ASSERT(!p2m_is_shared(p2mt)); } #endif /* CONFIG_X86 */ page = mfn_to_page(mfn); if ( unlikely(!get_page(page, d)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); /* * With the lack of an IOMMU on some platforms, domains with DMA-capable * device must retrieve the same pfn when the hypercall populate_physmap * is called. * * For this purpose (and to match populate_physmap() behavior), the page * is kept allocated. */ if ( !is_domain_direct_mapped(d) && test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); put_gfn(d, gmfn); return 1; }
long p2m_pt_audit_p2m(struct p2m_domain *p2m) { unsigned long entry_count = 0, pmbad = 0; unsigned long mfn, gfn, m2pfn; int test_linear; struct domain *d = p2m->domain; ASSERT(p2m_locked_by_me(p2m)); ASSERT(pod_locked_by_me(p2m)); test_linear = ( (d == current->domain) && !pagetable_is_null(current->arch.monitor_table) ); if ( test_linear ) flush_tlb_local(); /* Audit part one: walk the domain's p2m table, checking the entries. */ if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 ) { l2_pgentry_t *l2e; l1_pgentry_t *l1e; int i1, i2; #if CONFIG_PAGING_LEVELS == 4 l4_pgentry_t *l4e; l3_pgentry_t *l3e; int i4, i3; l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); #else /* CONFIG_PAGING_LEVELS == 3 */ l3_pgentry_t *l3e; int i3; l3e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); #endif gfn = 0; #if CONFIG_PAGING_LEVELS >= 4 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) { if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) { gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); #endif for ( i3 = 0; i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); i3++ ) { if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) { gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } /* check for 1GB super page */ if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE ) { mfn = l3e_get_pfn(l3e[i3]); ASSERT(mfn_valid(_mfn(mfn))); /* we have to cover 512x512 4K pages */ for ( i2 = 0; i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES); i2++) { m2pfn = get_gpfn_from_mfn(mfn+i2); if ( m2pfn != (gfn + i2) ) { pmbad++; P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn+i2, mfn+i2, m2pfn); BUG(); } gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } } l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) { if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) { if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) && ( p2m_flags_to_type(l2e_get_flags(l2e[i2])) == p2m_populate_on_demand ) ) entry_count+=SUPERPAGE_PAGES; gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } /* check for super page */ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE ) { mfn = l2e_get_pfn(l2e[i2]); ASSERT(mfn_valid(_mfn(mfn))); for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++) { m2pfn = get_gpfn_from_mfn(mfn+i1); /* Allow shared M2Ps */ if ( (m2pfn != (gfn + i1)) && (m2pfn != SHARED_M2P_ENTRY) ) { pmbad++; P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn+i1, mfn+i1, m2pfn); BUG(); } } gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) { p2m_type_t type; type = p2m_flags_to_type(l1e_get_flags(l1e[i1])); if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) { if ( type == p2m_populate_on_demand ) entry_count++; continue; } mfn = l1e_get_pfn(l1e[i1]); ASSERT(mfn_valid(_mfn(mfn))); m2pfn = get_gpfn_from_mfn(mfn); if ( m2pfn != gfn && type != p2m_mmio_direct && !p2m_is_grant(type) && !p2m_is_shared(type) ) { pmbad++; printk("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn, mfn, m2pfn); P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" " -> gfn %#lx\n", gfn, mfn, m2pfn); BUG(); } } unmap_domain_page(l1e); } unmap_domain_page(l2e); } #if CONFIG_PAGING_LEVELS >= 4 unmap_domain_page(l3e); } #endif #if CONFIG_PAGING_LEVELS == 4 unmap_domain_page(l4e); #else /* CONFIG_PAGING_LEVELS == 3 */ unmap_domain_page(l3e); #endif } if ( entry_count != p2m->pod.entry_count ) { 
printk("%s: refcounted entry count %ld, audit count %lu!\n", __func__, p2m->pod.entry_count, entry_count); BUG(); } return pmbad; }
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return INVALID_GFN; } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; return INVALID_GFN; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(mfn_x(top_mfn))); top_map = map_domain_page(mfn_x(top_mfn)); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_l1e_get_gfn(gw.l1e); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return INVALID_GFN; } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; return INVALID_GFN; }
/* Map a page of domheap memory */ void *map_domain_page(mfn_t mfn) { unsigned long flags; lpae_t *map = this_cpu(xen_dommap); unsigned long slot_mfn = mfn_x(mfn) & ~LPAE_ENTRY_MASK; vaddr_t va; lpae_t pte; int i, slot; local_irq_save(flags); /* The map is laid out as an open-addressed hash table where each * entry is a 2MB superpage pte. We use the available bits of each * PTE as a reference count; when the refcount is zero the slot can * be reused. */ for ( slot = (slot_mfn >> LPAE_SHIFT) % DOMHEAP_ENTRIES, i = 0; i < DOMHEAP_ENTRIES; slot = (slot + 1) % DOMHEAP_ENTRIES, i++ ) { if ( map[slot].pt.avail < 0xf && map[slot].pt.base == slot_mfn && map[slot].pt.valid ) { /* This slot already points to the right place; reuse it */ map[slot].pt.avail++; break; } else if ( map[slot].pt.avail == 0 ) { /* Commandeer this 2MB slot */ pte = mfn_to_xen_entry(slot_mfn, WRITEALLOC); pte.pt.avail = 1; write_pte(map + slot, pte); break; } } /* If the map fills up, the callers have misbehaved. */ BUG_ON(i == DOMHEAP_ENTRIES); #ifndef NDEBUG /* Searching the hash could get slow if the map starts filling up. * Cross that bridge when we come to it */ { static int max_tries = 32; if ( i >= max_tries ) { dprintk(XENLOG_WARNING, "Domheap map is filling: %i tries\n", i); max_tries *= 2; } } #endif local_irq_restore(flags); va = (DOMHEAP_VIRT_START + (slot << SECOND_SHIFT) + ((mfn_x(mfn) & LPAE_ENTRY_MASK) << THIRD_SHIFT)); /* * We may not have flushed this specific subpage at map time, * since we only flush the 4k page not the superpage */ flush_xen_data_tlb_range_va_local(va, PAGE_SIZE); return (void *)va; }
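/*
 * Illustration only: a user-space model of the probing loop above - an
 * open-addressed table of 2MB "slots", each with a small reference count,
 * where a lookup either bumps an existing matching slot or claims the first
 * free one. DEMO_ENTRIES and the refcount width are arbitrary stand-ins for
 * DOMHEAP_ENTRIES and the PTE 'avail' bits.
 */
#include <stdio.h>

#define DEMO_ENTRIES 8

struct demo_slot {
    unsigned long base;     /* 2MB-aligned mfn mapped by this slot */
    unsigned int refs;      /* 0 means the slot is free            */
};

static struct demo_slot map[DEMO_ENTRIES];

/* Returns the slot index used for slot_mfn, or -1 if the table is full. */
static int map_slot(unsigned long slot_mfn)
{
    int i, slot;

    for ( slot = slot_mfn % DEMO_ENTRIES, i = 0;
          i < DEMO_ENTRIES;
          slot = (slot + 1) % DEMO_ENTRIES, i++ )
    {
        if ( map[slot].refs > 0 && map[slot].base == slot_mfn )
        {
            map[slot].refs++;           /* already mapped: reuse */
            return slot;
        }
        if ( map[slot].refs == 0 )
        {
            map[slot].base = slot_mfn;  /* commandeer a free slot */
            map[slot].refs = 1;
            return slot;
        }
    }
    return -1;                          /* callers have misbehaved */
}

int main(void)
{
    printf("%d\n", map_slot(0x200));    /* claims slot 0x200 % 8 = 0 */
    printf("%d\n", map_slot(0x200));    /* reuses the same slot      */
    printf("%d\n", map_slot(0x208));    /* collides, probes to slot 1 */
    return 0;
}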