void share_xen_page_with_guest( struct page_info *page, struct domain *d, int readonly) { if ( page_get_owner(page) == d ) return; /* This causes us to leak pages in the domain and results in * zombie domains; I think we are missing a piece. Until we find * it we disable the following code. */ set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY); spin_lock(&d->page_alloc_lock); /* The incremented type count pins as writable or read-only. */ page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); page->u.inuse.type_info |= PGT_validated | 1; page_set_owner(page, d); wmb(); /* install valid domain ptr before updating refcnt. */ ASSERT(page->count_info == 0); /* Only add to the allocation list if the domain isn't dying. */ if ( !d->is_dying ) { page->count_info |= PGC_allocated | 1; if ( unlikely(d->xenheap_pages++ == 0) ) get_knownalive_domain(d); list_add_tail(&page->list, &d->xenpage_list); } spin_unlock(&d->page_alloc_lock); }
static void initialize_apic_assist(struct vcpu *v) { struct domain *d = v->domain; unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); uint8_t *p; /* * We don't yet make use of the APIC assist page but by setting * the CPUID3A_MSR_APIC_ACCESS bit in CPUID leaf 40000003 we are duty * bound to support the MSR. We therefore do just enough to keep Windows * happy. * * See http://msdn.microsoft.com/en-us/library/ff538657%28VS.85%29.aspx for * details of how Windows uses the page. */ if ( !page || !get_page_type(page, PGT_writable_page) ) { if ( page ) { put_page(page); gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, page_to_mfn(page)); } else gdprintk(XENLOG_WARNING, "Bad GMFN %lx\n", gmfn); return; } p = __map_domain_page(page); *(u32 *)p = 0; unmap_domain_page(p); put_page_and_type(page); }
static void enable_hypercall_page(struct domain *d) { unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); uint8_t *p; if ( !page || !get_page_type(page, PGT_writable_page) ) { if ( page ) put_page(page); gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN)); return; } p = __map_domain_page(page); /* * We set bit 31 in %eax (a reserved field in the Viridian hypercall * calling convention) to differentiate Xen hypercalls from Viridian ones. */ *(u8 *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */ *(u32 *)(p + 1) = 0x80000000; *(u8 *)(p + 5) = 0x0f; /* vmcall/vmmcall */ *(u8 *)(p + 6) = 0x01; *(u8 *)(p + 7) = (cpu_has_vmx ? 0xc1 : 0xd9); *(u8 *)(p + 8) = 0xc3; /* ret */ memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */ unmap_domain_page(p); put_page_and_type(page); }
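/*
 * Illustrative sketch, not Xen code: the nine bytes written into the
 * hypercall page above decode as shown here.  The buffer size and the
 * helper name are assumptions of this example; only the byte values and
 * their x86 meanings come from the function above.
 */
#include <stdint.h>
#include <string.h>

#define EXAMPLE_PAGE_SIZE 4096u                 /* assumed page size */

void build_viridian_stub(uint8_t *p, int use_vmcall)
{
    p[0] = 0x0d;                                /* or $imm32, %eax          */
    p[1] = 0x00;                                /* imm32 = 0x80000000,      */
    p[2] = 0x00;                                /* stored little-endian:    */
    p[3] = 0x00;                                /* setting bit 31 marks the */
    p[4] = 0x80;                                /* call as a Xen hypercall  */
    p[5] = 0x0f;                                /* 0f 01 c1 = vmcall (VT-x) */
    p[6] = 0x01;                                /* 0f 01 d9 = vmmcall (SVM) */
    p[7] = use_vmcall ? 0xc1 : 0xd9;
    p[8] = 0xc3;                                /* ret                      */
    memset(p + 9, 0xcc, EXAMPLE_PAGE_SIZE - 9); /* int3 padding             */
}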
/* Allocate a new page table page and hook it in via the given entry */ static int p2m_create_table(struct domain *d, lpae_t *entry) { struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page; void *p; lpae_t pte; BUG_ON(entry->p2m.valid); page = alloc_domheap_page(NULL, 0); if ( page == NULL ) return -ENOMEM; page_list_add(page, &p2m->pages); p = __map_domain_page(page); clear_page(p); unmap_domain_page(p); pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM); write_pte(entry, pte); return 0; }
/* * Allocate a new page table page and hook it in via the given entry. * apply_one_level relies on this returning 0 on success * and -ve on failure. * * If the existing entry is present then it must be a mapping and not * a table and it will be shattered into the next level down. * * level_shift is the number of bits at the level we want to create. */ static int p2m_create_table(struct domain *d, lpae_t *entry, int level_shift, bool_t flush_cache) { struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page; lpae_t *p; lpae_t pte; int splitting = p2m_valid(*entry); BUG_ON(p2m_table(*entry)); page = alloc_domheap_page(NULL, 0); if ( page == NULL ) return -ENOMEM; page_list_add(page, &p2m->pages); p = __map_domain_page(page); if ( splitting ) { p2m_type_t t = entry->p2m.type; unsigned long base_pfn = entry->p2m.base; int i; /* * We are either splitting a first level 1G page into 512 second level * 2M pages, or a second level 2M page into 512 third level 4K pages. */ for ( i=0 ; i < LPAE_ENTRIES; i++ ) { pte = mfn_to_p2m_entry(base_pfn + (i<<(level_shift-LPAE_SHIFT)), MATTR_MEM, t, p2m->default_access); /* * First and second level super pages set p2m.table = 0, but * third level entries set table = 1. */ if ( level_shift - LPAE_SHIFT ) pte.p2m.table = 0; write_pte(&p[i], pte); } } else clear_page(p); if ( flush_cache ) clean_dcache_va_range(p, PAGE_SIZE); unmap_domain_page(p); pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid, p2m->default_access); p2m_write_pte(entry, pte, flush_cache); return 0; }
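/*
 * Worked sketch, not Xen code: the stride arithmetic used when shattering a
 * superpage above.  With 512 entries per level (LPAE_SHIFT == 9) and 4KiB
 * pages, splitting a 1GiB first-level block into 2MiB entries implies a
 * stride of 512 pfns, i.e. level_shift == 18; splitting a 2MiB block into
 * 4KiB entries implies a stride of one pfn, i.e. level_shift == 9.  The
 * constants, the base pfn and the function name below are assumptions of
 * this example.
 */
#include <stdio.h>

#define EX_LPAE_SHIFT 9

static void ex_show_split(unsigned long base_pfn, unsigned int level_shift)
{
    unsigned long i, stride = 1UL << (level_shift - EX_LPAE_SHIFT);

    for ( i = 0; i < 4; i++ )   /* first few of the 512 new entries */
        printf("entry %lu -> pfn %#lx\n", i, base_pfn + i * stride);
}

int main(void)
{
    ex_show_split(0x80000, 18); /* 1GiB block -> 512 x 2MiB entries */
    ex_show_split(0x80000, 9);  /* 2MiB block -> 512 x 4KiB entries */
    return 0;
}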
// Allocate a new p2m table for a domain. // // The structure of the p2m table is that of a pagetable for xen (i.e. it is // controlled by CONFIG_PAGING_LEVELS). // // Returns 0 for success or -errno. // int p2m_alloc_table(struct p2m_domain *p2m) { struct page_info *p2m_top; struct domain *d = p2m->domain; p2m_lock(p2m); if ( !p2m_is_nestedp2m(p2m) && !page_list_empty(&d->page_list) ) { P2M_ERROR("dom %d already has memory allocated\n", d->domain_id); p2m_unlock(p2m); return -EINVAL; } if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 ) { P2M_ERROR("p2m already allocated for this domain\n"); p2m_unlock(p2m); return -EINVAL; } P2M_PRINTK("allocating p2m table\n"); p2m_top = p2m_alloc_ptp(p2m, PGT_l4_page_table); if ( p2m_top == NULL ) { p2m_unlock(p2m); return -ENOMEM; } p2m->phys_table = pagetable_from_mfn(page_to_mfn(p2m_top)); if ( hap_enabled(d) ) iommu_share_p2m_table(d); P2M_PRINTK("populating p2m table\n"); /* Initialise physmap tables for slot zero. Other code assumes this. */ p2m->defer_nested_flush = 1; if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_invalid, p2m->default_access) ) goto error; p2m->defer_nested_flush = 0; P2M_PRINTK("p2m table initialised\n"); p2m_unlock(p2m); return 0; error: P2M_PRINTK("failed to initialise p2m table for slot zero\n"); p2m_unlock(p2m); return -ENOMEM; }
void dump_p2m_lookup(struct domain *d, paddr_t addr) { struct p2m_domain *p2m = &d->arch.p2m; printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr); printk("P2M @ %p mfn:0x%lx\n", p2m->root, page_to_mfn(p2m->root)); dump_pt_walk(page_to_maddr(p2m->root), addr, P2M_ROOT_LEVEL, P2M_ROOT_PAGES); }
static void populate_physmap(struct memop_args *a) { struct page_info *page; unsigned long i, j; xen_pfn_t gpfn, mfn; struct domain *d = a->domain; if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) ) return; if ( (a->extent_order != 0) && !multipage_allocation_permitted(current->domain) ) return; for ( i = a->nr_done; i < a->nr_extents; i++ ) { if ( hypercall_preempt_check() ) { a->preempted = 1; goto out; } if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) goto out; page = alloc_domheap_pages(d, a->extent_order, a->memflags); if ( unlikely(page == NULL) ) { gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: " "id=%d memflags=%x (%ld of %d)\n", a->extent_order, d->domain_id, a->memflags, i, a->nr_extents); goto out; } mfn = page_to_mfn(page); guest_physmap_add_page(d, gpfn, mfn, a->extent_order); if ( !paging_mode_translate(d) ) { for ( j = 0; j < (1 << a->extent_order); j++ ) set_gpfn_from_mfn(mfn + j, gpfn + j); /* Inform the domain of the new page's machine address. */ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) goto out; } } out: a->nr_done = i; }
static void initialize_apic_assist(struct vcpu *v) { struct domain *d = v->domain; unsigned long gmfn = v->arch.hvm_vcpu.viridian.apic_assist.msr.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); void *va; /* * See section 13.3.4.1 of the specification for details of this * enlightenment. */ if ( !page ) goto fail; if ( !get_page_type(page, PGT_writable_page) ) { put_page(page); goto fail; } va = __map_domain_page_global(page); if ( !va ) { put_page_and_type(page); goto fail; } *(uint32_t *)va = 0; if ( viridian_feature_mask(v->domain) & HVMPV_apic_assist ) { /* * If we overwrite an existing address here then something has * gone wrong and a domain page will leak. Instead crash the * domain to make the problem obvious. */ if ( v->arch.hvm_vcpu.viridian.apic_assist.va ) domain_crash(d); v->arch.hvm_vcpu.viridian.apic_assist.va = va; return; } unmap_domain_page_global(va); put_page_and_type(page); return; fail: gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN)); }
/* Populate a HVM memory range using the biggest possible order. */ static int __init pvh_populate_memory_range(struct domain *d, unsigned long start, unsigned long nr_pages) { unsigned int order, i = 0; struct page_info *page; int rc; #define MAP_MAX_ITER 64 order = MAX_ORDER; while ( nr_pages != 0 ) { unsigned int range_order = get_order_from_pages(nr_pages + 1); order = min(range_order ? range_order - 1 : 0, order); page = alloc_domheap_pages(d, order, dom0_memflags); if ( page == NULL ) { if ( order == 0 && dom0_memflags ) { /* Try again without any dom0_memflags. */ dom0_memflags = 0; order = MAX_ORDER; continue; } if ( order == 0 ) { printk("Unable to allocate memory with order 0!\n"); return -ENOMEM; } order--; continue; } rc = guest_physmap_add_page(d, _gfn(start), _mfn(page_to_mfn(page)), order); if ( rc != 0 ) { printk("Failed to populate memory: [%#lx,%lx): %d\n", start, start + (1UL << order), rc); return -ENOMEM; } start += 1UL << order; nr_pages -= 1UL << order; if ( (++i % MAP_MAX_ITER) == 0 ) process_pending_softirqs(); } return 0; #undef MAP_MAX_ITER }
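/*
 * Worked sketch, not Xen code: the order-selection loop above always tries
 * the largest power-of-two chunk that still fits in the remaining range,
 * i.e. floor(log2(nr_pages)), clamped so the order never grows again.
 * ex_get_order() stands in for get_order_from_pages() (smallest order whose
 * size covers the given page count) and is an assumption of this example.
 */
#include <stdio.h>

static unsigned int ex_get_order(unsigned long pages)
{
    unsigned int order = 0;

    while ( (1UL << order) < pages )
        order++;
    return order;
}

int main(void)
{
    unsigned long nr_pages = 1000;
    unsigned int order = 18;                    /* stand-in for MAX_ORDER */

    while ( nr_pages != 0 )
    {
        unsigned int range_order = ex_get_order(nr_pages + 1);

        if ( (range_order ? range_order - 1 : 0) < order )
            order = range_order ? range_order - 1 : 0;
        printf("order %2u (%4lu pages), %lu left\n",
               order, 1UL << order, nr_pages - (1UL << order));
        nr_pages -= 1UL << order;
    }
    return 0;   /* 1000 pages are covered as 512 + 256 + 128 + 64 + 32 + 8 */
}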
void dump_p2m_lookup(struct domain *d, paddr_t addr) { struct p2m_domain *p2m = &d->arch.p2m; lpae_t *first; printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr); printk("P2M @ %p mfn:0x%lx\n", p2m->first_level, page_to_mfn(p2m->first_level)); first = __map_domain_page(p2m->first_level); dump_pt_walk(first, addr); unmap_domain_page(first); }
static struct page_info *hap_alloc_p2m_page(struct domain *d) { struct page_info *pg; hap_lock(d); pg = hap_alloc(d); #if CONFIG_PAGING_LEVELS == 3 /* Under PAE mode, top-level P2M table should be allocated below 4GB space * because the size of h_cr3 is only 32-bit. We use alloc_domheap_pages to * force this requirement, and exchange the guaranteed 32-bit-clean * page for the one we just hap_alloc()ed. */ if ( d->arch.paging.hap.p2m_pages == 0 && mfn_x(page_to_mfn(pg)) >= (1UL << (32 - PAGE_SHIFT)) ) { free_domheap_page(pg); pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32)); if ( likely(pg != NULL) ) { void *p = hap_map_domain_page(page_to_mfn(pg)); clear_page(p); hap_unmap_domain_page(p); } } #endif if ( likely(pg != NULL) ) { d->arch.paging.hap.total_pages--; d->arch.paging.hap.p2m_pages++; page_set_owner(pg, d); pg->count_info = 1; } hap_unlock(d); return pg; }
static void increase_reservation(struct memop_args *a) { struct page_info *page; unsigned long i; xen_pfn_t mfn; struct domain *d = a->domain; if ( !guest_handle_is_null(a->extent_list) && !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) ) return; if ( !multipage_allocation_permitted(current->domain, a->extent_order) ) return; mcd_mem_inc_trap(a->domain, (a->nr_extents - a->nr_done)); for ( i = a->nr_done; i < a->nr_extents; i++ ) { if ( hypercall_preempt_check() ) { a->preempted = 1; goto out; } page = alloc_domheap_pages(d, a->extent_order, a->memflags); if ( unlikely(page == NULL) ) { gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: " "id=%d memflags=%x (%ld of %d)\n", a->extent_order, d->domain_id, a->memflags, i, a->nr_extents); goto out; } /* Inform the domain of the new page's machine address. */ if ( !guest_handle_is_null(a->extent_list) ) { mfn = page_to_mfn(page); if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) goto out; } } out: a->nr_done = i; mcd_mem_upt_trap(d); }
int amd_iommu_sync_p2m(struct domain *d) { unsigned long mfn, gfn, flags; u64 iommu_l2e; struct list_head *entry; struct page_info *page; struct hvm_iommu *hd; int iw = IOMMU_IO_WRITE_ENABLED; int ir = IOMMU_IO_READ_ENABLED; if ( !is_hvm_domain(d) ) return 0; hd = domain_hvm_iommu(d); spin_lock_irqsave(&hd->mapping_lock, flags); if ( hd->p2m_synchronized ) goto out; for ( entry = d->page_list.next; entry != &d->page_list; entry = entry->next ) { page = list_entry(entry, struct page_info, list); mfn = page_to_mfn(page); gfn = get_gpfn_from_mfn(mfn); if ( gfn == INVALID_M2P_ENTRY ) continue; iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn); if ( iommu_l2e == 0 ) { amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir); } hd->p2m_synchronized = 1; out: spin_unlock_irqrestore(&hd->mapping_lock, flags); return 0; }
static u64 iommu_l2e_from_pfn(struct page_info *table, int level, unsigned long io_pfn) { unsigned long offset; void *pde = NULL; void *table_vaddr; u64 next_table_maddr = 0; BUG_ON( table == NULL || level == 0 ); while ( level > 1 ) { offset = io_pfn >> ((PTE_PER_TABLE_SHIFT * (level - IOMMU_PAGING_MODE_LEVEL_1))); offset &= ~PTE_PER_TABLE_MASK; table_vaddr = map_domain_page(page_to_mfn(table)); pde = table_vaddr + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE); next_table_maddr = amd_iommu_get_next_table_from_pte(pde); if ( !amd_iommu_is_pte_present(pde) ) { if ( next_table_maddr == 0 ) { table = alloc_amd_iommu_pgtable(); if ( table == NULL ) return 0; next_table_maddr = page_to_maddr(table); amd_iommu_set_page_directory_entry( (u32 *)pde, next_table_maddr, level - 1); } else /* should never reach here */ return 0; } unmap_domain_page(table_vaddr); table = maddr_to_page(next_table_maddr); level--; } return next_table_maddr; }
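/*
 * Worked sketch, not Xen code: the offset computation in the walk above.
 * With 9 index bits per level (PTE_PER_TABLE_SHIFT == 9), the table index
 * at level L is (io_pfn >> (9 * (L - 1))) & 0x1ff.  The constants and the
 * example pfn below are assumptions of this sketch.
 */
#include <stdio.h>

#define EX_BITS_PER_LEVEL 9
#define EX_INDEX_MASK     ((1UL << EX_BITS_PER_LEVEL) - 1)

int main(void)
{
    unsigned long io_pfn = 0x12345;
    int level;

    for ( level = 3; level >= 1; level-- )
        printf("level %d index = %#lx\n", level,
               (io_pfn >> (EX_BITS_PER_LEVEL * (level - 1))) & EX_INDEX_MASK);
    /* io_pfn 0x12345 -> level 3: 0x0, level 2: 0x91, level 1: 0x45 */
    return 0;
}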
void dump_p2m_lookup(struct domain *d, paddr_t addr) { struct p2m_domain *p2m = &d->arch.p2m; lpae_t *first; printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr); if ( first_linear_offset(addr) > LPAE_ENTRIES ) { printk("Cannot dump addresses in second of first level pages...\n"); return; } printk("P2M @ %p mfn:0x%lx\n", p2m->first_level, page_to_mfn(p2m->first_level)); first = __map_domain_page(p2m->first_level); dump_pt_walk(first, addr); unmap_domain_page(first); }
static struct page_info *hap_alloc(struct domain *d) { struct page_info *pg = NULL; void *p; ASSERT(hap_locked_by_me(d)); if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) ) return NULL; pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list); list_del(&pg->list); d->arch.paging.hap.free_pages--; p = hap_map_domain_page(page_to_mfn(pg)); ASSERT(p != NULL); clear_page(p); hap_unmap_domain_page(p); return pg; }
void __init vm_init(void) { unsigned int i, nr; unsigned long va; vm_base = (void *)VMAP_VIRT_START; vm_end = PFN_DOWN(arch_vmap_virt_end() - vm_base); vm_low = PFN_UP((vm_end + 7) / 8); nr = PFN_UP((vm_low + 7) / 8); vm_top = nr * PAGE_SIZE * 8; for ( i = 0, va = (unsigned long)vm_bitmap; i < nr; ++i, va += PAGE_SIZE ) { struct page_info *pg = alloc_domheap_page(NULL, 0); map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); clear_page((void *)va); } bitmap_fill(vm_bitmap, vm_low); /* Populate page tables for the bitmap if necessary. */ map_pages_to_xen(va, 0, vm_low - nr, MAP_SMALL_PAGES); }
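/*
 * Worked sketch, not Xen code: the sizing arithmetic in vm_init().  vm_end
 * is the number of pages the vmap area can hold, vm_low the number of pages
 * the full allocation bitmap itself occupies, nr the number of bitmap pages
 * populated up front, and vm_top how many pages those nr bitmap pages can
 * track before vm_alloc() has to grow the bitmap.  The 64GiB area size and
 * 4KiB page size are assumptions of this example.
 */
#include <stdio.h>

#define EX_PAGE_SIZE  4096UL
#define EX_AREA_BYTES (64UL << 30)
#define EX_PFN_UP(b)  (((b) + EX_PAGE_SIZE - 1) / EX_PAGE_SIZE)

int main(void)
{
    unsigned long vm_end = EX_AREA_BYTES / EX_PAGE_SIZE;
    unsigned long vm_low = EX_PFN_UP((vm_end + 7) / 8);
    unsigned long nr     = EX_PFN_UP((vm_low + 7) / 8);
    unsigned long vm_top = nr * EX_PAGE_SIZE * 8;

    /* 64GiB / 4KiB: vm_end = 16777216, vm_low = 512, nr = 1, vm_top = 32768 */
    printf("vm_end=%lu vm_low=%lu nr=%lu vm_top=%lu\n",
           vm_end, vm_low, nr, vm_top);
    return 0;
}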
int arch_iommu_populate_page_table(struct domain *d) { const struct domain_iommu *hd = dom_iommu(d); struct page_info *page; int rc = 0, n = 0; d->need_iommu = -1; this_cpu(iommu_dont_flush_iotlb) = 1; spin_lock(&d->page_alloc_lock); if ( unlikely(d->is_dying) ) rc = -ESRCH; while ( !rc && (page = page_list_remove_head(&d->page_list)) ) { if ( has_hvm_container_domain(d) || (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) { unsigned long mfn = page_to_mfn(page); unsigned long gfn = mfn_to_gmfn(d, mfn); if ( gfn != gfn_x(INVALID_GFN) ) { ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH)); BUG_ON(SHARED_M2P(gfn)); rc = hd->platform_ops->map_page(d, gfn, mfn, IOMMUF_readable | IOMMUF_writable); } if ( rc ) { page_list_add(page, &d->page_list); break; } }
static int setup_compat_l4(struct vcpu *v) { struct page_info *pg; l4_pgentry_t *l4tab; mfn_t mfn; pg = alloc_domheap_page(v->domain, MEMF_no_owner); if ( pg == NULL ) return -ENOMEM; mfn = page_to_mfn(pg); l4tab = map_domain_page(mfn); clear_page(l4tab); init_xen_l4_slots(l4tab, mfn, v->domain, INVALID_MFN, false); unmap_domain_page(l4tab); /* This page needs to look like a pagetable so that it can be shadowed */ pg->u.inuse.type_info = PGT_l4_page_table | PGT_validated | 1; v->arch.guest_table = pagetable_from_page(pg); v->arch.guest_table_user = v->arch.guest_table; return 0; }
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return INVALID_GFN; } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; return INVALID_GFN; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(mfn_x(top_mfn))); top_map = map_domain_page(mfn_x(top_mfn)); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_l1e_get_gfn(gw.l1e); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return INVALID_GFN; } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; return INVALID_GFN; }
static void populate_physmap(struct memop_args *a) { struct page_info *page; unsigned int i, j; xen_pfn_t gpfn, mfn; struct domain *d = a->domain; if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) ) return; if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER : max_order(current->domain)) ) return; for ( i = a->nr_done; i < a->nr_extents; i++ ) { if ( i != a->nr_done && hypercall_preempt_check() ) { a->preempted = 1; goto out; } if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) goto out; if ( a->memflags & MEMF_populate_on_demand ) { if ( guest_physmap_mark_populate_on_demand(d, gpfn, a->extent_order) < 0 ) goto out; } else { if ( is_domain_direct_mapped(d) ) { mfn = gpfn; for ( j = 0; j < (1U << a->extent_order); j++, mfn++ ) { if ( !mfn_valid(mfn) ) { gdprintk(XENLOG_INFO, "Invalid mfn %#"PRI_xen_pfn"\n", mfn); goto out; } page = mfn_to_page(mfn); if ( !get_page(page, d) ) { gdprintk(XENLOG_INFO, "mfn %#"PRI_xen_pfn" doesn't belong to d%d\n", mfn, d->domain_id); goto out; } put_page(page); } mfn = gpfn; page = mfn_to_page(mfn); } else { page = alloc_domheap_pages(d, a->extent_order, a->memflags); if ( unlikely(!page) ) { if ( !opt_tmem || a->extent_order ) gdprintk(XENLOG_INFO, "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n", a->extent_order, d->domain_id, a->memflags, i, a->nr_extents); goto out; } mfn = page_to_mfn(page); } guest_physmap_add_page(d, gpfn, mfn, a->extent_order); if ( !paging_mode_translate(d) ) { for ( j = 0; j < (1U << a->extent_order); j++ ) set_gpfn_from_mfn(mfn + j, gpfn + j); /* Inform the domain of the new page's machine address. */ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) goto out; } } } out: a->nr_done = i; }
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr, p2m_type_t t) { paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT; /* xn and write bit will be defined in the switch */ lpae_t e = (lpae_t) { .p2m.af = 1, .p2m.sh = LPAE_SH_OUTER, .p2m.read = 1, .p2m.mattr = mattr, .p2m.table = 1, .p2m.valid = 1, .p2m.type = t, }; BUILD_BUG_ON(p2m_max_real_type > (1 << 4)); switch (t) { case p2m_ram_rw: e.p2m.xn = 0; e.p2m.write = 1; break; case p2m_ram_ro: e.p2m.xn = 0; e.p2m.write = 0; break; case p2m_map_foreign: case p2m_grant_map_rw: case p2m_mmio_direct: e.p2m.xn = 1; e.p2m.write = 1; break; case p2m_grant_map_ro: case p2m_invalid: e.p2m.xn = 1; e.p2m.write = 0; break; case p2m_max_real_type: BUG(); break; } ASSERT(!(pa & ~PAGE_MASK)); ASSERT(!(pa & ~PADDR_MASK)); e.bits |= pa; return e; } /* Allocate a new page table page and hook it in via the given entry */ static int p2m_create_table(struct domain *d, lpae_t *entry) { struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page; void *p; lpae_t pte; BUG_ON(entry->p2m.valid); page = alloc_domheap_page(NULL, 0); if ( page == NULL ) return -ENOMEM; page_list_add(page, &p2m->pages); p = __map_domain_page(page); clear_page(p); unmap_domain_page(p); pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid); write_pte(entry, pte); return 0; } enum p2m_operation { INSERT, ALLOCATE, REMOVE, RELINQUISH, CACHEFLUSH, }; static int apply_p2m_changes(struct domain *d, enum p2m_operation op, paddr_t start_gpaddr, paddr_t end_gpaddr, paddr_t maddr, int mattr, p2m_type_t t) { int rc; struct p2m_domain *p2m = &d->arch.p2m; lpae_t *first = NULL, *second = NULL, *third = NULL; paddr_t addr; unsigned long cur_first_page = ~0, cur_first_offset = ~0, cur_second_offset = ~0; unsigned long count = 0; unsigned int flush = 0; bool_t populate = (op == INSERT || op == ALLOCATE); lpae_t pte; spin_lock(&p2m->lock); if ( d != current->domain ) p2m_load_VTTBR(d); addr = start_gpaddr; while ( addr < end_gpaddr ) { if ( cur_first_page != p2m_first_level_index(addr) ) { if ( first ) unmap_domain_page(first); first = p2m_map_first(p2m, addr); if ( !first ) { rc = -EINVAL; goto out; } cur_first_page = p2m_first_level_index(addr); } if ( !first[first_table_offset(addr)].p2m.valid ) { if ( !populate ) { addr = (addr + FIRST_SIZE) & FIRST_MASK; continue; } rc = p2m_create_table(d, &first[first_table_offset(addr)]); if ( rc < 0 ) { printk("p2m_populate_ram: L1 failed\n"); goto out; } } BUG_ON(!first[first_table_offset(addr)].p2m.valid); if ( cur_first_offset != first_table_offset(addr) ) { if (second) unmap_domain_page(second); second = map_domain_page(first[first_table_offset(addr)].p2m.base); cur_first_offset = first_table_offset(addr); } /* else: second already valid */ if ( !second[second_table_offset(addr)].p2m.valid ) { if ( !populate ) { addr = (addr + SECOND_SIZE) & SECOND_MASK; continue; } rc = p2m_create_table(d, &second[second_table_offset(addr)]); if ( rc < 0 ) { printk("p2m_populate_ram: L2 failed\n"); goto out; } } BUG_ON(!second[second_table_offset(addr)].p2m.valid); if ( cur_second_offset != second_table_offset(addr) ) { /* map third level */ if (third) unmap_domain_page(third); third = map_domain_page(second[second_table_offset(addr)].p2m.base); cur_second_offset = second_table_offset(addr); } pte = third[third_table_offset(addr)]; flush |= pte.p2m.valid; /* TODO: Handle other p2m type * * It's safe to do the put_page here because page_alloc will * flush the TLBs if the page is reallocated before the end of * this loop. 
*/ if ( pte.p2m.valid && p2m_is_foreign(pte.p2m.type) ) { unsigned long mfn = pte.p2m.base; ASSERT(mfn_valid(mfn)); put_page(mfn_to_page(mfn)); } /* Allocate a new RAM page and attach */ switch (op) { case ALLOCATE: { struct page_info *page; ASSERT(!pte.p2m.valid); rc = -ENOMEM; page = alloc_domheap_page(d, 0); if ( page == NULL ) { printk("p2m_populate_ram: failed to allocate page\n"); goto out; } pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t); write_pte(&third[third_table_offset(addr)], pte); } break; case INSERT: { pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t); write_pte(&third[third_table_offset(addr)], pte); maddr += PAGE_SIZE; } break; case RELINQUISH: case REMOVE: { if ( !pte.p2m.valid ) { count++; break; } count += 0x10; memset(&pte, 0x00, sizeof(pte)); write_pte(&third[third_table_offset(addr)], pte); count++; } break; case CACHEFLUSH: { if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) ) break; flush_page_to_ram(pte.p2m.base); } break; } /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */ if ( op == RELINQUISH && count >= 0x2000 ) { if ( hypercall_preempt_check() ) { p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT; rc = -EAGAIN; goto out; } count = 0; } /* Got the next page */ addr += PAGE_SIZE; } if ( flush ) { /* At the beginning of the function, Xen is updating VTTBR * with the domain where the mappings are created. In this * case it's only necessary to flush TLBs on every CPUs with * the current VMID (our domain). */ flush_tlb(); } if ( op == ALLOCATE || op == INSERT ) { unsigned long sgfn = paddr_to_pfn(start_gpaddr); unsigned long egfn = paddr_to_pfn(end_gpaddr); p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn); p2m->lowest_mapped_gfn = MIN(p2m->lowest_mapped_gfn, sgfn); } rc = 0; out: if (third) unmap_domain_page(third); if (second) unmap_domain_page(second); if (first) unmap_domain_page(first); if ( d != current->domain ) p2m_load_VTTBR(current->domain); spin_unlock(&p2m->lock); return rc; }
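/*
 * Worked sketch, not Xen code: the preemption budget used in the RELINQUISH
 * path above.  Each removed (valid) entry adds 0x10 + 1 to the running
 * count, each skipped (invalid) entry adds 1, and a preemption check runs
 * once the count reaches 0x2000.  That is roughly 480 real unmappings (just
 * under 2MiB of 4KiB pages) or 8192 skipped entries (32MiB of address
 * space), matching the "2MiB or 32 MiB" comment.
 */
#include <stdio.h>

int main(void)
{
    const unsigned long budget  = 0x2000;
    const unsigned long removed = 0x10 + 1;     /* cost per valid entry   */
    const unsigned long skipped = 1;            /* cost per invalid entry */

    printf("removed mappings per check: %lu\n", budget / removed); /* 481  */
    printf("skipped entries per check : %lu\n", budget / skipped); /* 8192 */
    return 0;
}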
static int create_p2m_entries(struct domain *d, enum p2m_operation op, paddr_t start_gpaddr, paddr_t end_gpaddr, paddr_t maddr, int mattr) { int rc, flush; struct p2m_domain *p2m = &d->arch.p2m; lpae_t *first = NULL, *second = NULL, *third = NULL; paddr_t addr; unsigned long cur_first_offset = ~0, cur_second_offset = ~0; spin_lock(&p2m->lock); /* XXX Don't actually handle 40 bit guest physical addresses */ BUG_ON(start_gpaddr & 0x8000000000ULL); BUG_ON(end_gpaddr & 0x8000000000ULL); first = __map_domain_page(p2m->first_level); for(addr = start_gpaddr; addr < end_gpaddr; addr += PAGE_SIZE) { if ( !first[first_table_offset(addr)].p2m.valid ) { rc = p2m_create_table(d, &first[first_table_offset(addr)]); if ( rc < 0 ) { printk("p2m_populate_ram: L1 failed\n"); goto out; } } BUG_ON(!first[first_table_offset(addr)].p2m.valid); if ( cur_first_offset != first_table_offset(addr) ) { if (second) unmap_domain_page(second); second = map_domain_page(first[first_table_offset(addr)].p2m.base); cur_first_offset = first_table_offset(addr); } /* else: second already valid */ if ( !second[second_table_offset(addr)].p2m.valid ) { rc = p2m_create_table(d, &second[second_table_offset(addr)]); if ( rc < 0 ) { printk("p2m_populate_ram: L2 failed\n"); goto out; } } BUG_ON(!second[second_table_offset(addr)].p2m.valid); if ( cur_second_offset != second_table_offset(addr) ) { /* map third level */ if (third) unmap_domain_page(third); third = map_domain_page(second[second_table_offset(addr)].p2m.base); cur_second_offset = second_table_offset(addr); } flush = third[third_table_offset(addr)].p2m.valid; /* Allocate a new RAM page and attach */ switch (op) { case ALLOCATE: { struct page_info *page; lpae_t pte; rc = -ENOMEM; page = alloc_domheap_page(d, 0); if ( page == NULL ) { printk("p2m_populate_ram: failed to allocate page\n"); goto out; } pte = mfn_to_p2m_entry(page_to_mfn(page), mattr); write_pte(&third[third_table_offset(addr)], pte); } break; case INSERT: { lpae_t pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr); write_pte(&third[third_table_offset(addr)], pte); maddr += PAGE_SIZE; } break; case REMOVE: { lpae_t pte; memset(&pte, 0x00, sizeof(pte)); write_pte(&third[third_table_offset(addr)], pte); maddr += PAGE_SIZE; } break; } if ( flush ) flush_tlb_all_local(); } rc = 0; out: if (third) unmap_domain_page(third); if (second) unmap_domain_page(second); if (first) unmap_domain_page(first); spin_unlock(&p2m->lock); return rc; }
int get_page_type(struct page_info *page, unsigned long type) { unsigned long nx, x, y = page->u.inuse.type_info; ASSERT(!(type & ~PGT_type_mask)); again: do { x = y; nx = x + 1; if ( unlikely((nx & PGT_count_mask) == 0) ) { MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); return 0; } else if ( unlikely((x & PGT_count_mask) == 0) ) { if ( (x & PGT_type_mask) != type ) { /* * On a type change we check whether we need to flush stale TLB * entries. This may be unnecessary (e.g., page was GDT/LDT) but * those circumstances should be very rare. */ cpumask_t mask = page_get_owner(page)->domain_dirty_cpumask; tlbflush_filter(mask, page->tlbflush_timestamp); if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(need_flush_tlb_flush); flush_tlb_mask(mask); } /* We lose existing type, back pointer, and validity. */ nx &= ~(PGT_type_mask | PGT_validated); nx |= type; /* No special validation needed for writable pages. */ /* Page tables and GDT/LDT need to be scanned for validity. */ if ( type == PGT_writable_page ) nx |= PGT_validated; } } else if ( unlikely((x & PGT_type_mask) != type) ) { return 0; } else if ( unlikely(!(x & PGT_validated)) ) { /* Someone else is updating validation of this page. Wait... */ while ( (y = page->u.inuse.type_info) == x ) cpu_relax(); goto again; } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); if ( unlikely(!(nx & PGT_validated)) ) { /* No one else is updating simultaneously. */ __set_bit(_PGT_validated, &page->u.inuse.type_info); } return 1; }
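/*
 * Minimal sketch, not Xen code: the same lock-free read-modify-write pattern
 * get_page_type() uses, expressed with C11 atomics.  Snapshot the word,
 * compute the new value, and let the compare-and-swap retry if another CPU
 * changed the word in the meantime.  The mask value and the function name
 * are assumptions of this example.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define EX_COUNT_MASK 0x0000ffffu    /* low bits hold the reference count */

bool ex_get_type_ref(_Atomic uint32_t *typeinfo)
{
    uint32_t x = atomic_load(typeinfo);

    for ( ; ; )
    {
        uint32_t nx = x + 1;

        if ( (nx & EX_COUNT_MASK) == 0 )    /* count would overflow */
            return false;
        /* On failure, x is refreshed with the current value and we retry. */
        if ( atomic_compare_exchange_weak(typeinfo, &x, nx) )
            return true;
    }
}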
void *vm_alloc(unsigned int nr, unsigned int align) { unsigned int start, bit; if ( !align ) align = 1; else if ( align & (align - 1) ) align &= -align; spin_lock(&vm_lock); for ( ; ; ) { struct page_info *pg; ASSERT(vm_low == vm_top || !test_bit(vm_low, vm_bitmap)); for ( start = vm_low; start < vm_top; ) { bit = find_next_bit(vm_bitmap, vm_top, start + 1); if ( bit > vm_top ) bit = vm_top; /* * Note that this skips the first bit, making the * corresponding page a guard one. */ start = (start + align) & ~(align - 1); if ( bit < vm_top ) { if ( start + nr < bit ) break; start = find_next_zero_bit(vm_bitmap, vm_top, bit + 1); } else { if ( start + nr <= bit ) break; start = bit; } } if ( start < vm_top ) break; spin_unlock(&vm_lock); if ( vm_top >= vm_end ) return NULL; pg = alloc_domheap_page(NULL, 0); if ( !pg ) return NULL; spin_lock(&vm_lock); if ( start >= vm_top ) { unsigned long va = (unsigned long)vm_bitmap + vm_top / 8; if ( !map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR) ) { clear_page((void *)va); vm_top += PAGE_SIZE * 8; if ( vm_top > vm_end ) vm_top = vm_end; continue; } } free_domheap_page(pg); if ( start >= vm_top ) { spin_unlock(&vm_lock); return NULL; } } for ( bit = start; bit < start + nr; ++bit ) __set_bit(bit, vm_bitmap); if ( bit < vm_top ) ASSERT(!test_bit(bit, vm_bitmap)); else ASSERT(bit == vm_top); if ( start <= vm_low + 2 ) vm_low = bit; spin_unlock(&vm_lock); return vm_base + start * PAGE_SIZE; }
/* Returns: 0 for success, -errno for failure */ static int p2m_next_level(struct p2m_domain *p2m, void **table, unsigned long *gfn_remainder, unsigned long gfn, u32 shift, u32 max, unsigned long type, bool_t unmap) { l1_pgentry_t *l1_entry; l1_pgentry_t *p2m_entry; l1_pgentry_t new_entry; void *next; int i; if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, shift, max)) ) return -ENOENT; /* PoD/paging: Not present doesn't imply empty. */ if ( !l1e_get_flags(*p2m_entry) ) { struct page_info *pg; pg = p2m_alloc_ptp(p2m, type); if ( pg == NULL ) return -ENOMEM; new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), P2M_BASE_FLAGS | _PAGE_RW); switch ( type ) { case PGT_l3_page_table: p2m_add_iommu_flags(&new_entry, 3, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 4); break; case PGT_l2_page_table: p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 3); break; case PGT_l1_page_table: p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 2); break; default: BUG(); break; } } ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE)); /* split 1GB pages into 2MB pages */ if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { unsigned long flags, pfn; struct page_info *pg; pg = p2m_alloc_ptp(p2m, PGT_l2_page_table); if ( pg == NULL ) return -ENOMEM; flags = l1e_get_flags(*p2m_entry); pfn = l1e_get_pfn(*p2m_entry); l1_entry = __map_domain_page(pg); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags); p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 2); } unmap_domain_page(l1_entry); new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), P2M_BASE_FLAGS | _PAGE_RW); /* disable PSE */ p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 3); } /* split single 2MB large page into 4KB page in P2M table */ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { unsigned long flags, pfn; struct page_info *pg; pg = p2m_alloc_ptp(p2m, PGT_l1_page_table); if ( pg == NULL ) return -ENOMEM; /* New splintered mappings inherit the flags of the old superpage, * with a little reorganisation for the _PAGE_PSE_PAT bit. */ flags = l1e_get_flags(*p2m_entry); pfn = l1e_get_pfn(*p2m_entry); if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */ else flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */ l1_entry = __map_domain_page(pg); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) { new_entry = l1e_from_pfn(pfn + i, flags); p2m_add_iommu_flags(&new_entry, 0, 0); p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 1); } unmap_domain_page(l1_entry); new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), P2M_BASE_FLAGS | _PAGE_RW); p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable); p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 2); } next = map_domain_page(_mfn(l1e_get_pfn(*p2m_entry))); if ( unmap ) unmap_domain_page(*table); *table = next; return 0; }
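/*
 * Sketch, not Xen code: the _PAGE_PSE_PAT shuffle in the 2MiB-to-4KiB split
 * above.  In a 2MiB superpage entry the PAT selector is bit 12 of the entry,
 * which shows up as bit 0 of the extracted frame number; in a 4KiB entry PAT
 * is bit 7, the same position _PAGE_PSE occupies in the superpage entry.  So
 * when shattering: if bit 0 of the superpage pfn is set, PAT was enabled, so
 * strip that bit and leave bit 7 set (it now reads as PAT); otherwise clear
 * bit 7.  The constant and function name below are assumptions of this
 * example.
 */
#define EX_PAGE_PSE (1u << 7)        /* PSE in a PDE, PAT in a 4KiB PTE */

void ex_shatter_pse_pat(unsigned long *pfn, unsigned int *flags)
{
    if ( *pfn & 1 )                  /* superpage PAT bit was set       */
        *pfn -= 1;                   /* strip it; bit 7 becomes PAT     */
    else
        *flags &= ~EX_PAGE_PSE;      /* no PAT: clear what used to be PSE */
}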
static void update_reference_tsc(struct domain *d, bool_t initialize) { unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn; struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); HV_REFERENCE_TSC_PAGE *p; if ( !page || !get_page_type(page, PGT_writable_page) ) { if ( page ) put_page(page); gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN)); return; } p = __map_domain_page(page); if ( initialize ) clear_page(p); /* * This enlightenment must be disabled if the host TSC is not invariant. * However it is also disabled if vtsc is true (which means rdtsc is being * emulated). This generally happens when guest TSC freq and host TSC freq * don't match. The TscScale value could be adjusted to cope with this, * allowing vtsc to be turned off, but support for this is not yet present * in the hypervisor. Thus it is possible that migrating a Windows VM * between hosts of differing TSC frequencies may result in large * differences in guest performance. */ if ( !host_tsc_is_safe() || d->arch.vtsc ) { /* * The specification states that valid values of TscSequence range * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate * this mechanism is no longer a reliable source of time and that * the VM should fall back to a different source. * * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate * the spec. and rely on a value of 0 to indicate that this * enlightenment should no longer be used. These two kernel * versions are currently the only ones to make use of this * enlightenment, so just use 0 here. */ p->TscSequence = 0; printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n", d->domain_id); goto out; } /* * The guest will calculate reference time according to the following * formula: * * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset * * Windows uses a 100ns tick, so we need a scale which is the number * of 100ns ticks per cpu tick, shifted left by 64. */ p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32; p->TscSequence++; if ( p->TscSequence == 0xFFFFFFFF || p->TscSequence == 0 ) /* Avoid both 'invalid' values */ p->TscSequence = 1; out: unmap_domain_page(p); put_page_and_type(page); }
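/*
 * Worked sketch, not Xen code: the TscScale computation above.  The guest
 * evaluates ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset in
 * 100ns units, so TscScale must be the number of 100ns intervals per TSC
 * tick scaled by 2^64, i.e. (10000 << 64) / tsc_khz; computing it as
 * ((10000 << 32) / tsc_khz) << 32 keeps the intermediate value within 64
 * bits at the cost of some low-order precision.  The 2.6GHz frequency is an
 * assumed example value.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t tsc_khz = 2600000;                     /* hypothetical 2.6GHz */
    uint64_t scale = ((10000ULL << 32) / tsc_khz) << 32;

    /* One second of TSC ticks should map to about 10^7 units of 100ns. */
    double ref_units = (double)(tsc_khz * 1000) * (double)scale /
                       18446744073709551616.0;      /* divide by 2^64 */

    printf("TscScale = %#llx\n", (unsigned long long)scale);
    printf("1s of TSC -> %.0f x 100ns (expect ~10000000)\n", ref_units);
    return 0;
}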
/* * 0 == (P2M_ONE_DESCEND) continue to descend the tree * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush, * entry, addr and maddr updated. Return value is an * indication of the amount of work done (for preemption). * -ve == (-Exxx) error. */ static int apply_one_level(struct domain *d, lpae_t *entry, unsigned int level, bool_t flush_cache, enum p2m_operation op, paddr_t start_gpaddr, paddr_t end_gpaddr, paddr_t *addr, paddr_t *maddr, bool_t *flush, int mattr, p2m_type_t t, p2m_access_t a) { const paddr_t level_size = level_sizes[level]; const paddr_t level_mask = level_masks[level]; const paddr_t level_shift = level_shifts[level]; struct p2m_domain *p2m = &d->arch.p2m; lpae_t pte; const lpae_t orig_pte = *entry; int rc; BUG_ON(level > 3); switch ( op ) { case ALLOCATE: ASSERT(level < 3 || !p2m_valid(orig_pte)); ASSERT(*maddr == 0); if ( p2m_valid(orig_pte) ) return P2M_ONE_DESCEND; if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) && /* We only create superpages when mem_access is not in use. */ (level == 3 || (level < 3 && !p2m->mem_access_enabled)) ) { struct page_info *page; page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0); if ( page ) { rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a); if ( rc < 0 ) { free_domheap_page(page); return rc; } pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t, a); if ( level < 3 ) pte.p2m.table = 0; p2m_write_pte(entry, pte, flush_cache); p2m->stats.mappings[level]++; *addr += level_size; return P2M_ONE_PROGRESS; } else if ( level == 3 ) return -ENOMEM; } /* L3 is always suitably aligned for mapping (handled, above) */ BUG_ON(level == 3); /* * If we get here then we failed to allocate a sufficiently * large contiguous region for this level (which can't be * L3) or mem_access is in use. Create a page table and * continue to descend so we try smaller allocations. */ rc = p2m_create_table(d, entry, 0, flush_cache); if ( rc < 0 ) return rc; return P2M_ONE_DESCEND; case INSERT: if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) && /* * We do not handle replacing an existing table with a superpage * or when mem_access is in use. */ (level == 3 || (!p2m_table(orig_pte) && !p2m->mem_access_enabled)) ) { rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a); if ( rc < 0 ) return rc; /* New mapping is superpage aligned, make it */ pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t, a); if ( level < 3 ) pte.p2m.table = 0; /* Superpage entry */ p2m_write_pte(entry, pte, flush_cache); *flush |= p2m_valid(orig_pte); *addr += level_size; *maddr += level_size; if ( p2m_valid(orig_pte) ) { /* * We can't currently get here for an existing table * mapping, since we don't handle replacing an * existing table with a superpage. If we did we would * need to handle freeing (and accounting) for the bit * of the p2m tree which we would be about to lop off. */ BUG_ON(level < 3 && p2m_table(orig_pte)); if ( level == 3 ) p2m_put_l3_page(orig_pte); } else /* New mapping */ p2m->stats.mappings[level]++; return P2M_ONE_PROGRESS; } else {
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return gfn_x(INVALID_GFN); } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return gfn_x(INVALID_GFN); } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; goto out_tweak_pfec; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(top_mfn)); top_map = map_domain_page(top_mfn); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_walk_to_gfn(&gw); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return gfn_x(INVALID_GFN); } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return gfn_x(INVALID_GFN); } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PKEY_BITS ) pfec[0] |= PFEC_prot_key; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; out_tweak_pfec: /* * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS: * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled. */ if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) ) pfec[0] &= ~PFEC_insn_fetch; return gfn_x(INVALID_GFN); }