/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
        unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
        unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
        unsigned long pfn;

        xen_max_p2m_pfn = max_pfn;

        p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_init(p2m_missing);

        p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_mid_init(p2m_mid_missing);

        p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_top_init(p2m_top);

        p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_init(p2m_identity);

        /*
         * The domain builder gives us a pre-constructed p2m array in
         * mfn_list for all the pages initially given to us, so we just
         * need to graft that into our tree structure.
         */
        for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
                unsigned topidx = p2m_top_index(pfn);
                unsigned mididx = p2m_mid_index(pfn);

                if (p2m_top[topidx] == p2m_mid_missing) {
                        unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);

                        p2m_mid_init(mid);

                        p2m_top[topidx] = mid;
                }

                /*
                 * As long as the mfn_list has enough entries to completely
                 * fill a p2m page, pointing into the array is ok. But if
                 * not, the entries beyond the last pfn will be undefined.
                 */
                if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
                        unsigned long p2midx;

                        p2midx = max_pfn % P2M_PER_PAGE;
                        for ( ; p2midx < P2M_PER_PAGE; p2midx++)
                                mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
                }
                p2m_top[topidx][mididx] = &mfn_list[pfn];
        }

        m2p_override_init();
}
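/*
 * Not shown in the excerpt above: a minimal sketch of the index helpers the
 * loop relies on, assuming the usual layout of one page of unsigned longs
 * per leaf and one page of pointers per mid/top level.  The real helpers
 * additionally sanity-check the pfn against the maximum covered by the tree.
 */
#define P2M_PER_PAGE            (PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE        (PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE        (PAGE_SIZE / sizeof(unsigned long **))

static inline unsigned p2m_top_index(unsigned long pfn)
{
        /* Which top-level slot covers this pfn. */
        return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
        /* Which mid-level slot within that top-level slot. */
        return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
        /* Which entry within the leaf p2m page. */
        return pfn % P2M_PER_PAGE;
}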
int arch_domain_create(struct domain *d, unsigned int domcr_flags)
{
    int rc;

    d->arch.relmem = RELMEM_not_started;

    /* Idle domains do not need this setup */
    if ( is_idle_domain(d) )
        return 0;

    if ( (rc = p2m_init(d)) != 0 )
        goto fail;

    rc = -ENOMEM;
    if ( (d->shared_info = alloc_xenheap_pages(0, 0)) == NULL )
        goto fail;

    /* Default the virtual ID to match the physical */
    d->arch.vpidr = boot_cpu_data.midr.bits;

    clear_page(d->shared_info);
    share_xen_page_with_guest(
        virt_to_page(d->shared_info), d, XENSHARE_writable);

    if ( (rc = p2m_alloc_table(d)) != 0 )
        goto fail;

    if ( (rc = gicv_setup(d)) != 0 )
        goto fail;

    if ( (rc = domain_vgic_init(d)) != 0 )
        goto fail;

    if ( (rc = domain_vtimer_init(d)) != 0 )
        goto fail;

    if ( d->domain_id )
        d->arch.evtchn_irq = GUEST_EVTCHN_PPI;
    else
        d->arch.evtchn_irq = platform_dom0_evtchn_ppi();

    /*
     * Virtual UART is only used by linux early printk and decompress code.
     * Only use it for the hardware domain because the linux kernel may not
     * support multi-platform.
     */
    if ( is_hardware_domain(d) && (rc = domain_vuart_init(d)) )
        goto fail;

    if ( (rc = iommu_domain_init(d)) != 0 )
        goto fail;

    return 0;

fail:
    d->is_dying = DOMDYING_dead;
    arch_domain_destroy(d);

    return rc;
}
void __init xen_build_dynamic_phys_to_machine(void)
{
        unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
        unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
        unsigned long pfn;

        xen_max_p2m_pfn = max_pfn;

        p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_init(p2m_missing);

        p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_mid_init(p2m_mid_missing);

        p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_top_init(p2m_top);

        p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_init(p2m_identity);

        for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
                unsigned topidx = p2m_top_index(pfn);
                unsigned mididx = p2m_mid_index(pfn);

                if (p2m_top[topidx] == p2m_mid_missing) {
                        unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);

                        p2m_mid_init(mid);

                        p2m_top[topidx] = mid;
                }

                if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
                        unsigned long p2midx;

                        p2midx = max_pfn % P2M_PER_PAGE;
                        for ( ; p2midx < P2M_PER_PAGE; p2midx++)
                                mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
                }
                p2m_top[topidx][mididx] = &mfn_list[pfn];
        }

        m2p_override_init();
}
static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary)
{
        unsigned topidx, mididx, idx;
        unsigned long *p2m;
        unsigned long *mid_mfn_p;

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);
        idx = p2m_index(pfn);

        /* Pfff.. No boundary cross-over, let's get out. */
        if (!idx && check_boundary)
                return false;

        WARN(p2m_top[topidx][mididx] == p2m_identity,
                "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
                topidx, mididx);

        /*
         * Could be done by xen_build_dynamic_phys_to_machine..
         */
        if (p2m_top[topidx][mididx] != p2m_missing)
                return false;

        /* Boundary cross-over for the edges: */
        p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);

        p2m_init(p2m);

        p2m_top[topidx][mididx] = p2m;

        /* For save/restore we need the MFN of the P2M saved */
        mid_mfn_p = p2m_top_mfn_p[topidx];
        WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
                "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
                topidx, mididx);
        mid_mfn_p[mididx] = virt_to_mfn(p2m);

        return true;
}
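/*
 * Hypothetical caller sketch (example_cover_boundaries is not a real
 * function): when an identity or invalid range is set up early, only the
 * unaligned edges of the range need a private leaf page; fully covered
 * middle pages can keep pointing at the shared p2m_identity/p2m_missing
 * pages.  Passing check_boundary=true makes the helper a no-op for
 * page-aligned pfns.
 */
static void __init example_cover_boundaries(unsigned long start_pfn,
                                            unsigned long end_pfn)
{
        early_alloc_p2m_middle(start_pfn, true);
        early_alloc_p2m_middle(end_pfn, true);
}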
static bool __init __early_alloc_p2m(unsigned long pfn)
{
        unsigned topidx, mididx, idx;

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);
        idx = p2m_index(pfn);

        if (!idx)
                return false;

        WARN(p2m_top[topidx][mididx] == p2m_identity,
                "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
                topidx, mididx);
        if (p2m_top[topidx][mididx] != p2m_missing)
                return false;

        if (idx) {
                unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
                unsigned long *mid_mfn_p;

                p2m_init(p2m);
                p2m_top[topidx][mididx] = p2m;

                mid_mfn_p = p2m_top_mfn_p[topidx];
                WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
                        "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
                        topidx, mididx);
                mid_mfn_p[mididx] = virt_to_mfn(p2m);
        }
        return idx != 0;
}
/*
 * Fully allocate the p2m structure for a given pfn. We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync. We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
        unsigned topidx;
        unsigned long *top_mfn_p, *mid_mfn;
        pte_t *ptep, *pte_pg;
        unsigned int level;
        unsigned long flags;
        unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
        unsigned long p2m_pfn;

        ptep = lookup_address(addr, &level);
        BUG_ON(!ptep || level != PG_LEVEL_4K);
        pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

        if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
                /* PMD level is missing, allocate a new one */
                ptep = alloc_p2m_pmd(addr, pte_pg);
                if (!ptep)
                        return false;
        }

        if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
                topidx = p2m_top_index(pfn);
                top_mfn_p = &p2m_top_mfn[topidx];
                mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);

                BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

                if (mid_mfn == p2m_mid_missing_mfn) {
                        /* Separately check the mid mfn level */
                        unsigned long missing_mfn;
                        unsigned long mid_mfn_mfn;
                        unsigned long old_mfn;

                        mid_mfn = alloc_p2m_page();
                        if (!mid_mfn)
                                return false;

                        p2m_mid_mfn_init(mid_mfn, p2m_missing);

                        missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
                        mid_mfn_mfn = virt_to_mfn(mid_mfn);
                        old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
                        if (old_mfn != missing_mfn) {
                                free_p2m_page(mid_mfn);
                                mid_mfn = mfn_to_virt(old_mfn);
                        } else {
                                p2m_top_mfn_p[topidx] = mid_mfn;
                        }
                }
        } else {
                mid_mfn = NULL;
        }

        p2m_pfn = pte_pfn(READ_ONCE(*ptep));
        if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
            p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
                /* p2m leaf page is missing */
                unsigned long *p2m;

                p2m = alloc_p2m_page();
                if (!p2m)
                        return false;

                if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
                        p2m_init(p2m);
                else
                        p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));

                spin_lock_irqsave(&p2m_update_lock, flags);

                if (pte_pfn(*ptep) == p2m_pfn) {
                        HYPERVISOR_shared_info->arch.p2m_generation++;
                        wmb(); /* Tools are synchronizing via p2m_generation. */
                        set_pte(ptep,
                                pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
                        wmb(); /* Tools are synchronizing via p2m_generation. */
                        HYPERVISOR_shared_info->arch.p2m_generation++;
                        if (mid_mfn)
                                mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m);
                        p2m = NULL;
                }

                spin_unlock_irqrestore(&p2m_update_lock, flags);

                if (p2m)
                        free_p2m_page(p2m);
        }

        /* Expanded the p2m? */
        if (pfn > xen_p2m_last_pfn) {
                xen_p2m_last_pfn = pfn;
                HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
        }

        return true;
}
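/*
 * Sketch of the usual caller, simplified: the fast path is a plain store
 * via __set_phys_to_machine(); alloc_p2m() is only invoked when that store
 * would hit the read-only shared missing/identity page, after which the
 * store is retried against the freshly installed leaf.
 */
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
        if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
                if (!alloc_p2m(pfn))
                        return false;

                return __set_phys_to_machine(pfn, mfn);
        }

        return true;
}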
static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{
        unsigned int i, chunk;
        unsigned long pfn;
        unsigned long *mfns;
        pte_t *ptep;
        pmd_t *pmdp;
        int type;

        p2m_missing = alloc_p2m_page();
        p2m_init(p2m_missing);
        p2m_identity = alloc_p2m_page();
        p2m_init(p2m_identity);

        p2m_missing_pte = alloc_p2m_page();
        paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
        p2m_identity_pte = alloc_p2m_page();
        paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
        for (i = 0; i < PTRS_PER_PTE; i++) {
                set_pte(p2m_missing_pte + i,
                        pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
                set_pte(p2m_identity_pte + i,
                        pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
        }

        for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
                /*
                 * Try to map missing/identity PMDs or p2m-pages if possible.
                 * We have to respect the structure of the mfn_list_list
                 * which will be built just afterwards.
                 * Chunk size to test is one p2m page if we are in the middle
                 * of a mfn_list_list mid page and the complete mid page area
                 * if we are at index 0 of the mid page. Please note that a
                 * mid page might cover more than one PMD, e.g. on 32 bit PAE
                 * kernels.
                 */
                chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
                        P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;

                type = xen_p2m_elem_type(pfn);
                i = 0;
                if (type != P2M_TYPE_PFN)
                        for (i = 1; i < chunk; i++)
                                if (xen_p2m_elem_type(pfn + i) != type)
                                        break;
                if (i < chunk)
                        /* Reset to minimal chunk size. */
                        chunk = P2M_PER_PAGE;

                if (type == P2M_TYPE_PFN || i < chunk) {
                        /* Use initial p2m page contents. */
#ifdef CONFIG_X86_64
                        mfns = alloc_p2m_page();
                        copy_page(mfns, xen_p2m_addr + pfn);
#else
                        mfns = xen_p2m_addr + pfn;
#endif
                        ptep = populate_extra_pte((unsigned long)(p2m + pfn));
                        set_pte(ptep,
                                pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
                        continue;
                }

                if (chunk == P2M_PER_PAGE) {
                        /* Map complete missing or identity p2m-page. */
                        mfns = (type == P2M_TYPE_MISSING) ?
                                p2m_missing : p2m_identity;
                        ptep = populate_extra_pte((unsigned long)(p2m + pfn));
                        set_pte(ptep,
                                pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
                        continue;
                }

                /* Complete missing or identity PMD(s) can be mapped. */
                ptep = (type == P2M_TYPE_MISSING) ?
                        p2m_missing_pte : p2m_identity_pte;
                for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
                        pmdp = populate_extra_pmd(
                                (unsigned long)(p2m + pfn) + i * PMD_SIZE);
                        set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
                }
        }
}
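/*
 * Not part of the excerpt above: a sketch of the classification helper the
 * chunking loop relies on.  It inspects the initial linear p2m list to
 * decide whether a pfn is identity-mapped, invalid, or backed by a real
 * mfn.  The exact enumerator values are illustrative; only the three-way
 * distinction matters to the loop above.
 */
enum p2m_type {
        P2M_TYPE_PFN,           /* real mfn, must keep the initial contents */
        P2M_TYPE_IDENTITY,      /* 1:1 mapped pfn */
        P2M_TYPE_MISSING,       /* INVALID_P2M_ENTRY */
        P2M_TYPE_UNKNOWN        /* beyond xen_max_p2m_pfn */
};

static int xen_p2m_elem_type(unsigned long pfn)
{
        unsigned long mfn;

        if (pfn >= xen_max_p2m_pfn)
                return P2M_TYPE_UNKNOWN;

        mfn = xen_p2m_addr[pfn];

        if (mfn == INVALID_P2M_ENTRY)
                return P2M_TYPE_MISSING;

        if (mfn & IDENTITY_FRAME_BIT)
                return P2M_TYPE_IDENTITY;

        return P2M_TYPE_PFN;
}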
int arch_domain_create(struct domain *d, unsigned int domcr_flags,
                       struct xen_arch_domainconfig *config)
{
    int rc;
    uint8_t gic_version;

    d->arch.relmem = RELMEM_not_started;

    /* Idle domains do not need this setup */
    if ( is_idle_domain(d) )
        return 0;

    ASSERT(config != NULL);

    if ( (rc = p2m_init(d)) != 0 )
        goto fail;

    rc = -ENOMEM;
    if ( (d->shared_info = alloc_xenheap_pages(0, 0)) == NULL )
        goto fail;

    /* Default the virtual ID to match the physical */
    d->arch.vpidr = boot_cpu_data.midr.bits;

    clear_page(d->shared_info);
    share_xen_page_with_guest(
        virt_to_page(d->shared_info), d, XENSHARE_writable);

    if ( (rc = domain_io_init(d)) != 0 )
        goto fail;

    if ( (rc = p2m_alloc_table(d)) != 0 )
        goto fail;

    /*
     * Currently the vGIC is emulating the same version of the
     * hardware GIC. Only the value XEN_DOMCTL_CONFIG_GIC_DEFAULT
     * is allowed. The DOMCTL will return the actual version of the
     * GIC.
     */
    rc = -EOPNOTSUPP;
    if ( config->gic_version != XEN_DOMCTL_CONFIG_GIC_DEFAULT )
        goto fail;

    switch ( gic_hw_version() )
    {
    case GIC_V3:
        gic_version = XEN_DOMCTL_CONFIG_GIC_V3;
        break;
    case GIC_V2:
        gic_version = XEN_DOMCTL_CONFIG_GIC_V2;
        break;
    default:
        BUG();
    }
    config->gic_version = gic_version;

    if ( (rc = gicv_setup(d)) != 0 )
        goto fail;

    if ( (rc = domain_vgic_init(d)) != 0 )
        goto fail;

    if ( (rc = domain_vtimer_init(d)) != 0 )
        goto fail;

    /*
     * The hardware domain will get a PPI later in
     * arch/arm/domain_build.c depending on the
     * interrupt map of the hardware.
     */
    if ( !is_hardware_domain(d) )
    {
        d->arch.evtchn_irq = GUEST_EVTCHN_PPI;
        /* At this stage vgic_reserve_virq should never fail */
        if ( !vgic_reserve_virq(d, GUEST_EVTCHN_PPI) )
            BUG();
    }

    /*
     * Virtual UART is only used by linux early printk and decompress code.
     * Only use it for the hardware domain because the linux kernel may not
     * support multi-platform.
     */
    if ( is_hardware_domain(d) && (rc = domain_vuart_init(d)) )
        goto fail;

    if ( (rc = iommu_domain_init(d)) != 0 )
        goto fail;

    return 0;

fail:
    d->is_dying = DOMDYING_dead;
    arch_domain_destroy(d);

    return rc;
}
int arch_domain_create(struct domain *d, unsigned int domcr_flags,
                       struct xen_arch_domainconfig *config)
{
    int rc, count = 0;

    BUILD_BUG_ON(GUEST_MAX_VCPUS < MAX_VIRT_CPUS);
    d->arch.relmem = RELMEM_not_started;

    /* Idle domains do not need this setup */
    if ( is_idle_domain(d) )
        return 0;

    ASSERT(config != NULL);

    /* p2m_init relies on some value initialized by the IOMMU subsystem */
    if ( (rc = iommu_domain_init(d)) != 0 )
        goto fail;

    if ( (rc = p2m_init(d)) != 0 )
        goto fail;

    rc = -ENOMEM;
    if ( (d->shared_info = alloc_xenheap_pages(0, 0)) == NULL )
        goto fail;

    /* Default the virtual ID to match the physical */
    d->arch.vpidr = boot_cpu_data.midr.bits;

    clear_page(d->shared_info);
    share_xen_page_with_guest(
        virt_to_page(d->shared_info), d, XENSHARE_writable);

    switch ( config->gic_version )
    {
    case XEN_DOMCTL_CONFIG_GIC_NATIVE:
        switch ( gic_hw_version() )
        {
        case GIC_V2:
            config->gic_version = XEN_DOMCTL_CONFIG_GIC_V2;
            d->arch.vgic.version = GIC_V2;
            break;

        case GIC_V3:
            config->gic_version = XEN_DOMCTL_CONFIG_GIC_V3;
            d->arch.vgic.version = GIC_V3;
            break;

        default:
            BUG();
        }
        break;

    case XEN_DOMCTL_CONFIG_GIC_V2:
        d->arch.vgic.version = GIC_V2;
        break;

    case XEN_DOMCTL_CONFIG_GIC_V3:
        d->arch.vgic.version = GIC_V3;
        break;

    default:
        rc = -EOPNOTSUPP;
        goto fail;
    }

    if ( (rc = domain_vgic_register(d, &count)) != 0 )
        goto fail;

    if ( (rc = domain_io_init(d, count + MAX_IO_HANDLER)) != 0 )
        goto fail;

    if ( (rc = domain_vgic_init(d, config->nr_spis)) != 0 )
        goto fail;

    if ( (rc = domain_vtimer_init(d, config)) != 0 )
        goto fail;

    update_domain_wallclock_time(d);

    /*
     * The hardware domain will get a PPI later in
     * arch/arm/domain_build.c depending on the
     * interrupt map of the hardware.
     */
    if ( !is_hardware_domain(d) )
    {
        d->arch.evtchn_irq = GUEST_EVTCHN_PPI;
        /* At this stage vgic_reserve_virq should never fail */
        if ( !vgic_reserve_virq(d, GUEST_EVTCHN_PPI) )
            BUG();
    }

    /*
     * Virtual UART is only used by linux early printk and decompress code.
     * Only use it for the hardware domain because the linux kernel may not
     * support multi-platform.
     */
    if ( is_hardware_domain(d) && (rc = domain_vuart_init(d)) )
        goto fail;

    return 0;

fail:
    d->is_dying = DOMDYING_dead;
    arch_domain_destroy(d);

    return rc;
}
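/*
 * Illustrative only: the shape of the arch config the toolstack passes in,
 * reduced to the fields exercised above.  The real struct is declared in the
 * public ARM headers and carries additional (output) fields, e.g. the timer
 * clock frequency consumed by domain_vtimer_init().
 */
struct xen_arch_domainconfig_example {
    uint8_t  gic_version;   /* IN/OUT: XEN_DOMCTL_CONFIG_GIC_* */
    uint32_t nr_spis;       /* IN: number of SPIs the vGIC should emulate */
};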
/*
 * Fully allocate the p2m structure for a given pfn. We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync. We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
        unsigned topidx, mididx;
        unsigned long ***top_p, **mid;
        unsigned long *top_mfn_p, *mid_mfn;

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);

        top_p = &p2m_top[topidx];
        mid = *top_p;

        if (mid == p2m_mid_missing) {
                /* Mid level is missing, allocate a new one */
                mid = alloc_p2m_page();
                if (!mid)
                        return false;

                p2m_mid_init(mid);

                if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
                        free_p2m_page(mid);
        }

        top_mfn_p = &p2m_top_mfn[topidx];
        mid_mfn = p2m_top_mfn_p[topidx];

        BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

        if (mid_mfn == p2m_mid_missing_mfn) {
                /* Separately check the mid mfn level */
                unsigned long missing_mfn;
                unsigned long mid_mfn_mfn;

                mid_mfn = alloc_p2m_page();
                if (!mid_mfn)
                        return false;

                p2m_mid_mfn_init(mid_mfn);

                missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
                mid_mfn_mfn = virt_to_mfn(mid_mfn);
                if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
                        free_p2m_page(mid_mfn);
                else
                        p2m_top_mfn_p[topidx] = mid_mfn;
        }

        if (p2m_top[topidx][mididx] == p2m_identity ||
            p2m_top[topidx][mididx] == p2m_missing) {
                /* p2m leaf page is missing */
                unsigned long *p2m;
                unsigned long *p2m_orig = p2m_top[topidx][mididx];

                p2m = alloc_p2m_page();
                if (!p2m)
                        return false;

                p2m_init(p2m);

                if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
                        free_p2m_page(p2m);
                else
                        mid_mfn[mididx] = virt_to_mfn(p2m);
        }

        return true;
}
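/*
 * Illustrative helper (not part of the file above): the lock-free install
 * idiom alloc_p2m() uses at every level, shown in isolation.  Allocate a
 * candidate page, publish it with cmpxchg, and on a lost race free the
 * candidate and fall back to whatever the winning CPU installed.
 */
static unsigned long *install_or_reuse(unsigned long **slot,
                                       unsigned long *placeholder)
{
        unsigned long *new_page = alloc_p2m_page();

        if (!new_page)
                return NULL;
        p2m_init(new_page);

        if (cmpxchg(slot, placeholder, new_page) != placeholder) {
                /* Another CPU won the race; reuse its page instead. */
                free_p2m_page(new_page);
                new_page = *slot;
        }
        return new_page;
}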
/*
 * Skim over the P2M tree looking at pages that are either filled with
 * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
 * replace the P2M leaf with a p2m_missing or p2m_identity.
 * Stick the old page in the new P2M tree location.
 */
bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
{
        unsigned topidx;
        unsigned mididx;
        unsigned ident_pfns;
        unsigned inv_pfns;
        unsigned long *p2m;
        unsigned long *mid_mfn_p;
        unsigned idx;
        unsigned long pfn;

        /* We only look when this entails a P2M middle layer */
        if (p2m_index(set_pfn))
                return false;

        for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
                topidx = p2m_top_index(pfn);

                if (!p2m_top[topidx])
                        continue;

                if (p2m_top[topidx] == p2m_mid_missing)
                        continue;

                mididx = p2m_mid_index(pfn);
                p2m = p2m_top[topidx][mididx];
                if (!p2m)
                        continue;

                if ((p2m == p2m_missing) || (p2m == p2m_identity))
                        continue;

                if ((unsigned long)p2m == INVALID_P2M_ENTRY)
                        continue;

                ident_pfns = 0;
                inv_pfns = 0;
                for (idx = 0; idx < P2M_PER_PAGE; idx++) {
                        /* IDENTITY_PFNs are 1:1 */
                        if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
                                ident_pfns++;
                        else if (p2m[idx] == INVALID_P2M_ENTRY)
                                inv_pfns++;
                        else
                                break;
                }
                if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
                        goto found;
        }
        return false;
found:
        /* Found one, replace old with p2m_identity or p2m_missing */
        p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);

        /* And the other for save/restore.. */
        mid_mfn_p = p2m_top_mfn_p[topidx];
        /*
         * NOTE: Even if it is a p2m_identity it should still point to
         * a page filled with INVALID_P2M_ENTRY entries.
         */
        mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);

        /* Reset where we want to stick the old page in. */
        topidx = p2m_top_index(set_pfn);
        mididx = p2m_mid_index(set_pfn);

        /* This shouldn't happen */
        if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
                early_alloc_p2m(set_pfn);

        if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
                return false;

        p2m_init(p2m);
        p2m_top[topidx][mididx] = p2m;
        mid_mfn_p = p2m_top_mfn_p[topidx];
        mid_mfn_p[mididx] = virt_to_mfn(p2m);

        return true;
}