/* * Build the parallel p2m_top_mfn and p2m_mid_mfn structures * * This is called both at boot time, and after resuming from suspend: * - At boot time we're called rather early, and must use alloc_bootmem*() * to allocate memory. * * - After resume we're called from within stop_machine, but the mfn * tree should already be completely allocated. */ void __ref xen_build_mfn_list_list(void) { unsigned long pfn, mfn; pte_t *ptep; unsigned int level, topidx, mididx; unsigned long *mid_mfn_p; if (xen_feature(XENFEAT_auto_translated_physmap)) return; /* Pre-initialize p2m_top_mfn to be completely missing */ if (p2m_top_mfn == NULL) { p2m_mid_missing_mfn = alloc_p2m_page(); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); p2m_top_mfn_p = alloc_p2m_page(); p2m_top_mfn_p_init(p2m_top_mfn_p); p2m_top_mfn = alloc_p2m_page(); p2m_top_mfn_init(p2m_top_mfn); } else { /* Reinitialise, mfn's all change after migration */ p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); } for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; pfn += P2M_PER_PAGE) { topidx = p2m_top_index(pfn); mididx = p2m_mid_index(pfn); mid_mfn_p = p2m_top_mfn_p[topidx]; ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); BUG_ON(!ptep || level != PG_LEVEL_4K); mfn = pte_mfn(*ptep); ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); /* Don't bother allocating any mfn mid levels if * they're just missing, just update the stored mfn, * since all could have changed over a migrate. */ if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { BUG_ON(mididx); BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; continue; } if (mid_mfn_p == p2m_mid_missing_mfn) { mid_mfn_p = alloc_p2m_page(); p2m_mid_mfn_init(mid_mfn_p, p2m_missing); p2m_top_mfn_p[topidx] = mid_mfn_p; } p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); mid_mfn_p[mididx] = mfn; } }
/* * Allocate new pmd(s). It is checked whether the old pmd is still in place. * If not, nothing is changed. This is okay as the only reason for allocating * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! */ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { pte_t *ptechk; pte_t *pte_newpg[PMDS_PER_MID_PAGE]; pmd_t *pmdp; unsigned int level; unsigned long flags; unsigned long vaddr; int i; /* Do all allocations first to bail out in error case. */ for (i = 0; i < PMDS_PER_MID_PAGE; i++) { pte_newpg[i] = alloc_p2m_page(); if (!pte_newpg[i]) { for (i--; i >= 0; i--) free_p2m_page(pte_newpg[i]); return NULL; } } vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); for (i = 0; i < PMDS_PER_MID_PAGE; i++) { copy_page(pte_newpg[i], pte_pg); paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); pmdp = lookup_pmd_address(vaddr); BUG_ON(!pmdp); spin_lock_irqsave(&p2m_update_lock, flags); ptechk = lookup_address(vaddr, &level); if (ptechk == pte_pg) { HYPERVISOR_shared_info->arch.p2m_generation++; wmb(); /* Tools are synchronizing via p2m_generation. */ set_pmd(pmdp, __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); wmb(); /* Tools are synchronizing via p2m_generation. */ HYPERVISOR_shared_info->arch.p2m_generation++; pte_newpg[i] = NULL; } spin_unlock_irqrestore(&p2m_update_lock, flags); if (pte_newpg[i]) { paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); free_p2m_page(pte_newpg[i]); } vaddr += PMD_SIZE; } return lookup_address(addr, &level); }
/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so * the new pages are installed with cmpxchg; if we lose the race then * simply free the page we allocated and use the one that's there. */ static bool alloc_p2m(unsigned long pfn) { unsigned topidx; unsigned long *top_mfn_p, *mid_mfn; pte_t *ptep, *pte_pg; unsigned int level; unsigned long flags; unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); unsigned long p2m_pfn; ptep = lookup_address(addr, &level); BUG_ON(!ptep || level != PG_LEVEL_4K); pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { /* PMD level is missing, allocate a new one */ ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) return false; } if (p2m_top_mfn && pfn < MAX_P2M_PFN) { topidx = p2m_top_index(pfn); top_mfn_p = &p2m_top_mfn[topidx]; mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); if (mid_mfn == p2m_mid_missing_mfn) { /* Separately check the mid mfn level */ unsigned long missing_mfn; unsigned long mid_mfn_mfn; unsigned long old_mfn; mid_mfn = alloc_p2m_page(); if (!mid_mfn) return false; p2m_mid_mfn_init(mid_mfn, p2m_missing); missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); mid_mfn_mfn = virt_to_mfn(mid_mfn); old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); if (old_mfn != missing_mfn) { free_p2m_page(mid_mfn); mid_mfn = mfn_to_virt(old_mfn); } else { p2m_top_mfn_p[topidx] = mid_mfn; } } } else { mid_mfn = NULL; } p2m_pfn = pte_pfn(READ_ONCE(*ptep)); if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { /* p2m leaf page is missing */ unsigned long *p2m; p2m = alloc_p2m_page(); if (!p2m) return false; if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) p2m_init(p2m); else p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1)); spin_lock_irqsave(&p2m_update_lock, flags); if (pte_pfn(*ptep) == p2m_pfn) { HYPERVISOR_shared_info->arch.p2m_generation++; wmb(); /* Tools are synchronizing via p2m_generation. */ set_pte(ptep, pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); wmb(); /* Tools are synchronizing via p2m_generation. */ HYPERVISOR_shared_info->arch.p2m_generation++; if (mid_mfn) mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m); p2m = NULL; } spin_unlock_irqrestore(&p2m_update_lock, flags); if (p2m) free_p2m_page(p2m); } /* Expanded the p2m? */ if (pfn > xen_p2m_last_pfn) { xen_p2m_last_pfn = pfn; HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } return true; }
static void __init xen_rebuild_p2m_list(unsigned long *p2m) { unsigned int i, chunk; unsigned long pfn; unsigned long *mfns; pte_t *ptep; pmd_t *pmdp; int type; p2m_missing = alloc_p2m_page(); p2m_init(p2m_missing); p2m_identity = alloc_p2m_page(); p2m_init(p2m_identity); p2m_missing_pte = alloc_p2m_page(); paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); p2m_identity_pte = alloc_p2m_page(); paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); for (i = 0; i < PTRS_PER_PTE; i++) { set_pte(p2m_missing_pte + i, pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); set_pte(p2m_identity_pte + i, pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); } for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { /* * Try to map missing/identity PMDs or p2m-pages if possible. * We have to respect the structure of the mfn_list_list * which will be built just afterwards. * Chunk size to test is one p2m page if we are in the middle * of a mfn_list_list mid page and the complete mid page area * if we are at index 0 of the mid page. Please note that a * mid page might cover more than one PMD, e.g. on 32 bit PAE * kernels. */ chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; type = xen_p2m_elem_type(pfn); i = 0; if (type != P2M_TYPE_PFN) for (i = 1; i < chunk; i++) if (xen_p2m_elem_type(pfn + i) != type) break; if (i < chunk) /* Reset to minimal chunk size. */ chunk = P2M_PER_PAGE; if (type == P2M_TYPE_PFN || i < chunk) { /* Use initial p2m page contents. */ #ifdef CONFIG_X86_64 mfns = alloc_p2m_page(); copy_page(mfns, xen_p2m_addr + pfn); #else mfns = xen_p2m_addr + pfn; #endif ptep = populate_extra_pte((unsigned long)(p2m + pfn)); set_pte(ptep, pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); continue; } if (chunk == P2M_PER_PAGE) { /* Map complete missing or identity p2m-page. */ mfns = (type == P2M_TYPE_MISSING) ? p2m_missing : p2m_identity; ptep = populate_extra_pte((unsigned long)(p2m + pfn)); set_pte(ptep, pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); continue; } /* Complete missing or identity PMD(s) can be mapped. */ ptep = (type == P2M_TYPE_MISSING) ? p2m_missing_pte : p2m_identity_pte; for (i = 0; i < PMDS_PER_MID_PAGE; i++) { pmdp = populate_extra_pmd( (unsigned long)(p2m + pfn) + i * PMD_SIZE); set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); } } }
/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so * the new pages are installed with cmpxchg; if we lose the race then * simply free the page we allocated and use the one that's there. */ static bool alloc_p2m(unsigned long pfn) { unsigned topidx, mididx; unsigned long ***top_p, **mid; unsigned long *top_mfn_p, *mid_mfn; topidx = p2m_top_index(pfn); mididx = p2m_mid_index(pfn); top_p = &p2m_top[topidx]; mid = *top_p; if (mid == p2m_mid_missing) { /* Mid level is missing, allocate a new one */ mid = alloc_p2m_page(); if (!mid) return false; p2m_mid_init(mid); if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) free_p2m_page(mid); } top_mfn_p = &p2m_top_mfn[topidx]; mid_mfn = p2m_top_mfn_p[topidx]; BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); if (mid_mfn == p2m_mid_missing_mfn) { /* Separately check the mid mfn level */ unsigned long missing_mfn; unsigned long mid_mfn_mfn; mid_mfn = alloc_p2m_page(); if (!mid_mfn) return false; p2m_mid_mfn_init(mid_mfn); missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); mid_mfn_mfn = virt_to_mfn(mid_mfn); if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) free_p2m_page(mid_mfn); else p2m_top_mfn_p[topidx] = mid_mfn; } if (p2m_top[topidx][mididx] == p2m_identity || p2m_top[topidx][mididx] == p2m_missing) { /* p2m leaf page is missing */ unsigned long *p2m; unsigned long *p2m_orig = p2m_top[topidx][mididx]; p2m = alloc_p2m_page(); if (!p2m) return false; p2m_init(p2m); if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) free_p2m_page(p2m); else mid_mfn[mididx] = virt_to_mfn(p2m); } return true; }