static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
                                   unsigned long end, unsigned long pfn,
                                   pgprot_t prot)
{
        pud_t *pud;
        pmd_t *pmd;
        unsigned long addr, next;
        int ret;

        addr = start;
        do {
                pud = pud_offset(pgd, addr);

                if (pud_none_or_clear_bad(pud)) {
                        pmd = pmd_alloc_one(NULL, addr);
                        if (!pmd) {
                                kvm_err("Cannot allocate Hyp pmd\n");
                                return -ENOMEM;
                        }
                        pud_populate(NULL, pud, pmd);
                        get_page(virt_to_page(pud));
                        kvm_flush_dcache_to_poc(pud, sizeof(*pud));
                }

                next = pud_addr_end(addr, end);
                ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
                if (ret)
                        return ret;
                pfn += (next - addr) >> PAGE_SHIFT;
        } while (addr = next, addr != end);

        return 0;
}
static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
                                      unsigned long end, int nid)
{
        pmd_t *pmd;
        unsigned long next;

        if (pud_none(*pud)) {
                void *p;

                if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
                    ((end - addr) == PUD_SIZE) &&
                    IS_ALIGNED(addr, PUD_SIZE)) {
                        p = early_alloc(PUD_SIZE, nid, false);
                        if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
                                return;
                        else if (p)
                                memblock_free(__pa(p), PUD_SIZE);
                }

                p = early_alloc(PAGE_SIZE, nid, true);
                pud_populate(&init_mm, pud, p);
        }

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (!pmd_large(*pmd))
                        kasan_populate_pmd(pmd, addr, next, nid);
        } while (pmd++, addr = next, addr != end);
}
/**
 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
 * @pgd:        Page directory pointer.
 * @addr:       Address to index page table using.
 * @cache:      MMU page cache to allocate new page tables from, or NULL.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
 * address @addr. If page tables don't exist for @addr, they will be created
 * from the MMU cache if @cache is not NULL.
 *
 * Returns:     Pointer to pte_t corresponding to @addr.
 *              NULL if a page table doesn't exist for @addr and !@cache.
 *              NULL if a page table allocation failed.
 */
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
                                unsigned long addr)
{
        pud_t *pud;
        pmd_t *pmd;

        pgd += pgd_index(addr);
        if (pgd_none(*pgd)) {
                /* Not used on MIPS yet */
                BUG();
                return NULL;
        }
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud)) {
                pmd_t *new_pmd;

                if (!cache)
                        return NULL;
                new_pmd = mmu_memory_cache_alloc(cache);
                pmd_init((unsigned long)new_pmd,
                         (unsigned long)invalid_pte_table);
                pud_populate(NULL, pud, new_pmd);
        }
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd)) {
                pte_t *new_pte;

                if (!cache)
                        return NULL;
                new_pte = mmu_memory_cache_alloc(cache);
                clear_page(new_pte);
                pmd_populate_kernel(NULL, pmd, new_pte);
        }
        return pte_offset(pmd, addr);
}
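/*
 * Illustrative sketch only (not from the original source): a hypothetical
 * caller of kvm_mips_walk_pgd() showing the two usage modes described in the
 * kernel-doc above -- a lookup that must not allocate (cache == NULL) and a
 * faulting path that may allocate intermediate tables from a pre-filled
 * kvm_mmu_memory_cache. The helper name kvm_mips_example_lookup() is invented
 * for the example.
 */
static pte_t *kvm_mips_example_lookup(pgd_t *pgd, unsigned long addr,
                                      struct kvm_mmu_memory_cache *cache)
{
        /* Lookup only: returns NULL if any table level is missing. */
        pte_t *ptep = kvm_mips_walk_pgd(pgd, NULL, addr);

        if (!ptep && cache)
                /* Fault path: allow pud/pmd/pte tables to be allocated. */
                ptep = kvm_mips_walk_pgd(pgd, cache, addr);

        return ptep;
}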
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
                          unsigned long prot)
{
        pmd_t *pmd;
        unsigned long next;

        if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) {
                pmd = pmd_alloc_one(&init_mm, addr);
                if (!pmd) {
                        pr_warning("Failed to allocate identity pmd.\n");
                        return;
                }
                /*
                 * Copy the original PMD to ensure that the PMD entries for
                 * the kernel image are preserved.
                 */
                if (!pud_none(*pud))
                        memcpy(pmd, pmd_offset(pud, 0),
                               PTRS_PER_PMD * sizeof(pmd_t));
                pud_populate(&init_mm, pud, pmd);
                pmd += pmd_index(addr);
        } else
                pmd = pmd_offset(pud, addr);

        do {
                next = pmd_addr_end(addr, end);
                *pmd = __pmd((addr & PMD_MASK) | prot);
                flush_pmd_entry(pmd);
        } while (pmd++, addr = next, addr != end);
}
static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
                    unsigned long end)
{
        pmd_t *src_pmdp;
        pmd_t *dst_pmdp;
        unsigned long next;
        unsigned long addr = start;

        if (pud_none(READ_ONCE(*dst_pudp))) {
                dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
                if (!dst_pmdp)
                        return -ENOMEM;
                pud_populate(&init_mm, dst_pudp, dst_pmdp);
        }
        dst_pmdp = pmd_offset(dst_pudp, start);

        src_pmdp = pmd_offset(src_pudp, start);
        do {
                pmd_t pmd = READ_ONCE(*src_pmdp);

                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd))
                        continue;
                if (pmd_table(pmd)) {
                        if (copy_pte(dst_pmdp, src_pmdp, addr, next))
                                return -ENOMEM;
                } else {
                        set_pmd(dst_pmdp,
                                __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
                }
        } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);

        return 0;
}
static pmd_t * __init one_md_table_init(pud_t *pud)
{
        if (pud_none(*pud)) {
                pmd_t *pmd;

                pmd = alloc_bootmem_pages(PAGE_SIZE);
                pud_populate(&init_mm, pud, pmd);
                BUG_ON(pmd != pmd_offset(pud, 0));
        }

        return pmd_offset(pud, 0);
}
static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                           unsigned long addr, unsigned long end,
                           phys_addr_t phys, pgprot_t prot,
                           void *(*alloc)(unsigned long size))
{
        pmd_t *pmd;
        unsigned long next;

        /*
         * Check for initial section mappings in the pgd/pud and remove them.
         */
        if (pud_none(*pud) || pud_sect(*pud)) {
                pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
                if (pud_sect(*pud)) {
                        /*
                         * need to have the 1G of mappings continue to be
                         * present
                         */
                        split_pud(pud, pmd);
                }
                pud_populate(mm, pud, pmd);
                flush_tlb_all();
        }
        BUG_ON(pud_bad(*pud));

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                /* try section mapping first */
                if (((addr | next | phys) & ~SECTION_MASK) == 0) {
                        pmd_t old_pmd = *pmd;

                        set_pmd(pmd, __pmd(phys |
                                           pgprot_val(mk_sect_prot(prot))));
                        /*
                         * Check for previous table entries created during
                         * boot (__create_page_tables) and flush them.
                         */
                        if (!pmd_none(old_pmd)) {
                                flush_tlb_all();
                                if (pmd_table(old_pmd)) {
                                        phys_addr_t table =
                                                __pa(pte_offset_map(&old_pmd, 0));
                                        if (!WARN_ON_ONCE(slab_is_available()))
                                                memblock_free(table, PAGE_SIZE);
                                }
                        }
                } else {
                        alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
                                       prot, alloc);
                }
                phys += next - addr;
        } while (pmd++, addr = next, addr != end);
}
static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
                                     unsigned long end)
{
        pud_t *pud = pud_offset(pgd, addr);
        unsigned long next;

        do {
                next = pud_addr_end(addr, end);
                if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
                        pmd_t *pmd;

                        pud_populate(&init_mm, pud, kasan_zero_pmd);
                        pmd = pmd_offset(pud, addr);
                        pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
                        continue;
                }

                if (pud_none(*pud)) {
                        pud_populate(&init_mm, pud,
                                     early_alloc(PAGE_SIZE, NUMA_NO_NODE));
                }
                zero_pmd_populate(pud, addr, next);
        } while (pud++, addr = next, addr != end);
}
static void __init kasan_early_pmd_populate(pud_t *pud,
                                            unsigned long addr,
                                            unsigned long end)
{
        pmd_t *pmd;
        unsigned long next;

        if (pud_none(*pud))
                pud_populate(&init_mm, pud, kasan_zero_pmd);

        pmd = pmd_offset_kimg(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                kasan_early_pte_populate(pmd, addr, next);
        } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
}
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                          phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, old_pte;

        /* Create 2nd stage page table mapping - Level 1 */
        pgd = kvm->arch.pgd + pgd_index(addr);
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud)) {
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
                pmd = mmu_memory_cache_alloc(cache);
                pud_populate(NULL, pud, pmd);
                get_page(virt_to_page(pud));
        }

        pmd = pmd_offset(pud, addr);

        /* Create 2nd stage page table mapping - Level 2 */
        if (pmd_none(*pmd)) {
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
                pte = mmu_memory_cache_alloc(cache);
                kvm_clean_pte(pte);
                pmd_populate_kernel(NULL, pmd, pte);
                get_page(virt_to_page(pmd));
        }

        pte = pte_offset_kernel(pmd, addr);

        if (iomap && pte_present(*pte))
                return -EFAULT;

        /* Create 2nd stage page table mapping - Level 3 */
        old_pte = *pte;
        kvm_set_pte(pte, *new_pte);
        if (pte_present(old_pte))
                kvm_tlb_flush_vmid_ipa(kvm, addr);
        else
                get_page(virt_to_page(pte));

        return 0;
}
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                             phys_addr_t addr)
{
        pud_t *pud;
        pmd_t *pmd;

        pud = stage2_get_pud(kvm, cache, addr);
        if (pud_none(*pud)) {
                if (!cache)
                        return NULL;
                pmd = mmu_memory_cache_alloc(cache);
                pud_populate(NULL, pud, pmd);
                get_page(virt_to_page(pud));
        }

        return pmd_offset(pud, addr);
}
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
                                 pgprot_t flags, unsigned int map_page_size,
                                 int nid, unsigned long region_start,
                                 unsigned long region_end)
{
        unsigned long pfn = pa >> PAGE_SHIFT;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        pgdp = pgd_offset_k(ea);
        if (pgd_none(*pgdp)) {
                pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, region_start,
                                           region_end);
                pgd_populate(&init_mm, pgdp, pudp);
        }
        pudp = pud_offset(pgdp, ea);
        if (map_page_size == PUD_SIZE) {
                ptep = (pte_t *)pudp;
                goto set_the_pte;
        }
        if (pud_none(*pudp)) {
                pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, region_start,
                                           region_end);
                pud_populate(&init_mm, pudp, pmdp);
        }
        pmdp = pmd_offset(pudp, ea);
        if (map_page_size == PMD_SIZE) {
                ptep = pmdp_ptep(pmdp);
                goto set_the_pte;
        }
        if (!pmd_present(*pmdp)) {
                ptep = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
                                           region_end);
                pmd_populate_kernel(&init_mm, pmdp, ptep);
        }
        ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
        smp_wmb();
        return 0;
}
/*
 * map_kernel_page is currently only called by __ioremap. It adds an entry to
 * the ioremap page table and adds an entry to the HPT, possibly bolting it.
 */
int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
        if (slab_is_available()) {
                pgdp = pgd_offset_k(ea);
                pudp = pud_alloc(&init_mm, pgdp, ea);
                if (!pudp)
                        return -ENOMEM;
                pmdp = pmd_alloc(&init_mm, pudp, ea);
                if (!pmdp)
                        return -ENOMEM;
                ptep = pte_alloc_kernel(pmdp, ea);
                if (!ptep)
                        return -ENOMEM;
        } else {
                pgdp = pgd_offset_k(ea);
#ifndef __PAGETABLE_PUD_FOLDED
                if (pgd_none(*pgdp)) {
                        pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
                        pgd_populate(&init_mm, pgdp, pudp);
                }
#endif /* !__PAGETABLE_PUD_FOLDED */
                pudp = pud_offset(pgdp, ea);
                if (pud_none(*pudp)) {
                        pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
                        pud_populate(&init_mm, pudp, pmdp);
                }
                pmdp = pmd_offset(pudp, ea);
                if (!pmd_present(*pmdp)) {
                        ptep = early_alloc_pgtable(PAGE_SIZE);
                        pmd_populate_kernel(&init_mm, pmdp, ptep);
                }
                ptep = pte_offset_kernel(pmdp, ea);
        }
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
        smp_wmb();
        return 0;
}
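/*
 * Illustrative sketch only (not from the original source): how an
 * __ioremap-style caller might use map_kernel_page() to wire up one kernel
 * virtual page to a physical address. The helper name example_map_one_page()
 * and the non-cacheable protection choice are invented for the example; error
 * handling simply propagates the -ENOMEM return seen above.
 */
static int example_map_one_page(unsigned long va, phys_addr_t pa)
{
        /* Map a single, page-aligned kernel virtual page as uncached I/O. */
        return map_kernel_page(va & PAGE_MASK, pa & PAGE_MASK,
                               pgprot_noncached(PAGE_KERNEL));
}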
static int __create_hyp_mappings(pgd_t *pgdp,
                                 unsigned long start, unsigned long end,
                                 unsigned long pfn, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        unsigned long addr, next;
        int err = 0;

        mutex_lock(&kvm_hyp_pgd_mutex);
        addr = start & PAGE_MASK;
        end = PAGE_ALIGN(end);
        do {
                pgd = pgdp + pgd_index(addr);
                pud = pud_offset(pgd, addr);

                if (pud_none_or_clear_bad(pud)) {
                        pmd = pmd_alloc_one(NULL, addr);
                        if (!pmd) {
                                kvm_err("Cannot allocate Hyp pmd\n");
                                err = -ENOMEM;
                                goto out;
                        }
                        pud_populate(NULL, pud, pmd);
                        get_page(virt_to_page(pud));
                        kvm_flush_dcache_to_poc(pud, sizeof(*pud));
                }

                next = pgd_addr_end(addr, end);
                err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
                if (err)
                        goto out;
                pfn += (next - addr) >> PAGE_SHIFT;
        } while (addr = next, addr != end);
out:
        mutex_unlock(&kvm_hyp_pgd_mutex);
        return err;
}
/**
 * kasan_populate_zero_shadow - populate shadow memory region with
 *                              kasan_zero_page
 * @shadow_start: start of the memory range to populate
 * @shadow_end: end of the memory range to populate
 */
void __init kasan_populate_zero_shadow(const void *shadow_start,
                                       const void *shadow_end)
{
        unsigned long addr = (unsigned long)shadow_start;
        unsigned long end = (unsigned long)shadow_end;
        pgd_t *pgd = pgd_offset_k(addr);
        unsigned long next;

        do {
                next = pgd_addr_end(addr, end);

                if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
                        pud_t *pud;
                        pmd_t *pmd;

                        /*
                         * kasan_zero_pud should already be populated with
                         * pmds at this point. The [pud,pmd]_populate*()
                         * calls below are only needed for 3- and 2-level
                         * page tables, where we don't have real puds/pmds,
                         * so pgd_populate() and pud_populate() are no-ops.
                         */
                        pgd_populate(&init_mm, pgd, kasan_zero_pud);
                        pud = pud_offset(pgd, addr);
                        pud_populate(&init_mm, pud, kasan_zero_pmd);
                        pmd = pmd_offset(pud, addr);
                        pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
                        continue;
                }

                if (pgd_none(*pgd)) {
                        pgd_populate(&init_mm, pgd,
                                     early_alloc(PAGE_SIZE, NUMA_NO_NODE));
                }
                zero_pud_populate(pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
}
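/*
 * Illustrative sketch only (not from the original source): a hypothetical
 * arch init step that backs the shadow of one memory region with the shared
 * zero page. kasan_mem_to_shadow() is the usual address-to-shadow translation
 * helper; the helper name and the region bounds are invented for the example.
 */
static void __init example_zero_shadow_for_region(void *start, void *end)
{
        /* Map the whole shadow of [start, end) to kasan_zero_page. */
        kasan_populate_zero_shadow(kasan_mem_to_shadow(start),
                                   kasan_mem_to_shadow(end));
}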
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
                          unsigned long prot)
{
        pmd_t *pmd;
        unsigned long next;

        if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) {
                pmd = pmd_alloc_one(NULL, addr);
                if (!pmd) {
                        pr_warning("Failed to allocate identity pmd.\n");
                        return;
                }
                pud_populate(NULL, pud, pmd);
                pmd += pmd_index(addr);
        } else
                pmd = pmd_offset(pud, addr);

        do {
                next = pmd_addr_end(addr, end);
                *pmd = __pmd((addr & PMD_MASK) | prot);
                flush_pmd_entry(pmd);
        } while (pmd++, addr = next, addr != end);
}
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:        The KVM struct pointer for the VM.
 *
 * Allocates only the 1st level table, of size defined by S2_PGD_ORDER (it can
 * support either full 40-bit input addresses or be limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
        pgd_t *pgd;
        void *hwpgd;

        if (kvm->arch.pgd != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }

        hwpgd = kvm_alloc_hwpgd();
        if (!hwpgd)
                return -ENOMEM;

        /*
         * When the kernel uses more levels of page tables than the
         * guest, we allocate a fake PGD and pre-populate it to point
         * to the next-level page table, which will be the real
         * initial page table pointed to by the VTTBR.
         *
         * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
         * the PMD and the kernel will use folded puds.
         * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
         * pages.
         */
        if (KVM_PREALLOC_LEVEL > 0) {
                int i;

                /*
                 * Allocate fake pgd for the page table manipulation macros to
                 * work. This is not used by the hardware and we have no
                 * alignment requirement for this allocation.
                 */
                pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
                              GFP_KERNEL | __GFP_ZERO);
                if (!pgd) {
                        kvm_free_hwpgd(hwpgd);
                        return -ENOMEM;
                }

                /* Plug the HW PGD into the fake one. */
                for (i = 0; i < PTRS_PER_S2_PGD; i++) {
                        if (KVM_PREALLOC_LEVEL == 1)
                                pgd_populate(NULL, pgd + i,
                                             (pud_t *)hwpgd + i * PTRS_PER_PUD);
                        else if (KVM_PREALLOC_LEVEL == 2)
                                pud_populate(NULL, pud_offset(pgd, 0) + i,
                                             (pmd_t *)hwpgd + i * PTRS_PER_PMD);
                }
        } else {
                /*
                 * Allocate actual first-level Stage-2 page table used by the
                 * hardware for Stage-2 page table walks.
                 */
                pgd = (pgd_t *)hwpgd;
        }

        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;

        return 0;
}
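/*
 * Illustrative sketch only (not from the original source): the rough shape of
 * a VM-init caller around kvm_alloc_stage2_pgd(). In the kernel this is
 * typically reached from the arch VM-creation path; the helper name
 * example_init_stage2_mmu() is invented for the example.
 */
static int example_init_stage2_mmu(struct kvm *kvm)
{
        int ret;

        /* Allocate (and, if needed, fake-populate) the stage-2 PGD. */
        ret = kvm_alloc_stage2_pgd(kvm);
        if (ret)
                return ret;

        /* kvm->arch.pgd is now valid and can be plugged into the VTTBR. */
        return 0;
}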
/*
 * Copies length bytes, starting at src_start, into a new page, performs cache
 * maintenance, then maps it at the specified low address as executable.
 *
 * This is used by hibernate to copy the code it needs to execute when
 * overwriting the kernel text. This function generates a new set of page
 * tables, which it loads into ttbr0.
 *
 * Length is provided as we probably only want 4K of data, even on a 64K
 * page system.
 */
static int create_safe_exec_page(void *src_start, size_t length,
                                 unsigned long dst_addr,
                                 phys_addr_t *phys_dst_addr,
                                 void *(*allocator)(gfp_t mask),
                                 gfp_t mask)
{
        int rc = 0;
        pgd_t *trans_pgd;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;
        unsigned long dst = (unsigned long)allocator(mask);

        if (!dst) {
                rc = -ENOMEM;
                goto out;
        }

        memcpy((void *)dst, src_start, length);
        flush_icache_range(dst, dst + length);

        /* Top-level table for the temporary page tables loaded into ttbr0. */
        trans_pgd = allocator(mask);
        if (!trans_pgd) {
                rc = -ENOMEM;
                goto out;
        }

        pgdp = pgd_offset_raw(trans_pgd, dst_addr);
        if (pgd_none(READ_ONCE(*pgdp))) {
                pudp = allocator(mask);
                if (!pudp) {
                        rc = -ENOMEM;
                        goto out;
                }
                pgd_populate(&init_mm, pgdp, pudp);
        }

        pudp = pud_offset(pgdp, dst_addr);
        if (pud_none(READ_ONCE(*pudp))) {
                pmdp = allocator(mask);
                if (!pmdp) {
                        rc = -ENOMEM;
                        goto out;
                }
                pud_populate(&init_mm, pudp, pmdp);
        }

        pmdp = pmd_offset(pudp, dst_addr);
        if (pmd_none(READ_ONCE(*pmdp))) {
                ptep = allocator(mask);
                if (!ptep) {
                        rc = -ENOMEM;
                        goto out;
                }
                pmd_populate_kernel(&init_mm, pmdp, ptep);
        }

        ptep = pte_offset_kernel(pmdp, dst_addr);
        set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));

        /*
         * Load our new page tables. A strict BBM approach requires that we
         * ensure that TLBs are free of any entries that may overlap with the
         * global mappings we are about to install.
         *
         * For a real hibernate/resume cycle TTBR0 currently points to a zero
         * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
         * runtime services), while for a userspace-driven test_resume cycle it
         * points to userspace page tables (and we must point it at a zero page
         * ourselves). Elsewhere we only (un)install the idmap with preemption
         * disabled, so T0SZ should be as required regardless.
         */
        cpu_set_reserved_ttbr0();
        local_flush_tlb_all();
        write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
        isb();

        *phys_dst_addr = virt_to_phys((void *)dst);

out:
        return rc;
}
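/*
 * Illustrative sketch only (not from the original source): the rough shape of
 * a resume-path caller. A small relocation routine is copied into a safe page
 * and mapped at its linked address, and the physical address of the copy is
 * returned for the final jump. The example_* symbols and the allocator
 * adapter are invented for the example; the real code uses its own
 * linker-script symbols and get_safe_page() as the allocator.
 */
extern char example_exit_text_start[], example_exit_text_end[];

/* Adapter matching the void *(*)(gfp_t) allocator type expected above. */
static void *example_alloc_safe_page(gfp_t mask)
{
        return (void *)get_safe_page(mask);
}

static int example_relocate_exit_code(phys_addr_t *phys_exit)
{
        size_t exit_size = example_exit_text_end - example_exit_text_start;

        /* GFP_ATOMIC: resume runs with normal allocation paths unavailable. */
        return create_safe_exec_page(example_exit_text_start, exit_size,
                                     (unsigned long)example_exit_text_start,
                                     phys_exit, example_alloc_safe_page,
                                     GFP_ATOMIC);
}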