/*
 * We need our own copy of the higher levels of the page tables
 * because we want to avoid inserting EFI region mappings (EFI_VA_END
 * to EFI_VA_START) into the standard kernel page tables. Everything
 * else can be shared, see efi_sync_low_kernel_mappings().
 */
int __init efi_alloc_page_tables(void)
{
	pgd_t *pgd;
	pud_t *pud;
	gfp_t gfp_mask;

	if (efi_enabled(EFI_OLD_MEMMAP))
		return 0;

	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
	if (!efi_pgd)
		return -ENOMEM;

	pgd = efi_pgd + pgd_index(EFI_VA_END);

	pud = pud_alloc_one(NULL, 0);
	if (!pud) {
		free_page((unsigned long)efi_pgd);
		return -ENOMEM;
	}

	pgd_populate(NULL, pgd, pud);

	return 0;
}
static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	pud_t *dst_pudp;
	pud_t *src_pudp;
	unsigned long next;
	unsigned long addr = start;

	if (pgd_none(READ_ONCE(*dst_pgdp))) {
		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pudp)
			return -ENOMEM;
		pgd_populate(&init_mm, dst_pgdp, dst_pudp);
	}
	dst_pudp = pud_offset(dst_pgdp, start);

	src_pudp = pud_offset(src_pgdp, start);
	do {
		pud_t pud = READ_ONCE(*src_pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			continue;
		if (pud_table(pud)) {
			if (copy_pmd(dst_pudp, src_pudp, addr, next))
				return -ENOMEM;
		} else {
			set_pud(dst_pudp,
				__pud(pud_val(pud) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pudp++, src_pudp++, addr = next, addr != end);

	return 0;
}
void __init init_espfix_bsp(void)
{
	pgd_t *pgd_p;
	pteval_t ptemask;

	ptemask = __supported_pte_mask;

	/* Install the espfix pud into the kernel page directory */
	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);

	/* Randomize the locations */
	init_espfix_random();

	/* The rest is the same as for any other processor */
	init_espfix_ap();
}
/**
 * kasan_populate_zero_shadow - populate shadow memory region with
 *                              kasan_zero_page
 * @shadow_start - start of the memory range to populate
 * @shadow_end   - end of the memory range to populate
 */
void __init kasan_populate_zero_shadow(const void *shadow_start,
				       const void *shadow_end)
{
	unsigned long addr = (unsigned long)shadow_start;
	unsigned long end = (unsigned long)shadow_end;
	pgd_t *pgd = pgd_offset_k(addr);
	unsigned long next;

	do {
		next = pgd_addr_end(addr, end);

		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
			pud_t *pud;
			pmd_t *pmd;

			/*
			 * kasan_zero_pud should already be populated with
			 * pmds at this point.
			 * The [pud,pmd]_populate*() calls below are needed
			 * only for 3- and 2-level page tables where we don't
			 * have real puds/pmds, so pgd_populate() and
			 * pud_populate() are no-ops there.
			 */
			pgd_populate(&init_mm, pgd, kasan_zero_pud);
			pud = pud_offset(pgd, addr);
			pud_populate(&init_mm, pud, kasan_zero_pmd);
			pmd = pmd_offset(pud, addr);
			pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
			continue;
		}

		if (pgd_none(*pgd)) {
			pgd_populate(&init_mm, pgd,
				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
		}
		zero_pud_populate(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static void __init kasan_early_pud_populate(pgd_t *pgd,
					    unsigned long addr,
					    unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	if (pgd_none(*pgd))
		pgd_populate(&init_mm, pgd, kasan_zero_pud);

	pud = pud_offset_kimg(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		kasan_early_pmd_populate(pud, addr, next);
	} while (pud++, addr = next, addr != end && pud_none(*pud));
}
static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
			   unsigned long addr, unsigned long end,
			   phys_addr_t phys, pgprot_t prot,
			   void *(*alloc)(unsigned long size))
{
	pud_t *pud;
	unsigned long next;

	if (pgd_none(*pgd)) {
		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
		pgd_populate(mm, pgd, pud);
	}
	BUG_ON(pgd_bad(*pgd));

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);

		/*
		 * For 4K granule only, attempt to put down a 1GB block
		 */
		if (use_1G_block(addr, next, phys)) {
			pud_t old_pud = *pud;
			set_pud(pud, __pud(phys |
					   pgprot_val(mk_sect_prot(prot))));

			/*
			 * If we have an old value for a pud, it will
			 * be pointing to a pmd table that we no longer
			 * need (from swapper_pg_dir).
			 *
			 * Look up the old pmd table and free it.
			 */
			if (!pud_none(old_pud)) {
				flush_tlb_all();
				if (pud_table(old_pud)) {
					phys_addr_t table =
						__pa(pmd_offset(&old_pud, 0));
					if (!WARN_ON_ONCE(slab_is_available()))
						memblock_free(table, PAGE_SIZE);
				}
			}
		} else {
			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
		}
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	if (WARN_ON(pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		pgd_populate(NULL, pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return pud_offset(pgd, addr);
}
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
				 pgprot_t flags,
				 unsigned int map_page_size,
				 int nid,
				 unsigned long region_start,
				 unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	if (pgd_none(*pgdp)) {
		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
					   region_start, region_end);
		pgd_populate(&init_mm, pgdp, pudp);
	}
	pudp = pud_offset(pgdp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
					   region_start, region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}
static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
				      unsigned long end, int nid)
{
	void *p;
	p4d_t *p4d;
	unsigned long next;

	if (pgd_none(*pgd)) {
		p = early_alloc(PAGE_SIZE, nid, true);
		pgd_populate(&init_mm, pgd, p);
	}

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		kasan_populate_p4d(p4d, addr, next, nid);
	} while (p4d++, addr = next, addr != end);
}
/*
 * map_kernel_page is currently only called by __ioremap.
 * It adds an entry to the ioremap page table and adds an
 * entry to the HPT, possibly bolting it.
 */
int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
	if (slab_is_available()) {
		pgdp = pgd_offset_k(ea);
		pudp = pud_alloc(&init_mm, pgdp, ea);
		if (!pudp)
			return -ENOMEM;
		pmdp = pmd_alloc(&init_mm, pudp, ea);
		if (!pmdp)
			return -ENOMEM;
		ptep = pte_alloc_kernel(pmdp, ea);
		if (!ptep)
			return -ENOMEM;
	} else {
		pgdp = pgd_offset_k(ea);
#ifndef __PAGETABLE_PUD_FOLDED
		if (pgd_none(*pgdp)) {
			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
			pgd_populate(&init_mm, pgdp, pudp);
		}
#endif /* !__PAGETABLE_PUD_FOLDED */
		pudp = pud_offset(pgdp, ea);
		if (pud_none(*pudp)) {
			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
			pud_populate(&init_mm, pudp, pmdp);
		}
		pmdp = pmd_offset(pudp, ea);
		if (!pmd_present(*pmdp)) {
			ptep = early_alloc_pgtable(PAGE_SIZE);
			pmd_populate_kernel(&init_mm, pmdp, ptep);
		}
		ptep = pte_offset_kernel(pmdp, ea);
	}
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
	smp_wmb();
	return 0;
}
/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
	unsigned long address;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t pte;
	int ret = -ENOMEM;

	for (address = start; address < start + size; address += PAGE_SIZE) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}
static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);

		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates only the 1st level table of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;
	void *hwpgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	hwpgd = kvm_alloc_hwpgd();
	if (!hwpgd)
		return -ENOMEM;

	/* When the kernel uses more levels of page tables than the
	 * guest, we allocate a fake PGD and pre-populate it to point
	 * to the next-level page table, which will be the real
	 * initial page table pointed to by the VTTBR.
	 *
	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
	 * the PMD and the kernel will use folded pud.
	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
	 * pages.
	 */
	if (KVM_PREALLOC_LEVEL > 0) {
		int i;

		/*
		 * Allocate fake pgd for the page table manipulation macros to
		 * work.  This is not used by the hardware and we have no
		 * alignment requirement for this allocation.
		 */
		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
				       GFP_KERNEL | __GFP_ZERO);
		if (!pgd) {
			kvm_free_hwpgd(hwpgd);
			return -ENOMEM;
		}

		/* Plug the HW PGD into the fake one. */
		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
			if (KVM_PREALLOC_LEVEL == 1)
				pgd_populate(NULL, pgd + i,
					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
			else if (KVM_PREALLOC_LEVEL == 2)
				pud_populate(NULL, pud_offset(pgd, 0) + i,
					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
		}
	} else {
		/*
		 * Allocate actual first-level Stage-2 page table used by the
		 * hardware for Stage-2 page table walks.
		 */
		pgd = (pgd_t *)hwpgd;
	}

	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;
	return 0;
}
/*
 * Copies length bytes, starting at src_start, into a new page,
 * performs cache maintenance, then maps it at the specified low
 * address as executable.
 *
 * This is used by hibernate to copy the code it needs to execute when
 * overwriting the kernel text. This function generates a new set of page
 * tables, which it loads into ttbr0.
 *
 * Length is provided as we probably only want 4K of data, even on a 64K
 * page system.
 */
static int create_safe_exec_page(void *src_start, size_t length,
				 unsigned long dst_addr,
				 phys_addr_t *phys_dst_addr,
				 void *(*allocator)(gfp_t mask),
				 gfp_t mask)
{
	int rc = 0;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	unsigned long dst = (unsigned long)allocator(mask);

	if (!dst) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy((void *)dst, src_start, length);
	flush_icache_range(dst, dst + length);

	pgdp = pgd_offset_raw(allocator(mask), dst_addr);
	if (pgd_none(READ_ONCE(*pgdp))) {
		pudp = allocator(mask);
		if (!pudp) {
			rc = -ENOMEM;
			goto out;
		}
		pgd_populate(&init_mm, pgdp, pudp);
	}

	pudp = pud_offset(pgdp, dst_addr);
	if (pud_none(READ_ONCE(*pudp))) {
		pmdp = allocator(mask);
		if (!pmdp) {
			rc = -ENOMEM;
			goto out;
		}
		pud_populate(&init_mm, pudp, pmdp);
	}

	pmdp = pmd_offset(pudp, dst_addr);
	if (pmd_none(READ_ONCE(*pmdp))) {
		ptep = allocator(mask);
		if (!ptep) {
			rc = -ENOMEM;
			goto out;
		}
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}

	ptep = pte_offset_kernel(pmdp, dst_addr);
	set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));

	/*
	 * Load our new page tables. A strict BBM approach requires that we
	 * ensure that TLBs are free of any entries that may overlap with the
	 * global mappings we are about to install.
	 *
	 * For a real hibernate/resume cycle TTBR0 currently points to a zero
	 * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
	 * runtime services), while for a userspace-driven test_resume cycle it
	 * points to userspace page tables (and we must point it at a zero page
	 * ourselves). Elsewhere we only (un)install the idmap with preemption
	 * disabled, so T0SZ should be as required regardless.
	 */
	cpu_set_reserved_ttbr0();
	local_flush_tlb_all();
	write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
	isb();

	*phys_dst_addr = virt_to_phys((void *)dst);

out:
	return rc;
}