/*
 * We need our own copy of the higher levels of the page tables
 * because we want to avoid inserting EFI region mappings (EFI_VA_END
 * to EFI_VA_START) into the standard kernel page tables. Everything
 * else can be shared, see efi_sync_low_kernel_mappings().
 */
int __init efi_alloc_page_tables(void)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	gfp_t gfp_mask;

	if (efi_enabled(EFI_OLD_MEMMAP))
		return 0;

	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
	if (!efi_pgd)
		return -ENOMEM;

	pgd = efi_pgd + pgd_index(EFI_VA_END);
	p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
	if (!p4d) {
		free_page((unsigned long)efi_pgd);
		return -ENOMEM;
	}

	pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
	if (!pud) {
		if (CONFIG_PGTABLE_LEVELS > 4)
			free_page((unsigned long) pgd_page_vaddr(*pgd));
		free_page((unsigned long)efi_pgd);
		return -ENOMEM;
	}

	return 0;
}
/*
 * Create PGD aligned trampoline table to allow real mode initialization
 * of additional CPUs. Consume only 1 low memory page.
 */
void __meminit init_trampoline(void)
{
	unsigned long paddr, paddr_next;
	pgd_t *pgd;
	pud_t *pud_page, *pud_page_tramp;
	int i;

	if (!kaslr_memory_enabled()) {
		init_trampoline_default();
		return;
	}

	pud_page_tramp = alloc_low_page();

	paddr = 0;
	pgd = pgd_offset_k((unsigned long)__va(paddr));
	pud_page = (pud_t *) pgd_page_vaddr(*pgd);

	for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
		pud_t *pud, *pud_tramp;
		unsigned long vaddr = (unsigned long)__va(paddr);

		pud_tramp = pud_page_tramp + pud_index(paddr);
		pud = pud_page + pud_index(vaddr);
		paddr_next = (paddr & PUD_MASK) + PUD_SIZE;

		*pud_tramp = *pud;
	}

	set_pgd(&trampoline_pgd_entry,
		__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
}
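/*
 * Illustration only, not taken from the KASLR code above: the loop in
 * init_trampoline() simply copies, slot by slot, the entries of an existing
 * table that cover a region into a freshly allocated table, so the
 * trampoline sees the same low-memory mappings. toy_copy_entries() and its
 * parameters are invented names for this sketch.
 */
#include <stddef.h>

#define TOY_ENTRIES_PER_TABLE 512

static void toy_copy_entries(const unsigned long *src_table,
			     unsigned long *dst_table,
			     size_t first, size_t count)
{
	size_t i;

	/* Copy entries so both tables point at the same lower-level tables. */
	for (i = 0; i < count && first + i < TOY_ENTRIES_PER_TABLE; i++)
		dst_table[first + i] = src_table[first + i];
}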
int exit_mm(struct mm_struct *mm)
{
	pmd_t *pmd;
	pgd_t *pgd;
	uint32_t pgdno, pmdno;
	struct page *page;

	if (!mm || !mm->mm_pgd)
		return 0;
	if (!atomic_dec_and_test(&mm->mm_count))
		return 0;

	delete_all_vma(mm);

	for (pgdno = 0; pgdno < pgd_index(KERNEL_BASE_ADDR); pgdno++) {
		pgd = mm->mm_pgd + pgdno;
		if (!pgd_present(*pgd) || pgd_none(*pgd))
			continue;

		pmd_t *tmp = (pmd_t *)pgd_page_vaddr(*pgd);
		for (pmdno = 0; pmdno < PTRS_PER_PMD; pmdno++) {
			pmd = tmp + pmdno;
			if (!pmd_present(*pmd) || pmd_none(*pmd))
				continue;
			/* drop the reference on the page backing this pmd entry */
			struct page *p = virt2page(pmd_page_vaddr(*pmd));
			page_decref(p);
			pmd_set(pmd, 0, 0);
		}

		/* then drop the reference on the pmd table page itself */
		struct page *p = virt2page(pgd_page_vaddr(*pgd));
		page_decref(p);
		pgd_set(pgd, 0, 0);
	}

	page = virt2page((viraddr_t)mm->mm_pgd);
	page_free(page);
	kfree(mm);

	return 0;
}
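/*
 * Not part of the kernel code above: a minimal userspace model of the same
 * teardown pattern exit_mm() follows -- walk the top-level table only up to
 * the kernel boundary, free each lower-level table it points to, then free
 * the top level itself. All names and sizes below are illustrative.
 */
#include <stdlib.h>

#define TOY_TOP_ENTRIES     1024
#define TOY_KERNEL_BOUNDARY  768   /* entries >= this index are shared, not freed */

struct toy_mm {
	void **top;                /* stands in for mm->mm_pgd */
};

static void toy_exit_mm(struct toy_mm *mm)
{
	unsigned int i;

	if (!mm || !mm->top)
		return;

	for (i = 0; i < TOY_KERNEL_BOUNDARY; i++) {
		if (!mm->top[i])       /* "not present" */
			continue;
		free(mm->top[i]);      /* release the lower-level table */
		mm->top[i] = NULL;
	}

	free(mm->top);                 /* finally release the top level */
	free(mm);
}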
void set_pmde(pgd_t *pgde, struct page *pf, viraddr_t address, uint32_t perm)
{
#ifdef DEBUG
	assert(pgde);
	assert(pf);
#endif
	pmd_t *pmde = NULL;

	/* locate the pmd entry for this address inside the pgd's table page */
	pmde = (pmd_t *)pgd_page_vaddr(*pgde) + pmd_index(address);
	pmd_val(*pmde) = page2phys(pf) | perm;
}
/*
 * If we allocate a pmd for part of the kernel address space, then
 * make sure it's initialized with the appropriate kernel mappings.
 * Otherwise use a cached zeroed pmd.
 */
static pmd_t *pmd_cache_alloc(int idx)
{
	pmd_t *pmd;

	if (idx >= USER_PTRS_PER_PGD) {
		pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
		if (pmd)
			memcpy(pmd,
			       (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
			       sizeof(pmd_t) * PTRS_PER_PMD);
	} else
		pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);

	return pmd;
}
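/*
 * Illustrative only (not kernel code): the allocation policy above, modeled
 * in userspace. Tables covering the kernel half are cloned from a master
 * table so kernel mappings stay visible; user tables start out zeroed.
 * toy_alloc_table(), toy_master_table and TOY_USER_BOUNDARY are made-up
 * names for this sketch.
 */
#include <stdlib.h>
#include <string.h>

#define TOY_ENTRIES        512
#define TOY_USER_BOUNDARY  256            /* indices >= this cover "kernel" space */

static unsigned long toy_master_table[TOY_ENTRIES];   /* plays the role of the shared kernel table */

static unsigned long *toy_alloc_table(int idx)
{
	unsigned long *t = malloc(sizeof(unsigned long) * TOY_ENTRIES);

	if (!t)
		return NULL;

	if (idx >= TOY_USER_BOUNDARY)
		memcpy(t, toy_master_table,
		       sizeof(unsigned long) * TOY_ENTRIES);   /* inherit kernel mappings */
	else
		memset(t, 0, sizeof(unsigned long) * TOY_ENTRIES); /* fresh user table */

	return t;
}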
void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
	/*
	 * After the lock is released, the original page table is restored.
	 */
	int pgd_idx, i;
	int nr_pgds;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	if (!efi_enabled(EFI_OLD_MEMMAP)) {
		write_cr3((unsigned long)save_pgd);
		__flush_tlb_all();
		return;
	}

	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);

	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);

		if (!(pgd_val(*pgd) & _PAGE_PRESENT))
			continue;

		for (i = 0; i < PTRS_PER_P4D; i++) {
			p4d = p4d_offset(pgd,
					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);

			if (!(p4d_val(*p4d) & _PAGE_PRESENT))
				continue;

			pud = (pud_t *)p4d_page_vaddr(*p4d);
			pud_free(&init_mm, pud);
		}

		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
		p4d_free(&init_mm, p4d);
	}

	kfree(save_pgd);

	__flush_tlb_all();
	early_code_mapping_set_exec(0);
}
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	int i;
	pud_t *start;

	start = (pud_t *) pgd_page_vaddr(addr);

	for (i = 0; i < PTRS_PER_PUD; i++) {
		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
		if (!pud_none(*start)) {
			pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;

			if (pud_large(*start) || !pud_present(*start))
				note_page(m, st, __pgprot(prot), 2);
			else
				walk_pmd_level(m, st, *start,
					       P + i * PUD_LEVEL_MULT);
		} else
			note_page(m, st, __pgprot(0), 2);

		start++;
	}
}
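/*
 * Not from the dump code above: a self-contained userspace sketch of the
 * same walk shape. Each entry is either absent, a leaf ("large" mapping),
 * or a pointer to a lower-level table that is walked recursively.
 * toy_entry and toy_walk() are invented for illustration.
 */
#include <stdio.h>

#define TOY_WALK_ENTRIES 4

struct toy_entry {
	int present;
	int leaf;                              /* analogous to pud_large() */
	struct toy_entry *next;                /* lower-level table if !leaf */
};

static void toy_walk(const struct toy_entry *table, unsigned long base,
		     unsigned long step, int level)
{
	int i;

	for (i = 0; i < TOY_WALK_ENTRIES; i++) {
		unsigned long addr = base + i * step;

		if (!table[i].present)
			printf("level %d: %#lx not mapped\n", level, addr);
		else if (table[i].leaf || !table[i].next)
			printf("level %d: %#lx leaf mapping\n", level, addr);
		else
			toy_walk(table[i].next, addr, step / TOY_WALK_ENTRIES,
				 level + 1);
	}
}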
/*
 * We need our own copy of the higher levels of the page tables
 * because we want to avoid inserting EFI region mappings (EFI_VA_END
 * to EFI_VA_START) into the standard kernel page tables. Everything
 * else can be shared, see efi_sync_low_kernel_mappings().
 *
 * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
 * allocation.
 */
int __init efi_alloc_page_tables(void)
{
	pgd_t *pgd, *efi_pgd;
	p4d_t *p4d;
	pud_t *pud;
	gfp_t gfp_mask;

	if (efi_enabled(EFI_OLD_MEMMAP))
		return 0;

	gfp_mask = GFP_KERNEL | __GFP_ZERO;
	efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
	if (!efi_pgd)
		return -ENOMEM;

	pgd = efi_pgd + pgd_index(EFI_VA_END);
	p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
	if (!p4d) {
		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
		return -ENOMEM;
	}

	pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
	if (!pud) {
		if (pgtable_l5_enabled)
			free_page((unsigned long) pgd_page_vaddr(*pgd));
		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
		return -ENOMEM;
	}

	efi_mm.pgd = efi_pgd;
	mm_init_cpumask(&efi_mm);
	init_new_context(NULL, &efi_mm);

	return 0;
}
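/*
 * A userspace sketch (not the EFI code itself) of the unwind pattern above:
 * allocate the top level, then each intermediate level, and on any failure
 * free exactly what has already been allocated before returning an error.
 * All identifiers below are illustrative.
 */
#include <stdlib.h>

struct toy_tables {
	void *top;
	void *mid;
};

static int toy_alloc_tables(struct toy_tables *t)
{
	t->top = calloc(1, 4096);      /* zeroed top-level table */
	if (!t->top)
		return -1;

	t->mid = calloc(1, 4096);      /* zeroed intermediate table */
	if (!t->mid) {
		free(t->top);          /* undo the earlier allocation */
		t->top = NULL;
		return -1;
	}

	return 0;                      /* both levels allocated and zeroed */
}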
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
	if (pgd_huge(pgd))
		return pte_page(pgd_pte(pgd));
	return virt_to_page(pgd_page_vaddr(pgd));
}
void __init kasan_init(void)
{
	int i;
	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;

#ifdef CONFIG_KASAN_INLINE
	register_die_notifier(&kasan_die_notifier);
#endif

	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));

	/*
	 * We use the same shadow offset for 4- and 5-level paging to
	 * facilitate boot-time switching between paging modes.
	 * As result in 5-level paging mode KASAN_SHADOW_START and
	 * KASAN_SHADOW_END are not aligned to PGD boundary.
	 *
	 * KASAN_SHADOW_START doesn't share PGD with anything else.
	 * We claim whole PGD entry to make things easier.
	 *
	 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
	 * bunch of things like kernel code, modules, EFI mapping, etc.
	 * We need to take extra steps to not overwrite them.
	 */
	if (pgtable_l5_enabled()) {
		void *ptr;

		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
		memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
		set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
			__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
	}

	load_cr3(early_top_pgt);
	__flush_tlb_all();

	clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

	kasan_populate_early_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
			kasan_mem_to_shadow((void *)PAGE_OFFSET));

	for (i = 0; i < E820_MAX_ENTRIES; i++) {
		if (pfn_mapped[i].end == 0)
			break;

		map_range(&pfn_mapped[i]);
	}

	shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
	shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
	shadow_cpu_entry_begin = (void *)round_down(
			(unsigned long)shadow_cpu_entry_begin, PAGE_SIZE);

	shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
					CPU_ENTRY_AREA_MAP_SIZE);
	shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
	shadow_cpu_entry_end = (void *)round_up(
			(unsigned long)shadow_cpu_entry_end, PAGE_SIZE);

	kasan_populate_early_shadow(
		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
		shadow_cpu_entry_begin);

	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
			      (unsigned long)shadow_cpu_entry_end, 0);

	kasan_populate_early_shadow(shadow_cpu_entry_end,
			kasan_mem_to_shadow((void *)__START_KERNEL_map));

	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
			      (unsigned long)kasan_mem_to_shadow(_end),
			      early_pfn_to_nid(__pa(_stext)));

	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)MODULES_END),
			(void *)KASAN_SHADOW_END);

	load_cr3(init_top_pgt);
	__flush_tlb_all();

	/*
	 * kasan_early_shadow_page has been used as early shadow memory, thus
	 * it may contain some garbage. Now we can clear and write protect it,
	 * since after the TLB flush no one should write to it.
	 */
	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte_t pte;
		pgprot_t prot;

		prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
		pgprot_val(prot) &= __default_kernel_pte_mask;

		pte = __pte(__pa(kasan_early_shadow_page) | pgprot_val(prot));
		set_pte(&kasan_early_shadow_pte[i], pte);
	}
	/* Flush TLBs again to be sure that write protection applied. */
	__flush_tlb_all();

	init_task.kasan_depth = 0;

	pr_info("KernelAddressSanitizer initialized\n");
}
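/*
 * For orientation, not taken from the file above: generic KASAN maps every
 * 8 bytes of kernel address space to one shadow byte, so the address
 * translation used throughout kasan_init() is essentially a shift plus a
 * fixed offset. The offset value below is a placeholder, not the real
 * KASAN_SHADOW_OFFSET of any particular configuration.
 */
#include <stdint.h>

#define TOY_SHADOW_SCALE_SHIFT 3                     /* 8 bytes per shadow byte */
#define TOY_SHADOW_OFFSET      0xdffffc0000000000UL  /* placeholder offset */

static inline uintptr_t toy_mem_to_shadow(uintptr_t addr)
{
	return (addr >> TOY_SHADOW_SCALE_SHIFT) + TOY_SHADOW_OFFSET;
}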