/*
 * Populate the PUD entries of @pud_page that cover the physical range
 * [addr, end) in the kernel direct mapping, allocating and filling a
 * PMD page for each empty entry.  Flushes the TLB when done.
 */
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
	int i = pud_index(addr);

	/* Advance one PUD_SIZE-aligned step per iteration. */
	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		/*
		 * Before bootmem is up, clear entries whose PUD-sized range
		 * contains no E820-reported memory at all.
		 */
		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		/* Entry already populated: extend the existing PMD page. */
		if (pud_val(*pud)) {
			phys_pmd_update(pud, addr, end);
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);
		/*
		 * init_mm.page_table_lock serializes updates to the kernel
		 * page tables; the PUD entry is published before the PMD
		 * page is filled under the same lock.
		 */
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, addr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(pmd);
	}
	__flush_tlb();
}
/*
 * Create a PGD-aligned trampoline table so additional CPUs can be
 * brought up through real mode.  Only one low-memory page is consumed.
 * With memory KASLR disabled the default (shared) trampoline setup is
 * sufficient.
 */
void __meminit init_trampoline(void)
{
	unsigned long phys, phys_next;
	pud_t *src_pud_page, *tramp_pud_page;
	pgd_t *kernel_pgd;
	int idx;

	if (!kaslr_memory_enabled()) {
		init_trampoline_default();
		return;
	}

	tramp_pud_page = alloc_low_page();

	/* Locate the PUD page backing the start of the direct mapping. */
	phys = 0;
	kernel_pgd = pgd_offset_k((unsigned long)__va(phys));
	src_pud_page = (pud_t *) pgd_page_vaddr(*kernel_pgd);

	/*
	 * Clone each PUD entry of the direct mapping into the trampoline
	 * table, indexing the source by virtual address and the clone by
	 * physical address (1:1).
	 */
	for (idx = pud_index(phys); idx < PTRS_PER_PUD; idx++, phys = phys_next) {
		unsigned long virt = (unsigned long)__va(phys);
		pud_t *dst = tramp_pud_page + pud_index(phys);
		pud_t *src = src_pud_page + pud_index(virt);

		phys_next = (phys & PUD_MASK) + PUD_SIZE;

		*dst = *src;
	}

	set_pgd(&trampoline_pgd_entry,
		__pgd(_KERNPG_TABLE | __pa(tramp_pud_page)));
}
/*
 * Add low kernel mappings for passing arguments to EFI functions.
 *
 * Copies the kernel's PGD/PUD entries into efi_pgd so that, apart from
 * the region reserved for EFI runtime mappings, the EFI page table sees
 * the same low kernel address space as init_mm.
 */
void efi_sync_low_kernel_mappings(void)
{
	unsigned num_entries;
	pgd_t *pgd_k, *pgd_efi;
	pud_t *pud_k, *pud_efi;

	/* The old-style memmap does not use a separate efi_pgd. */
	if (efi_enabled(EFI_OLD_MEMMAP))
		return;

	/*
	 * We can share all PGD entries apart from the one entry that
	 * covers the EFI runtime mapping space.
	 *
	 * Make sure the EFI runtime region mappings are guaranteed to
	 * only span a single PGD entry and that the entry also maps
	 * other important kernel regions.
	 */
	BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
	BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != (EFI_VA_END & PGDIR_MASK));

	/* Share the PGD entries from PAGE_OFFSET up to (not including) EFI_VA_END's. */
	pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
	pgd_k = pgd_offset_k(PAGE_OFFSET);

	num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
	memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);

	/*
	 * We share all the PUD entries apart from those that map the
	 * EFI regions. Copy around them.
	 */
	BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
	BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);

	pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
	pud_efi = pud_offset(pgd_efi, 0);

	pgd_k = pgd_offset_k(EFI_VA_END);
	pud_k = pud_offset(pgd_k, 0);

	/* PUD entries below the EFI region (EFI_VA_END grows downwards from it). */
	num_entries = pud_index(EFI_VA_END);
	memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);

	/* PUD entries above the EFI region, up to the end of the PGD entry. */
	pud_efi = pud_offset(pgd_efi, EFI_VA_START);
	pud_k = pud_offset(pgd_k, EFI_VA_START);

	num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
	memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
}
/*
 * Build an identity mapping of [address, end) under @pud using 2 MiB
 * large pages, allocating PMD pages from the hibernation-safe pool.
 * Returns 0 on success or -ENOMEM if a PMD page cannot be allocated.
 */
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
	long pud_idx = pud_index(address);

	for (pud += pud_idx; pud_idx < PTRS_PER_PUD; pud++, pud_idx++) {
		unsigned long cur = address + pud_idx * PUD_SIZE;
		pmd_t *pmd;
		long pmd_idx;

		if (cur >= end)
			break;

		pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!pmd)
			return -ENOMEM;

		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));

		/* Fill the new PMD page with large-page identity entries. */
		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && cur < end;
		     pmd++, pmd_idx++, cur += PMD_SIZE) {
			unsigned long entry = __PAGE_KERNEL_LARGE_EXEC | cur;

			/* Drop protection bits the CPU does not support. */
			set_pmd(pmd, __pmd(entry & __supported_pte_mask));
		}
	}

	return 0;
}
/*
 * Map the page containing the image kernel's entry point
 * (jump_address_phys) at its image-kernel virtual address
 * (restore_jump_address) under @pgd.
 *
 * Only that single page needs a text mapping: the switch-over runs
 * from identity-mapped relocated code, and mapping the entry point at
 * the same virtual address it has in the image kernel means
 * restore_registers() does not find itself in a different part of the
 * address space once the image kernel's own page tables take over.
 *
 * Returns 0 on success, -ENOMEM if a page table page cannot be
 * allocated.
 */
static int set_up_temporary_text_mapping(pgd_t *pgd)
{
	pud_t *new_pud;
	pmd_t *new_pmd;

	new_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
	if (!new_pud)
		return -ENOMEM;

	new_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
	if (!new_pmd)
		return -ENOMEM;

	/* Wire up PMD -> PUD -> PGD for a single 2 MiB executable page. */
	set_pmd(new_pmd + pmd_index(restore_jump_address),
		__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
	set_pud(new_pud + pud_index(restore_jump_address),
		__pud(__pa(new_pmd) | _KERNPG_TABLE));
	set_pgd(pgd + pgd_index(restore_jump_address),
		__pgd(__pa(new_pud) | _KERNPG_TABLE));

	return 0;
}
/*
 * Build identity-mapping PUD entries for [addr, end) in @pud_page,
 * delegating each PUD-sized piece to ident_pmd_init().  PMD pages are
 * obtained through info->alloc_pgt_page().  Returns 0 on success or
 * -ENOMEM when allocation fails.
 */
static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, unsigned long addr, unsigned long end)
{
	unsigned long boundary;

	while (addr < end) {
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		/* Clamp this step to the end of the requested range. */
		boundary = (addr & PUD_MASK) + PUD_SIZE;
		if (boundary > end)
			boundary = end;

		if (pud_present(*pud)) {
			/* Reuse the PMD page already hanging off this entry. */
			pmd = pmd_offset(pud, 0);
			ident_pmd_init(info, pmd, addr, boundary);
		} else {
			pmd = (pmd_t *)info->alloc_pgt_page(info->context);
			if (!pmd)
				return -ENOMEM;
			/* Fill the PMD page first, then publish it in the PUD. */
			ident_pmd_init(info, pmd, addr, boundary);
			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
		}

		addr = boundary;
	}

	return 0;
}
/*
 * Set up the espfix stack/page-table entries for the calling CPU,
 * allocating the shared per-page structures on first use.
 *
 * NOTE(review): this chunk appears truncated -- the function body
 * continues past the end of what is visible here.
 */
void init_espfix_ap(void)
{
	unsigned int cpu, page;
	unsigned long addr;
	pud_t pud, *pud_p;
	pmd_t pmd, *pmd_p;
	pte_t pte, *pte_p;
	int n;
	void *stack_page;
	pteval_t ptemask;

	/* We only have to do this once... */
	if (likely(this_cpu_read(espfix_stack)))
		return;		/* Already initialized */

	cpu = smp_processor_id();
	addr = espfix_base_addr(cpu);
	page = cpu/ESPFIX_STACKS_PER_PAGE;

	/* Did another CPU already set this up? */
	stack_page = ACCESS_ONCE(espfix_pages[page]);
	if (likely(stack_page))
		goto done;

	mutex_lock(&espfix_init_mutex);

	/* Did we race on the lock? */
	stack_page = ACCESS_ONCE(espfix_pages[page]);
	if (stack_page)
		goto unlock_done;

	ptemask = __supported_pte_mask;

	pud_p = &espfix_pud_page[pud_index(addr)];
	pud = *pud_p;
	if (!pud_present(pud)) {
		/*
		 * NOTE(review): __get_free_page() can return 0 and the
		 * result is used unchecked -- confirm whether failure is
		 * impossible here or a check is missing.
		 */
		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
		/*
		 * NOTE(review): the sibling variant of this function calls
		 * paravirt_alloc_pmd() when allocating a PMD page; verify
		 * that paravirt_alloc_pud() is intended here.
		 */
		paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
		/* Clone the entry across all aliased PUD slots. */
		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
			set_pud(&pud_p[n], pud);
	}
/*
 * Mark the page at @va read-only in the early (pre-bootmem) page tables
 * via a Xen hypercall, unless the hypervisor already provides the
 * @feature that makes this unnecessary.
 *
 * The walk treats each page-table level as a raw array of unsigned
 * longs rooted at init_level4_pgt; addr_to_page() converts one level's
 * entry into a pointer to the next level's page.
 */
static void __meminit early_make_page_readonly(void *va, unsigned int feature)
{
	unsigned long addr, _va = (unsigned long)va;
	pte_t pte, *ptep;
	unsigned long *page = (unsigned long *) init_level4_pgt;

	/* Only valid before bootmem takes over memory management. */
	BUG_ON(after_bootmem);

	if (xen_feature(feature))
		return;

	/* PGD -> PUD -> PMD -> PTE walk using raw table entries. */
	addr = (unsigned long) page[pgd_index(_va)];
	addr_to_page(addr, page);

	addr = page[pud_index(_va)];
	addr_to_page(addr, page);

	addr = page[pmd_index(_va)];
	addr_to_page(addr, page);

	ptep = (pte_t *) &page[pte_index(_va)];

	/* Clear the write-permission bit and ask Xen to install the PTE. */
	pte.pte = ptep->pte & ~_PAGE_RW;
	if (HYPERVISOR_update_va_mapping(_va, pte, 0))
		BUG();
}
/*
 * Map the page containing the image kernel's entry point
 * (jump_address_phys) at its image-kernel virtual address
 * (restore_jump_address) under @pgd, handling both 4- and 5-level
 * paging.
 *
 * Only that single page needs a text mapping: the switch-over runs
 * from identity-mapped relocated code, and keeping the entry point at
 * the same virtual address it has in the image kernel means
 * restore_registers() stays in a consistent part of the address space
 * once the image kernel's own page tables take over.
 *
 * Returns 0 on success, -ENOMEM if a page table page cannot be
 * allocated.
 */
static int set_up_temporary_text_mapping(pgd_t *pgd)
{
	pgprot_t table_prot = __pgprot(_KERNPG_TABLE);
	pgprot_t text_prot = __pgprot(__PAGE_KERNEL_LARGE_EXEC);
	p4d_t *p4d_tbl = NULL;
	pud_t *pud_tbl;
	pmd_t *pmd_tbl;

	/* Filter out unsupported __PAGE_KERNEL* bits: */
	pgprot_val(table_prot) &= __default_kernel_pte_mask;
	pgprot_val(text_prot) &= __default_kernel_pte_mask;

	/* A P4D level is only needed with 5-level paging. */
	if (pgtable_l5_enabled()) {
		p4d_tbl = (p4d_t *)get_safe_page(GFP_ATOMIC);
		if (!p4d_tbl)
			return -ENOMEM;
	}

	pud_tbl = (pud_t *)get_safe_page(GFP_ATOMIC);
	if (!pud_tbl)
		return -ENOMEM;

	pmd_tbl = (pmd_t *)get_safe_page(GFP_ATOMIC);
	if (!pmd_tbl)
		return -ENOMEM;

	/* One 2 MiB executable page covering the entry point. */
	set_pmd(pmd_tbl + pmd_index(restore_jump_address),
		__pmd((jump_address_phys & PMD_MASK) | pgprot_val(text_prot)));
	set_pud(pud_tbl + pud_index(restore_jump_address),
		__pud(__pa(pmd_tbl) | pgprot_val(table_prot)));

	if (p4d_tbl) {
		set_p4d(p4d_tbl + p4d_index(restore_jump_address),
			__p4d(__pa(pud_tbl) | pgprot_val(table_prot)));
		set_pgd(pgd + pgd_index(restore_jump_address),
			__pgd(__pa(p4d_tbl) | pgprot_val(table_prot)));
	} else {
		/* 4-level paging: the PGD points straight at the PUD page. */
		set_pgd(pgd + pgd_index(restore_jump_address),
			__pgd(__pa(pud_tbl) | pgprot_val(table_prot)));
	}

	return 0;
}
unsigned long virtaddr_to_physaddr(struct mm_struct *mm, unsigned long vaddr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long paddr = 0; pgd = pgd_offset(mm, vaddr); printk("pgd_val = 0x%lx\n", pgd_val(*pgd)); printk("pgd_index = %lu\n", pgd_index(vaddr)); if (pgd_none(*pgd)) { printk("not mapped in pgd\n"); return INVALID_ADDR; } pud = pud_offset(pgd, vaddr); printk("pud_val = 0x%lx\n", pud_val(*pud)); printk("pud_index = %lu\n", pud_index(vaddr)); if (pud_none(*pud)) { printk("not mapped in pud\n"); return INVALID_ADDR; } pmd = pmd_offset(pud, vaddr); printk("pmd_val = 0x%lx\n", pmd_val(*pmd)); printk("pmd_index = %lx\n", pmd_index(vaddr)); if(pmd_none(*pmd)){ printk("not mapped in pmd\n"); return INVALID_ADDR; } /*If pmd_large is true, represent pmd is the last level*/ if(pmd_large(*pmd)){ paddr = (pmd_val(*pmd) & PAGE_MASK); paddr = paddr | (vaddr & ~PAGE_MASK); return paddr; } /*Walk the forth level page table ** you may use PAGE_MASK = 0xfffffffffffff000 to help you get [0:11] bits ***/ else{ /* XXX: Need to implement */ pte = pte_offset_kernel(pmd, vaddr); printk("pte_val = 0x%lx\n", pte_val(*pte)); printk("pte_index = %lx\n", pte_index(vaddr)); if(pte_none(*pte)){ printk("not mapped in pte\n"); return INVALID_ADDR; } paddr = (pte_val(*pte) & PAGE_MASK); paddr = paddr | (vaddr & ~PAGE_MASK); printk("paddr = %lx\n", paddr); printk("__pa = %lx\n", __pa(vaddr)); /* magic macro in the kernel */ /* End of implement */ return paddr; } }
/*
 * Create a new PMD entry mapping @address (a __PAGE_OFFSET direct-map
 * address) during very early boot, before the normal page-table
 * machinery is available.  Intermediate PUD/PMD pages come from the
 * fixed early_dynamic_pgts pool; if the pool runs out, all early page
 * tables are reset and the walk restarts.
 *
 * Returns 0 on success, -1 if the address is invalid or the early
 * page tables are no longer active.
 */
int __init early_make_pgtable(unsigned long address)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	unsigned long i;
	pgdval_t pgd, *pgd_p;
	pudval_t pud, *pud_p;
	pmdval_t pmd, *pmd_p;

	/* Invalid address or early pgt is done ? */
	if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
		return -1;

again:
	pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
	pgd = *pgd_p;

	/*
	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
	 * critical -- __PAGE_OFFSET would point us back into the dynamic
	 * range and we might end up looping forever...
	 */
	if (pgd)
		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		/* Pool exhausted: wipe the early tables and start over. */
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		/* Grab a fresh page from the pool and zero it by hand. */
		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
		for (i = 0; i < PTRS_PER_PUD; i++)
			pud_p[i] = 0;
		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pud_p += pud_index(address);
	pud = *pud_p;

	if (pud)
		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		/* Same pool-exhaustion handling at the PMD level. */
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
		for (i = 0; i < PTRS_PER_PMD; i++)
			pmd_p[i] = 0;
		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	/* Install a large-page PMD entry covering @address. */
	pmd = (physaddr & PMD_MASK) + early_pmd_flags;
	pmd_p[pmd_index(address)] = pmd;

	return 0;
}
/*
 * Set up the espfix stack/page-table entries for @cpu, allocating the
 * shared per-page structures on first use (NUMA-aware for non-boot
 * CPUs).
 *
 * NOTE(review): this chunk appears truncated -- the function body
 * continues past the end of what is visible here (it ends mid "else").
 */
void init_espfix_ap(int cpu)
{
	unsigned int page;
	unsigned long addr;
	pud_t pud, *pud_p;
	pmd_t pmd, *pmd_p;
	pte_t pte, *pte_p;
	int n, node;
	void *stack_page;
	pteval_t ptemask;

	/* We only have to do this once... */
	if (likely(per_cpu(espfix_stack, cpu)))
		return;		/* Already initialized */

	addr = espfix_base_addr(cpu);
	page = cpu/ESPFIX_STACKS_PER_PAGE;

	/* Did another CPU already set this up? */
	stack_page = ACCESS_ONCE(espfix_pages[page]);
	if (likely(stack_page))
		goto done;

	mutex_lock(&espfix_init_mutex);

	/* Did we race on the lock? */
	stack_page = ACCESS_ONCE(espfix_pages[page]);
	if (stack_page)
		goto unlock_done;

	node = cpu_to_node(cpu);
	ptemask = __supported_pte_mask;

	pud_p = &espfix_pud_page[pud_index(addr)];
	pud = *pud_p;
	if (!pud_present(pud)) {
		/* Boot CPU uses the statically allocated PMD page. */
		if (cpu)
			pmd_p = page_address(alloc_pages_node(node, PGALLOC_GFP, 0));
		else
			pmd_p = espfix_pmd_page;
		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
		/* Clone the entry across all aliased PUD slots. */
		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
			set_pud(&pud_p[n], pud);
	} else
/*
 * Build the real-mode trampoline's page tables: clone the PUD entry
 * that the kernel uses for the low 1 MB direct mapping into a fresh
 * PUD page, then hook that page into trampoline_pgd_entry (via an
 * extra P4D page when 5-level paging is on).
 */
static void __meminit init_trampoline_pud(void)
{
	pud_t *tramp_pud_page, *src_pud, *dst_pud;
	p4d_t *tramp_p4d_page, *src_p4d, *dst_p4d;
	unsigned long phys, virt;
	pgd_t *kernel_pgd;

	tramp_pud_page = alloc_low_page();

	/*
	 * The low 1 MB area is mapped twice: through the direct mapping
	 * (virt = phys + PAGE_OFFSET) and 1:1 for the real-mode
	 * trampoline (virt = phys).
	 */
	phys = 0;
	virt = (unsigned long)__va(phys);

	/* Walk the kernel tables down to the PUD covering the direct map. */
	kernel_pgd = pgd_offset_k(virt);
	src_p4d = p4d_offset(kernel_pgd, virt);
	src_pud = pud_offset(src_p4d, virt);

	/* Clone that PUD entry at the 1:1 index. */
	dst_pud = tramp_pud_page + pud_index(phys);
	*dst_pud = *src_pud;

	if (pgtable_l5_enabled()) {
		tramp_p4d_page = alloc_low_page();
		dst_p4d = tramp_p4d_page + p4d_index(phys);

		set_p4d(dst_p4d,
			__p4d(_KERNPG_TABLE | __pa(tramp_pud_page)));
		set_pgd(&trampoline_pgd_entry,
			__pgd(_KERNPG_TABLE | __pa(tramp_p4d_page)));
	} else {
		set_pgd(&trampoline_pgd_entry,
			__pgd(_KERNPG_TABLE | __pa(tramp_pud_page)));
	}
}