/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		kvm_set_s2pte_writable(&pte);

		ret = mmu_topup_memory_cache(&cache, 2, 2);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}
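/*
 * A minimal usage sketch for kvm_phys_addr_ioremap() above; the helper
 * name and the addresses are hypothetical, not from the source. Note the
 * rounding of 'end': a 6KB (0x1800) request still covers two full pages.
 */
static int example_map_device(struct kvm *kvm)
{
	/* Map 6KB of device MMIO at PA 0x3f000000 to guest IPA 0x10000000. */
	return kvm_phys_addr_ioremap(kvm, 0x10000000UL, 0x3f000000UL, 0x1800);
}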
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
			      pmd_t *dst_pmd,
			      struct vm_area_struct *dst_vma,
			      unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;
	pgoff_t offset, max_off;
	struct inode *inode;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (dst_vma->vm_file) {
		/* the shmem MAP_PRIVATE case requires checking the i_size */
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_unlock;
	}
	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_unlock;
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}
static int __init dell_efi_quirk(const struct dmi_system_id *d)
{
	u64 vaddr;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	/*
	 * Some UEFI run time implementations (DELL) require physical page
	 * zero to be mapped. This location is used during EfiResetSystem
	 * when ResetType is EfiResetWarm (reboot=warm). UEFI writes to
	 * a BIOS physical address of 0x472 for the reboot mode. The reason
	 * for this hasn't been revealed by the UEFI developers.
	 */
	printk(KERN_INFO "%s series board detected. Applying quirk for"
	       " page 0 UEFI firmware access.\n", d->ident);

	vaddr = 0UL;
	pgd = efi_pgd + pgd_index(vaddr);
	pud = fill_pud(pgd, vaddr);
	pmd = fill_pmd(pud, vaddr);
	pte = fill_pte(pmd, vaddr);
	set_pte(pte, pfn_pte(0UL, PAGE_KERNEL));

	return 0;
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
static inline void remap_area_pte(pte_t *pte, unsigned long address,
				  unsigned long size, unsigned long phys_addr,
				  unsigned long flags)
{
	unsigned long end;
	unsigned long pfn;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	if (address >= end)
		BUG();
	pfn = phys_addr >> PAGE_SHIFT;
	do {
		if (!pte_none(*pte)) {
			printk("remap_area_pte: page already exists\n");
			BUG();
		}
		set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW |
						   _PAGE_GLOBAL | _PAGE_DIRTY |
						   _PAGE_ACCESSED | flags)));
		address += PAGE_SIZE;
		pfn++;
		pte++;
	} while (address && (address < end));
}
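/*
 * Worked example for the clamping above (illustrative numbers, assuming
 * 4K pages and 4MB PMDs): a call with an in-PMD offset of 0x3ff000 and
 * size 0x3000 gets 'end' clamped to PMD_SIZE, so this helper writes only
 * the one PTE left in the current PMD; the caller's pmd-level loop is
 * expected to continue the mapping in the next PMD.
 */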
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	/*
	 * Code needs to be expanded to handle huge swap and migration
	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i, changed = 0;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	if (!pte_same(orig_pte, pte))
		changed = 1;

	/* Make sure we don't lose the dirty state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return changed;
}
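/*
 * Illustrative arithmetic for the contiguous-hugepage loops above (a
 * sketch, not from the source): on arm64 with a 4K granule, a CONT_PTE
 * hugepage gives ncontig = 16 and pgsize = PAGE_SIZE, so dpfn = 1 and
 * the loop writes entries for pfn, pfn+1, ..., pfn+15; a CONT_PMD
 * hugepage gives pgsize = PMD_SIZE, hence dpfn = 512 and pfn strides of
 * 512 frames per entry.
 */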
/*
 * Set the page permissions for a particular virtual address. If the
 * address is a vmalloc mapping (or other non-linear mapping), then
 * find the linear mapping of the page and also set its protections to
 * match.
 */
static void set_aliased_prot(void *v, pgprot_t prot)
{
	int level;
	pte_t *ptep;
	pte_t pte;
	unsigned long pfn;
	struct page *page;

	ptep = lookup_address((unsigned long)v, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	page = pfn_to_page(pfn);

	pte = pfn_pte(pfn, prot);

	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
		BUG();

	if (!PageHighMem(page)) {
		void *av = __va(PFN_PHYS(pfn));

		if (av != v)
			if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
				BUG();
	} else
		kmap_flush_unused();
}
void *nvmap_kmap(struct nvmap_handle_ref *ref, unsigned int pagenum)
{
	struct nvmap_handle *h;
	phys_addr_t paddr;
	unsigned long kaddr;
	pgprot_t prot;
	pte_t **pte;

	BUG_ON(!ref);
	h = nvmap_handle_get(ref->handle);
	if (!h)
		return NULL;

	BUG_ON(pagenum >= h->size >> PAGE_SHIFT);
	prot = nvmap_pgprot(h, pgprot_kernel);
	pte = nvmap_alloc_pte(nvmap_dev, (void **)&kaddr);
	if (!pte)
		goto out;

	if (h->heap_pgalloc)
		paddr = page_to_phys(h->pgalloc.pages[pagenum]);
	else
		paddr = h->carveout->base + pagenum * PAGE_SIZE;

	set_pte_at(&init_mm, kaddr, *pte,
		   pfn_pte(__phys_to_pfn(paddr), prot));
	flush_tlb_kernel_page(kaddr);
	return (void *)kaddr;
out:
	nvmap_handle_put(ref->handle);
	return NULL;
}
/*
 * load_gdt for early boot, when the gdt is only mapped once
 */
static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long frames[pages];
	int f;

	/*
	 * A GDT can be up to 64k in size, which corresponds to 8192
	 * 8-byte entries, or 16 4k pages.
	 */
	BUG_ON(size > 65536);
	BUG_ON(va & ~PAGE_MASK);

	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
		pte_t pte;
		unsigned long pfn, mfn;

		pfn = virt_to_pfn(va);
		mfn = pfn_to_mfn(pfn);

		pte = pfn_pte(pfn, PAGE_KERNEL_RO);

		if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
			BUG();

		frames[f] = mfn;
	}

	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
		BUG();
}
static inline void remap_area_pte(pte_t *pte, unsigned long address,
				  unsigned long size, unsigned long phys_addr,
				  unsigned long flags)
{
	unsigned long end, pfn;
	pgprot_t pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW |
				   _PAGE_DIRTY | _PAGE_ACCESSED | flags);

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	BUG_ON(address >= end);

	pfn = phys_addr >> PAGE_SHIFT;
	do {
		BUG_ON(!pte_none(*pte));

		set_pte(pte, pfn_pte(pfn, pgprot));
		address += PAGE_SIZE;
		pfn++;
		pte++;
	} while (address && (address < end));
}
static int pin_page(struct page *page, unsigned flags)
{
	unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
	int flush;

	if (pgfl)
		flush = 0;		/* already pinned */
	else if (PageHighMem(page))
		/* kmaps need flushing if we found an unpinned highpage */
		flush = 1;
	else {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		struct multicall_space mcs = __xen_mc_entry(0);

		flush = 0;

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL_RO),
					flags);
	}

	return flush;
}
/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);

	/* <pfn,flags> stored as-is, to permit clearing entries */
	set_pte(pte, pfn_pte(pfn, flags));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}
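/*
 * A hypothetical caller of set_pte_pfn() (a sketch; the helper name is
 * illustrative): establish a kernel mapping for one frame, then clear it
 * again by storing pfn 0 with empty flags, which the "stored as-is"
 * behaviour above is there to permit.
 */
static void example_fixmap_page(unsigned long vaddr, unsigned long pfn)
{
	set_pte_pfn(vaddr, pfn, PAGE_KERNEL);	/* establish the mapping */
	set_pte_pfn(vaddr, 0, __pgprot(0));	/* clear it again */
}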
static inline void remap_area_pte(pte_t *pte, unsigned long address,
				  phys_t size, phys_t phys_addr,
				  unsigned long flags)
{
	phys_t end;
	unsigned long pfn;
	pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT |
				   __READABLE | __WRITEABLE | flags);

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	BUG_ON(address >= end);

	pfn = phys_addr >> PAGE_SHIFT;
	do {
		if (!pte_none(*pte)) {
			printk("remap_area_pte: page already exists\n");
			BUG();
		}
		set_pte(pte, pfn_pte(pfn, pgprot));
		address += PAGE_SIZE;
		pfn++;
		pte++;
	} while (address && (address < end));
}
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
			   unsigned long end, unsigned long pfn,
			   pgprot_t prot,
			   phys_addr_t (*pgtable_alloc)(void))
{
	pte_t *pte;

	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
		phys_addr_t pte_phys;

		BUG_ON(!pgtable_alloc);
		pte_phys = pgtable_alloc();
		pte = pte_set_fixmap(pte_phys);
		if (pmd_sect(*pmd))
			split_pmd(pmd, pte);
		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
		flush_tlb_all();
		pte_clear_fixmap();
	}
	BUG_ON(pmd_bad(*pmd));

	pte = pte_set_fixmap_offset(pmd, addr);
	do {
		set_pte(pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);

	pte_clear_fixmap();
}
/*H:340
 * Converting a Guest page table entry to a shadow (i.e. real) page table
 * entry can be a little tricky. The flags are (almost) the same, but the
 * Guest PTE contains a virtual page number: the CPU needs the real page
 * number.
 */
static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write)
{
	unsigned long pfn, base, flags;

	/*
	 * The Guest sets the global flag, because it thinks that it is using
	 * PGE. We only told it to use PGE so it would tell us whether it was
	 * flushing a kernel mapping or a userspace mapping. We don't actually
	 * use the global bit, so throw it away.
	 */
	flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);

	/* The Guest's pages are offset inside the Launcher. */
	base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE;

	/*
	 * We need a temporary "unsigned long" variable to hold the answer from
	 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
	 * fit in spte.pfn. get_pfn() finds the real physical number of the
	 * page, given the virtual number.
	 */
	pfn = get_pfn(base + pte_pfn(gpte), write);
	if (pfn == -1UL) {
		kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte));
		/*
		 * When we destroy the Guest, we'll go through the shadow page
		 * tables and release_pte() them. Make sure we don't think
		 * this one is valid!
		 */
		flags = 0;
	}

	/* Now we assemble our shadow PTE from the page number and flags. */
	return pfn_pte(pfn, __pgprot(flags));
}
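/*
 * Worked example (illustrative values, not from the source): if the
 * Launcher maps the Guest at mem_base = 256MB, then base = 65536, and a
 * guest PTE naming frame 5 is resolved via get_pfn(65536 + 5, write);
 * the rebuilt shadow PTE keeps the guest's flags minus _PAGE_GLOBAL.
 */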
/* now sets up tables using sun3 PTEs rather than i386 as before. --m */
void __init paging_init(void)
{
	pgd_t *pg_dir;
	pte_t *pg_table;
	int i;
	unsigned long address;
	unsigned long next_pgtable;
	unsigned long bootmem_end;
	unsigned long zones_size[MAX_NR_ZONES] = { 0, };
	unsigned long size;

#ifdef TEST_VERIFY_AREA
	wp_works_ok = 0;
#endif
	empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
	memset(empty_zero_page, 0, PAGE_SIZE);

	address = PAGE_OFFSET;
	pg_dir = swapper_pg_dir;
	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
	memset(kernel_pg_dir, 0, sizeof(kernel_pg_dir));

	size = num_pages * sizeof(pte_t);
	size = (size + PAGE_SIZE) & ~(PAGE_SIZE-1);

	next_pgtable = (unsigned long)alloc_bootmem_pages(size);
	bootmem_end = (next_pgtable + size + PAGE_SIZE) & PAGE_MASK;

	/* Map whole memory from PAGE_OFFSET (0x0E000000) */
	pg_dir += PAGE_OFFSET >> PGDIR_SHIFT;

	while (address < (unsigned long)high_memory) {
		pg_table = (pte_t *)__pa(next_pgtable);
		next_pgtable += PTRS_PER_PTE * sizeof(pte_t);
		pgd_val(*pg_dir) = (unsigned long)pg_table;
		pg_dir++;

		/* now change pg_table to kernel virtual addresses */
		pg_table = (pte_t *)__va((unsigned long)pg_table);
		for (i = 0; i < PTRS_PER_PTE; ++i, ++pg_table) {
			pte_t pte = pfn_pte(virt_to_pfn(address), PAGE_INIT);

			if (address >= (unsigned long)high_memory)
				pte_val(pte) = 0;
			set_pte(pg_table, pte);
			address += PAGE_SIZE;
		}
	}

	mmu_emu_init(bootmem_end);

	current->mm = NULL;

	/* memory sizing is a hack stolen from motorola.c.. hope it works for us */
	zones_size[ZONE_DMA] = ((unsigned long)high_memory -
				PAGE_OFFSET) >> PAGE_SHIFT;

	free_area_init(zones_size);
}
/*
 * Set the page permissions for a particular virtual address. If the
 * address is a vmalloc mapping (or other non-linear mapping), then
 * find the linear mapping of the page and also set its protections to
 * match.
 */
static void set_aliased_prot(void *v, pgprot_t prot)
{
	int level;
	pte_t *ptep;
	pte_t pte;
	unsigned long pfn;
	struct page *page;
	unsigned char dummy;

	ptep = lookup_address((unsigned long)v, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	page = pfn_to_page(pfn);

	pte = pfn_pte(pfn, prot);

	/*
	 * Careful: update_va_mapping() will fail if the virtual address
	 * we're poking isn't populated in the page tables. We don't
	 * need to worry about the direct map (that's always in the page
	 * tables), but we need to be careful about vmap space. In
	 * particular, the top level page table can lazily propagate
	 * entries between processes, so if we've switched mms since we
	 * vmapped the target in the first place, we might not have the
	 * top-level page table entry populated.
	 *
	 * We disable preemption because we want the same mm active when
	 * we probe the target and when we issue the hypercall. We'll
	 * have the same nominal mm, but if we're a kernel thread, lazy
	 * mm dropping could change our pgd.
	 *
	 * Out of an abundance of caution, this uses __get_user() to fault
	 * in the target address just in case there's some obscure case
	 * in which the target address isn't readable.
	 */
	preempt_disable();

	pagefault_disable();	/* Avoid warnings due to being atomic. */
	__get_user(dummy, (unsigned char __user __force *)v);
	pagefault_enable();

	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
		BUG();

	if (!PageHighMem(page)) {
		void *av = __va(PFN_PHYS(pfn));

		if (av != v)
			if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
				BUG();
	} else
		kmap_flush_unused();

	preempt_enable();
}
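/*
 * Sketch of a typical caller (the helper name is hypothetical): a
 * descriptor table handed to the hypervisor must be read-only through
 * *all* of its aliases, which is exactly the linear-map fixup that
 * set_aliased_prot() performs above.
 */
static void example_protect_ldt_page(struct desc_struct *ldt)
{
	set_aliased_prot(ldt, PAGE_KERNEL_RO);
}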
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  gfn_t gfn, struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	pte_t new_pte;
	pfn_t pfn;
	int ret;
	bool write_fault, writable;
	unsigned long mmu_seq;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to getting unmapped before we have
	 * a chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	new_pte = pfn_pte(pfn, PAGE_S2);
	coherent_icache_guest_page(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
		goto out_unlock;
	if (writable) {
		kvm_set_s2pte_writable(&new_pte);
		kvm_set_pfn_dirty(pfn);
	}
	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}
static inline void pgd_walk_set_prot(void *pt, pgprot_t flags)
{
	struct page *page = virt_to_page(pt);
	unsigned long pfn = page_to_pfn(page);

	if (PageHighMem(page))
		return;
	BUG_ON(HYPERVISOR_update_va_mapping(
		(unsigned long)__va(pfn << PAGE_SHIFT),
		pfn_pte(pfn, flags), 0));
}
/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long pfn;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int pgd_idx, pmd_idx, pte_ofs;

	pgd_idx = pgd_index(PAGE_OFFSET);
	pgd = pgd_base + pgd_idx;
	pfn = 0;

	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd);
		if (pfn >= max_low_pfn)
			continue;
		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
		     pmd++, pmd_idx++) {
			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;

			/*
			 * Map with big pages if possible, otherwise create
			 * normal page tables.
			 */
			if (cpu_has_pse) {
				unsigned int address2 = (pfn + PTRS_PER_PTE - 1)
					* PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;

				if (is_kernel_text(address) ||
				    is_kernel_text(address2))
					set_pmd(pmd, pfn_pmd(pfn,
						PAGE_KERNEL_LARGE_EXEC));
				else
					set_pmd(pmd, pfn_pmd(pfn,
						PAGE_KERNEL_LARGE));
				pfn += PTRS_PER_PTE;
			} else {
				pte = one_page_table_init(pmd);

				/*
				 * Advance 'address' together with 'pfn' so
				 * the is_kernel_text() check tests the page
				 * actually being mapped.
				 */
				for (pte_ofs = 0;
				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
				     pte++, pfn++, pte_ofs++,
				     address += PAGE_SIZE) {
					if (is_kernel_text(address))
						set_pte(pte, pfn_pte(pfn,
							PAGE_KERNEL_EXEC));
					else
						set_pte(pte, pfn_pte(pfn,
							PAGE_KERNEL));
				}
			}
		}
	}
}
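/*
 * Worked numbers for the loop above (illustrative): on i386 with PSE,
 * one pmd entry maps PTRS_PER_PTE (1024) frames at once, i.e. 4MB, so
 * pfn advances by 1024 per large page; without PSE the inner loop
 * instead writes 1024 individual 4K PTEs for the same span.
 */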
static void clear_pte_phys(unsigned long addr, pgprot_t prot)
{
	pte_t *pte;

	pte = __get_pte_phys(addr);

	if (pgprot_val(prot) & _PAGE_WIRED)
		tlb_unwire_entry();

	set_pte(pte, pfn_pte(0, __pgprot(0)));
	local_flush_tlb_one(get_asid(), addr);
}
/*
 * The __w1data area holds data that is only written during initialization,
 * and is read-only and thus freely cacheable thereafter. Fix the page
 * table entries that cover that region accordingly.
 */
static void mark_w1data_ro(void)
{
	/* Loop over page table entries */
	unsigned long addr = (unsigned long)__w1data_begin;

	BUG_ON((addr & (PAGE_SIZE-1)) != 0);
	for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
		unsigned long pfn = kaddr_to_pfn((void *)addr);
		pte_t *ptep = virt_to_pte(NULL, addr);

		BUG_ON(pte_huge(*ptep));	/* not relevant for kdata_huge */
		set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
	}
}
/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET. The page tables are allocated out of resume-safe pages.
 */
static int resume_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long pfn;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int pgd_idx, pmd_idx;

	pgd_idx = pgd_index(PAGE_OFFSET);
	pgd = pgd_base + pgd_idx;
	pfn = 0;

	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
		pmd = resume_one_md_table_init(pgd);
		if (!pmd)
			return -ENOMEM;

		if (pfn >= max_low_pfn)
			continue;

		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
			if (pfn >= max_low_pfn)
				break;

			/*
			 * Map with big pages if possible, otherwise create
			 * normal page tables.
			 * NOTE: We can mark everything as executable here.
			 */
			if (cpu_has_pse) {
				set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
				pfn += PTRS_PER_PTE;
			} else {
				pte_t *max_pte;

				pte = resume_one_page_table_init(pmd);
				if (!pte)
					return -ENOMEM;

				max_pte = pte + PTRS_PER_PTE;
				for (; pte < max_pte; pte++, pfn++) {
					if (pfn >= max_low_pfn)
						break;
					set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
				}
			}
		}
	}

	resume_map_numa_kva(pgd_base);

	return 0;
}
void pte_free(struct page *pte)
{
	unsigned long va = (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT);

	if (!pte_write(*virt_to_ptep(va)))
		BUG_ON(HYPERVISOR_update_va_mapping(
			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));

	ClearPageForeign(pte);
	init_page_count(pte);

	__free_page(pte);
}
static void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
#ifdef CONFIG_HOMECACHE
	int home = initial_heap_home();
#endif
	unsigned long addr = (unsigned long) begin;

	if (kdata_huge && !initfree) {
		pr_warning("Warning: ignoring initfree=0:"
			   " incompatible with kdata=huge\n");
		initfree = 1;
	}
	end = (end + PAGE_SIZE - 1) & PAGE_MASK;
	local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		/*
		 * Note we just reset the home here directly in the
		 * page table. We know this is safe because our caller
		 * just flushed the caches on all the other cpus,
		 * and they won't be touching any of these pages.
		 */
		int pfn = kaddr_to_pfn((void *)addr);
		struct page *page = pfn_to_page(pfn);
		pte_t *ptep = virt_to_pte(NULL, addr);

		if (!initfree) {
			/*
			 * If debugging page accesses then do not free
			 * this memory but mark them not present - any
			 * buggy init-section access will create a
			 * kernel page fault:
			 */
			pte_clear(&init_mm, addr, ptep);
			continue;
		}
#ifdef CONFIG_HOMECACHE
		set_page_home(page, home);
		__clear_bit(PG_homecache_nomigrate, &page->flags);
#endif
		__ClearPageReserved(page);
		init_page_count(page);
		if (pte_huge(*ptep))
			BUG_ON(!kdata_huge);
		else
			set_pte_at(&init_mm, addr, ptep,
				   pfn_pte(pfn, PAGE_KERNEL));
		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
	pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
}
/*
 * This is the same as kmap_atomic() but can map memory that doesn't
 * have a struct page associated with it.
 */
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
{
	enum fixed_addresses idx;
	unsigned long vaddr;

	pagefault_disable();

	idx = type + KM_TYPE_NR * smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
	set_pte(kmap_pte - idx, pfn_pte(pfn, kmap_prot));
	flush_tlb_one(vaddr);

	return (void *)vaddr;
}
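/*
 * A hypothetical use of kmap_atomic_pfn() (a sketch; the helper name is
 * illustrative, and the two-argument kunmap_atomic() assumes the same
 * km_type-era API as above): temporarily map a frame that has no struct
 * page, e.g. device memory, copy out of it, then drop the mapping.
 */
static void example_copy_from_pfn(void *dst, unsigned long pfn, size_t len)
{
	void *src = kmap_atomic_pfn(pfn, KM_USER0);

	/* len must not exceed PAGE_SIZE: only one frame is mapped. */
	memcpy(dst, src, len);
	kunmap_atomic(src, KM_USER0);
}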
/*
 * remap a PMD into pages
 */
static void split_pmd(pmd_t *pmd, pte_t *pte)
{
	unsigned long pfn = pmd_pfn(*pmd);
	int i = 0;

	do {
		/*
		 * Need to have the least restrictive permissions available;
		 * permissions will be fixed up later.
		 */
		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
		pfn++;
	} while (pte++, i++, i < PTRS_PER_PTE);
}
/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			     pgprot_t flags, unsigned int map_page_size,
			     int nid, unsigned long region_start,
			     unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
					     nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	pudp = pud_alloc(&init_mm, pgdp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}
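/*
 * A hypothetical caller (sketch): map a single 4K kernel page at ea/pa
 * with normal kernel protections and no placement hints (nid = -1 means
 * no node preference; zero region bounds mean no region preference).
 */
static int example_map_one_page(unsigned long ea, unsigned long pa)
{
	return __map_kernel_page(ea, pa, PAGE_KERNEL, PAGE_SIZE, -1, 0, 0);
}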
static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
				     unsigned long end)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	pte_t zero_pte;

	zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL);
	zero_pte = pte_wrprotect(zero_pte);

	while (addr + PAGE_SIZE <= end) {
		set_pte_at(&init_mm, addr, pte, zero_pte);
		addr += PAGE_SIZE;
		pte = pte_offset_kernel(pmd, addr);
	}
}
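/*
 * Illustrative effect of the loop above (not from the source): every
 * page in [addr, end) becomes a write-protected alias of the single
 * kasan_zero_page, so an arbitrarily large shadow region initially
 * costs one physical page plus its page tables.
 */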