/**
 * resume_map_numa_kva - add the per-node remap KVA windows to the temporary
 * page tables created during resume from hibernation
 * @pgd_base: temporary resume page directory
 *
 * For every online node, the window that starts at
 * node_remap_start_vaddr[node] and ends at node_remap_end_vaddr[node] is
 * mapped onto the frames beginning at node_remap_start_pfn[node].  One PMD
 * entry (PAGE_KERNEL_LARGE_EXEC) is written per PTRS_PER_PTE pages.
 */
void resume_map_numa_kva(pgd_t *pgd_base)
{
	int nid;

	for_each_online_node(nid) {
		unsigned long base_va, base_pfn, count, off;

		base_va = (unsigned long)node_remap_start_vaddr[nid];
		base_pfn = node_remap_start_pfn[nid];
		count = (node_remap_end_vaddr[nid] -
			 node_remap_start_vaddr[nid]) >> PAGE_SHIFT;

		printk(KERN_DEBUG "%s: node %d\n", __func__, nid);

		off = 0;
		while (off < count) {
			unsigned long va = base_va + (off << PAGE_SHIFT);
			unsigned long target_pfn = base_pfn + off;
			pgd_t *dir = pgd_base + pgd_index(va);
			pmd_t *entry = pmd_offset(pud_offset(dir, va), va);

			set_pmd(entry, pfn_pmd(target_pfn, PAGE_KERNEL_LARGE_EXEC));

			printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
			       __func__, va, target_pfn);

			/* each PMD entry covers PTRS_PER_PTE pages */
			off += PTRS_PER_PTE;
		}
	}
}
/*
 * Map one large virtual page onto a physical page frame.
 *
 * @vaddr: virtual address to map; must be PMD_SIZE aligned
 * @pfn:   first frame of the physical page; must be PTRS_PER_PTE aligned
 * @flags: protection bits for the new entry
 *
 * The entry is looked up through swapper_pg_dir, and the pmd must already
 * be instantiated.  Assumes PAE mode.  Any violated precondition is
 * reported with a KERN_ERR message and the call returns without touching
 * the page tables.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *top;
	pmd_t *slot;

	/* vaddr has to sit on a large-page boundary */
	if (vaddr & (PMD_SIZE-1)) {
		printk(KERN_ERR "set_pmd_pfn: vaddr misaligned\n");
		return;
	}

	/* ... and so does the frame number */
	if (pfn & (PTRS_PER_PTE-1)) {
		printk(KERN_ERR "set_pmd_pfn: pfn misaligned\n");
		return;
	}

	top = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*top)) {
		printk(KERN_ERR "set_pmd_pfn: pgd_none\n");
		return;
	}

	slot = pmd_offset(pud_offset(top, vaddr), vaddr);
	set_pmd(slot, pfn_pmd(pfn, flags));

	/*
	 * Only this one translation changed, so flushing it alone suffices
	 * (PGE mappings get flushed as well).
	 */
	__flush_tlb_one(vaddr);
}
/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 *
 * @pgd_base: page directory to populate
 *
 * With PSE each PMD entry maps PTRS_PER_PTE pages at once and is made
 * executable if either end of that range is kernel text.  Without PSE a
 * page table is instantiated per PMD and each page is classified
 * individually.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long pfn;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int pgd_idx, pmd_idx, pte_ofs;

	pgd_idx = pgd_index(PAGE_OFFSET);
	pgd = pgd_base + pgd_idx;
	pfn = 0;

	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
		/* The PMD table is set up even when nothing is left to map. */
		pmd = one_md_table_init(pgd);
		if (pfn >= max_low_pfn)
			continue;

		for (pmd_idx = 0;
		     pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
		     pmd++, pmd_idx++) {
			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;

			/*
			 * Map with big pages if possible, otherwise create
			 * normal page tables.
			 */
			if (cpu_has_pse) {
				/* last byte covered by this large page */
				unsigned int address2 =
					(pfn + PTRS_PER_PTE - 1) * PAGE_SIZE +
					PAGE_OFFSET + PAGE_SIZE-1;

				if (is_kernel_text(address) ||
				    is_kernel_text(address2))
					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
				else
					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));

				pfn += PTRS_PER_PTE;
			} else {
				pte = one_page_table_init(pmd);

				/*
				 * address must advance together with pfn;
				 * otherwise every PTE in this table would be
				 * classified by the first page of the PMD
				 * range instead of by its own address.
				 */
				for (pte_ofs = 0;
				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
				     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
					if (is_kernel_text(address))
						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
					else
						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
				}
			}
		}
	}
}
/*
 * Build the kernel's linear mapping of low memory in the resume page
 * tables: max_low_pfn pages in total, with page tables starting at the
 * slot for PAGE_OFFSET.  The page tables are allocated out of resume-safe
 * pages.  The NUMA remap windows are added at the end via
 * resume_map_numa_kva().
 *
 * @pgd_base: page directory to populate
 *
 * Returns 0 on success, or -ENOMEM if a PMD or PTE table could not be
 * obtained.
 */
static int resume_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long pfn = 0;
	int dir_idx = pgd_index(PAGE_OFFSET);
	pgd_t *dir = pgd_base + dir_idx;

	while (dir_idx < PTRS_PER_PGD) {
		pmd_t *mid = resume_one_md_table_init(dir);
		int slot;

		if (!mid)
			return -ENOMEM;

		/* The loop body is skipped entirely once low memory is mapped. */
		for (slot = 0;
		     slot < PTRS_PER_PMD && pfn < max_low_pfn;
		     slot++, mid++) {
			pte_t *leaf, *leaf_end;

			/*
			 * Map with big pages if possible, otherwise create
			 * normal page tables.
			 * NOTE: We can mark everything as executable here
			 */
			if (cpu_has_pse) {
				set_pmd(mid, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
				pfn += PTRS_PER_PTE;
				continue;
			}

			leaf = resume_one_page_table_init(mid);
			if (!leaf)
				return -ENOMEM;

			leaf_end = leaf + PTRS_PER_PTE;
			while (leaf < leaf_end && pfn < max_low_pfn) {
				set_pte(leaf, pfn_pte(pfn, PAGE_KERNEL_EXEC));
				leaf++;
				pfn++;
			}
		}

		dir++;
		dir_idx++;
	}

	resume_map_numa_kva(pgd_base);

	return 0;
}
/*
 * Populate the resume page tables with a linear mapping of the first
 * max_low_pfn page frames, beginning at the page-directory slot for
 * PAGE_OFFSET.  Everything is mapped executable.
 *
 * @pgd_base: page directory to populate
 *
 * Returns 0 on success, or -ENOMEM if a PMD or PTE table could not be
 * obtained.
 */
static int resume_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long next_pfn = 0;
	pgd_t *pgd_cur;
	int pgd_no;

	pgd_no = pgd_index(PAGE_OFFSET);
	for (pgd_cur = pgd_base + pgd_no; pgd_no < PTRS_PER_PGD; pgd_cur++, pgd_no++) {
		pmd_t *pmd_cur;
		int pmd_no;

		pmd_cur = resume_one_md_table_init(pgd_cur);
		if (!pmd_cur)
			return -ENOMEM;

		for (pmd_no = 0; pmd_no < PTRS_PER_PMD; pmd_no++, pmd_cur++) {
			/* stop as soon as all of low memory is covered */
			if (next_pfn >= max_low_pfn)
				break;

			if (!cpu_has_pse) {
				/* no large pages: fill a regular page table */
				pte_t *pte_cur = resume_one_page_table_init(pmd_cur);
				int pte_no;

				if (!pte_cur)
					return -ENOMEM;

				for (pte_no = 0;
				     pte_no < PTRS_PER_PTE && next_pfn < max_low_pfn;
				     pte_no++, pte_cur++, next_pfn++)
					set_pte(pte_cur, pfn_pte(next_pfn, PAGE_KERNEL_EXEC));
			} else {
				/* one large entry covers PTRS_PER_PTE frames */
				set_pmd(pmd_cur, pfn_pmd(next_pfn, PAGE_KERNEL_LARGE_EXEC));
				next_pfn += PTRS_PER_PTE;
			}
		}
	}

	return 0;
}
/*
 * Point the PMD entry that covers @vaddr (looked up through
 * swapper_pg_dir) at the frame @pfn with protection @flags, then flush
 * the local TLB entry for @vaddr.
 *
 * @vaddr must be PMD_SIZE aligned, @pfn must be PTRS_PER_PTE aligned and
 * the pgd entry must be populated; otherwise a KERN_ERR message is logged
 * and nothing is changed.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd_entry;
	pud_t *pud_entry;
	pmd_t *pmd_entry;

	if ((vaddr & (PMD_SIZE-1)) != 0) {
		printk(KERN_ERR "set_pmd_pfn: vaddr misaligned\n");
		return;
	}

	if ((pfn & (PTRS_PER_PTE-1)) != 0) {
		printk(KERN_ERR "set_pmd_pfn: pfn misaligned\n");
		return;
	}

	pgd_entry = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd_entry)) {
		printk(KERN_ERR "set_pmd_pfn: pgd_none\n");
		return;
	}

	/* walk down to the PMD slot and install the new entry */
	pud_entry = pud_offset(pgd_entry, vaddr);
	pmd_entry = pmd_offset(pud_entry, vaddr);
	set_pmd(pmd_entry, pfn_pmd(pfn, flags));

	/* only this one mapping changed */
	local_flush_tlb_one(vaddr);
}
/*
 * user_mem_abort - map the host page backing a faulting guest IPA into the
 * stage-2 tables.
 *
 * @vcpu:         faulting vcpu
 * @fault_ipa:    guest intermediate physical address that faulted
 * @memslot:      memory slot for the fault (not consulted by this function)
 * @fault_status: fault status code; FSC_PERM on a non-write fault is
 *                rejected as unexpected
 *
 * The gfn is translated to a host pfn with gfn_to_pfn_prot() and installed
 * either as a huge PMD (when the backing VMA is a hugetlb VMA) or as a
 * regular PTE.
 *
 * Returns 0 when the mapping was installed or when the fault has to be
 * retried because of a concurrent MMU-notifier invalidation, -EFAULT on an
 * unexpected read permission fault, a missing VMA or an error pfn, or the
 * error returned by mmu_topup_memory_cache() / stage2_set_*().
 */
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	pfn_t pfn;

	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (!vma) {
		/*
		 * No VMA covers hva (e.g. userspace unmapped the region);
		 * bail out instead of handing a NULL vma to
		 * is_vm_hugetlb_page().
		 */
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}
	if (is_vm_hugetlb_page(vma)) {
		hugetlb = true;
		/* map the whole huge page: start from its first gfn */
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	}
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensure that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	spin_lock(&kvm->mmu_lock);
	/* ret is still 0 here, so a retry is reported as success */
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);

		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
			kvm_set_s2pmd_writable(&new_pmd);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, PAGE_S2);

		if (writable) {
			kvm_set_s2pte_writable(&new_pte);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return ret;
}
/*
 * Build a pmd entry value that refers to @page with protection @pgprot.
 * The page is converted to its frame number and passed to pfn_pmd().
 */
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
	unsigned long frame = page_to_pfn(page);

	return pfn_pmd(frame, pgprot);
}
/*
 * setup_pcpu_remap - set up the first percpu chunk using one large page
 * per possible CPU, remapped into a contiguous virtual area.
 *
 * @static_size: size of the static percpu data copied from __per_cpu_load
 *
 * Returns the value of pcpu_setup_first_chunk() on success, -EINVAL when
 * large pages are unavailable, NUMA remapping is not needed, or the unit
 * does not fit in one large page, and -ENOMEM when a per-CPU large page
 * could not be allocated.
 */
static ssize_t __init setup_pcpu_remap(size_t static_size)
{
	static struct vm_struct vm;
	size_t ptrs_size, dyn_size;
	unsigned int cpu;
	ssize_t ret;

	/*
	 * If large page isn't supported, there's no benefit in doing
	 * this.  Also, on non-NUMA, embedding is better.
	 */
	if (!cpu_has_pse || !pcpu_need_numa())
		return -EINVAL;

	/*
	 * Currently supports only single page.  Supporting multiple
	 * pages won't be too difficult if it ever becomes necessary.
	 */
	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
			       PERCPU_DYNAMIC_RESERVE);
	if (pcpur_size > PMD_SIZE) {
		pr_warning("PERCPU: static data is larger than large page, "
			   "can't use large page\n");
		return -EINVAL;
	}
	dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

	/* allocate pointer array and alloc large pages */
	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
	pcpur_ptrs = alloc_bootmem(ptrs_size);

	for_each_possible_cpu(cpu) {
		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
		if (!pcpur_ptrs[cpu])
			goto enomem;

		/*
		 * Only use pcpur_size bytes and give back the rest.
		 *
		 * Ingo: The 2MB up-rounding bootmem is needed to make
		 * sure the partial 2MB page is still fully RAM - it's
		 * not well-specified to have a PAT-incompatible area
		 * (unmapped RAM, device memory, etc.) in that hole.
		 */
		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
			     PMD_SIZE - pcpur_size);

		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
	}

	/* allocate address and map */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * PMD_SIZE;
	vm_area_register_early(&vm, PMD_SIZE);

	for_each_possible_cpu(cpu) {
		pmd_t *pmd;

		pmd = populate_extra_pmd((unsigned long)vm.addr
					 + cpu * PMD_SIZE);
		set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
				     PAGE_KERNEL_LARGE));
	}

	/* we're ready, commit */
	pr_info("PERCPU: Remapped at %p with large pages, static data "
		"%zu bytes\n", vm.addr, static_size);

	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
				     PMD_SIZE, vm.addr, NULL);
	goto out_free_ar;

enomem:
	/*
	 * Every CPU that has a pointer here already had the tail beyond
	 * pcpur_size returned to bootmem in the allocation loop (the CPU
	 * whose allocation failed has a NULL pointer and is skipped), so
	 * only the first pcpur_size bytes are still ours to free.  Freeing
	 * the whole PMD_SIZE would release that tail a second time.
	 * NOTE(review): relies on untouched pcpur_ptrs[] entries being NULL.
	 */
	for_each_possible_cpu(cpu)
		if (pcpur_ptrs[cpu])
			free_bootmem(__pa(pcpur_ptrs[cpu]), pcpur_size);
	ret = -ENOMEM;
out_free_ar:
	/* the pointer array is released on both success and failure */
	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
	return ret;
}