/*
 * stage2_set_pmd_huge - install a huge (PMD-level) mapping in the stage-2
 * page tables for @addr.
 *
 * @kvm:     the VM whose stage-2 tables are updated
 * @cache:   memory cache used by stage2_get_pmd() to allocate table pages
 * @addr:    guest IPA being mapped
 * @new_pmd: the PMD value to install
 *
 * Returns 0 on success.
 *
 * Fixes over the previous version:
 *  - Skip the update entirely when the entry is unchanged: multiple vcpus
 *    faulting on the same PMD would otherwise keep clearing/flushing the
 *    same entry, generating refaults and hindering forward progress.
 *  - Follow break-before-make when replacing a present entry: the old
 *    code wrote the new PMD first and flushed the TLB afterwards, which
 *    allows the TLB to briefly hold conflicting translations for the
 *    same IPA (forbidden by the ARM architecture).
 */
static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		/*
		 * Multiple vcpus faulting on the same PMD entry can lead
		 * to them sequentially updating it with the same value.
		 * Skip the page-table update if nothing changes.
		 */
		if (pmd_val(old_pmd) == pmd_val(*new_pmd))
			return 0;

		/*
		 * Mapping in huge pages should only happen through a
		 * fault.  If a page is merged into a transparent huge
		 * page, the individual subpages of that huge page should
		 * be unmapped through MMU notifiers before we get here.
		 *
		 * Merging of CompoundPages is not supported; they should
		 * become splitting first, unmapped, merged, and mapped
		 * back in on-demand.
		 */
		VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));

		/* Break-before-make: invalidate, flush, then install. */
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		/* First mapping through this table page: take a ref. */
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}
/*
 * kern_addr_valid - report whether a kernel virtual address is backed by
 * a valid, present mapping.
 *
 * Returns non-zero (the pfn_valid() result for the backing frame) when
 * the address is canonical and every paging level down to the mapping
 * entry is populated; 0 otherwise.  Huge (PMD-level) mappings are
 * resolved without descending to the PTE level.
 */
int kern_addr_valid(unsigned long addr)
{
	/* Canonical check: sign-extension bits must be all zeros or all ones. */
	unsigned long sext = ((long)addr) >> __VIRTUAL_MASK_SHIFT;

	if (sext != 0 && sext != -1UL)
		return 0;

	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud_t *pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;

	/* A large PMD maps the address directly; there is no PTE level. */
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;

	return pfn_valid(pte_pfn(*pte));
}
/*
 * Follow a guest PMD down to the guest-physical address of the PTE
 * covering @vaddr.  The caller must only pass a present PMD entry.
 */
static unsigned long gpte_addr(struct lg_cpu *cpu, pmd_t gpmd,
			       unsigned long vaddr)
{
	unsigned long pte_page;

	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));

	pte_page = pmd_pfn(gpmd) << PAGE_SHIFT;
	return pte_page + pte_index(vaddr) * sizeof(pte_t);
}
/*
 * stage2_set_pmd_huge - install a huge (PMD-level) mapping in the stage-2
 * page tables for @addr.
 *
 * @kvm:     the VM whose stage-2 tables are updated
 * @cache:   memory cache used by stage2_get_pmd() to allocate table pages
 * @addr:    guest IPA being mapped
 * @new_pmd: the PMD value to install
 *
 * Returns 0 on success.  When replacing a present entry this follows
 * break-before-make: the old entry is cleared and the TLB flushed before
 * the new value is written, so the statement order below must not change.
 */
static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		/*
		 * Multiple vcpus faulting on the same PMD entry, can
		 * lead to them sequentially updating the PMD with the
		 * same value. Following the break-before-make
		 * (pmd_clear() followed by tlb_flush()) process can
		 * hinder forward progress due to refaults generated
		 * on missing translations.
		 *
		 * Skip updating the page table if the entry is
		 * unchanged.
		 */
		if (pmd_val(old_pmd) == pmd_val(*new_pmd))
			return 0;

		/*
		 * Mapping in huge pages should only happen through a
		 * fault. If a page is merged into a transparent huge
		 * page, the individual subpages of that huge page
		 * should be unmapped through MMU notifiers before we
		 * get here.
		 *
		 * Merging of CompoundPages is not supported; they
		 * should become splitting first, unmapped, merged,
		 * and mapped back in on-demand.
		 */
		VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));

		/* Break-before-make: clear + flush before the new write. */
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		/* First mapping through this table page: take a reference. */
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}
/*
 * Sanity-check a guest PMD entry: any flag outside _PAGE_TABLE or a pfn
 * beyond the guest's limit is fatal for the guest.
 *
 * Returns true when the entry is acceptable, false after killing the
 * guest.
 */
static bool check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
{
	bool bad_flags = (pmd_flags(gpmd) & ~_PAGE_TABLE) != 0;
	bool bad_pfn = pmd_pfn(gpmd) >= cpu->lg->pfn_limit;

	if (bad_flags || bad_pfn) {
		kill_guest(cpu, "bad page middle directory entry");
		return false;
	}

	return true;
}
/*
 * remap a PMD into pages
 *
 * Populate the PTE page at @pte with PTRS_PER_PTE small-page entries
 * covering the same physical range the huge @pmd mapped.
 */
static void split_pmd(pmd_t *pmd, pte_t *pte)
{
	unsigned long pfn = pmd_pfn(*pmd);
	int idx;

	for (idx = 0; idx < PTRS_PER_PTE; idx++, pte++, pfn++) {
		/*
		 * Need to have the least restrictive permissions available;
		 * permissions will be fixed up later.
		 */
		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
	}
}
/*
 * Release the page of PTEs referenced by a shadow PMD entry, then clear
 * the entry so it is never released twice.
 */
static void release_pmd(pmd_t *spmd)
{
	unsigned int idx;
	pte_t *pte_page;

	/* A non-present entry holds nothing to release. */
	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
		return;

	pte_page = __va(pmd_pfn(*spmd) << PAGE_SHIFT);

	/* Each entry in the page might itself need releasing. */
	for (idx = 0; idx < PTRS_PER_PTE; idx++)
		release_pte(pte_page[idx]);

	/* Now the page of PTEs itself can go. */
	free_page((long)pte_page);

	/* Zero the PMD entry so we never release it twice. */
	set_pmd(spmd, __pmd(0));
}
/*
 * This routine takes the page directory entry returned above, which
 * contains the address of the page table entry (PTE) page, and returns a
 * pointer to the PTE entry for the given address.
 *
 * Under PAE the PTE page hangs off a PMD entry; otherwise it hangs
 * directly off the PGD entry.  Either way the relevant entry must be
 * present before calling this.
 */
static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
{
	pte_t *pte_page;

#ifdef CONFIG_X86_PAE
	pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);

	/* You should never call this if the PMD entry wasn't valid */
	BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
	pte_page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
#else
	/* You should never call this if the PGD entry wasn't valid */
	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
	pte_page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
#endif

	return &pte_page[pte_index(vaddr)];
}
/*
 * Sanity-check a guest PMD entry; kill the guest on any flag outside
 * _PAGE_TABLE or a pfn beyond the guest's limit.
 */
static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
{
	bool flags_ok = !(pmd_flags(gpmd) & ~_PAGE_TABLE);
	bool pfn_ok = pmd_pfn(gpmd) < cpu->lg->pfn_limit;

	if (!flags_ok || !pfn_ok)
		kill_guest(cpu, "bad page middle directory entry");
}
/*
 * __hash_page_thp - service a hash fault on a transparent-huge-page PMD.
 *
 * @ea:     faulting effective address
 * @access: access flags being demanded (checked against the PMD)
 * @vsid:   virtual segment id of the mapping
 * @pmdp:   the Linux huge PMD being hashed in
 * @trap:   trap information (passed through to debug reporting)
 * @local:  whether a local (non-broadcast) update is permitted
 * @ssize:  segment size
 * @psize:  base page size used to index the per-subpage HPTE slot array
 *
 * Returns 0 on success or benign retry, 1 to take a page fault (access
 * mismatch), or -1 on hypervisor insert failure.
 *
 * The PMD is locked by atomically setting _PAGE_BUSY via cmpxchg; while
 * busy, no one else inspects the hpte_slot_array, which is why it can be
 * updated non-atomically below.  The statement order is load-bearing
 * throughout — do not reorder.
 */
int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
		    pmd_t *pmdp, unsigned long trap, int local, int ssize,
		    unsigned int psize)
{
	unsigned int index, valid;
	unsigned char *hpte_slot_array;
	unsigned long rflags, pa, hidx;
	unsigned long old_pmd, new_pmd;
	int ret, lpsize = MMU_PAGE_16M;
	unsigned long vpn, hash, shift, slot;

	/*
	 * atomically mark the linux large page PMD busy and dirty
	 */
	do {
		old_pmd = pmd_val(*pmdp);
		/* If PMD busy, retry the access */
		if (unlikely(old_pmd & _PAGE_BUSY))
			return 0;
		/* If PMD is trans splitting retry the access */
		if (unlikely(old_pmd & _PAGE_SPLITTING))
			return 0;
		/* If PMD permissions don't match, take page fault */
		if (unlikely(access & ~old_pmd))
			return 1;
		/*
		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
		 * a write access
		 */
		new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
		if (access & _PAGE_RW)
			new_pmd |= _PAGE_DIRTY;
	} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
					  old_pmd, new_pmd));
	/*
	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
	 * need to add in 0x1 if it's a read-only user page
	 */
	rflags = new_pmd & _PAGE_USER;
	if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) &&
					(new_pmd & _PAGE_DIRTY)))
		rflags |= 0x1;
	/*
	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
	 */
	rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N);

#if 0
	/* NOTE(review): disabled block references old_pte, which does not
	 * exist in this scope — would not compile if re-enabled. */
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {

		/*
		 * No CPU has hugepages but lacks no execute, so we
		 * don't need to worry about that case
		 */
		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
	}
#endif
	/*
	 * Find the slot index details for this ea, using base page size.
	 */
	shift = mmu_psize_defs[psize].shift;
	index = (ea & ~HPAGE_PMD_MASK) >> shift;
	BUG_ON(index >= 4096);

	vpn = hpt_vpn(ea, vsid, ssize);
	hash = hpt_hash(vpn, shift, ssize);
	/* Per-subpage HPTE slot bookkeeping lives alongside the PMD. */
	hpte_slot_array = get_hpte_slot_array(pmdp);

	valid = hpte_valid(hpte_slot_array, index);
	if (valid) {
		/* update the hpte bits */
		hidx = hpte_hash_index(hpte_slot_array, index);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
					   psize, lpsize, ssize, local);
		/*
		 * We failed to update, try to insert a new entry.
		 */
		if (ret == -1) {
			/*
			 * large pte is marked busy, so we can be sure
			 * nobody is looking at hpte_slot_array. hence we can
			 * safely update this here.
			 */
			valid = 0;
			new_pmd &= ~_PAGE_HPTEFLAGS;
			hpte_slot_array[index] = 0;
		} else
			/* clear the busy bits and set the hash pte bits */
			new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
	}

	if (!valid) {
		unsigned long hpte_group;

		/* insert new entry */
		pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
repeat:
		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;

		/* clear the busy bits and set the hash pte bits */
		new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;

		/* Add in WIMG bits */
		rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
				      _PAGE_COHERENT | _PAGE_GUARDED));

		/* Insert into the hash table, primary slot */
		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
					  psize, lpsize, ssize);
		/*
		 * Primary is full, try the secondary
		 */
		if (unlikely(slot == -1)) {
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
						  rflags, HPTE_V_SECONDARY,
						  psize, lpsize, ssize);
			if (slot == -1) {
				/* Both groups full: evict a pseudo-random
				 * victim and retry the insertion. */
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP) & ~0x7UL;

				ppc_md.hpte_remove(hpte_group);
				goto repeat;
			}
		}
		/*
		 * Hypervisor failure. Restore old pmd and return -1
		 * similar to __hash_page_*
		 */
		if (unlikely(slot == -2)) {
			*pmdp = __pmd(old_pmd);
			hash_failure_debug(ea, access, vsid, trap, ssize,
					   psize, lpsize, old_pmd);
			return -1;
		}
		/*
		 * large pte is marked busy, so we can be sure
		 * nobody is looking at hpte_slot_array. hence we can
		 * safely update this here.
		 */
		mark_hpte_slot_valid(hpte_slot_array, index, slot);
	}
	/*
	 * No need to use ldarx/stdcx here: dropping _PAGE_BUSY releases
	 * the lock we took with the cmpxchg above.
	 */
	*pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
	return 0;
}