/*
 * We only try to do i/d cache coherency on stuff that looks like
 * reasonably "normal" PTEs. We currently require a PTE to be present,
 * and we avoid _PAGE_SPECIAL and cache-inhibited PTEs. We also only do
 * that on userspace PTEs.
 */
static inline int pte_looks_normal(pte_t pte)
{
#if defined(CONFIG_PPC_BOOK3S_64)
	if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
		if (pte_ci(pte))
			return 0;
		if (pte_user(pte))
			return 1;
	}
	return 0;
#else
	return (pte_val(pte) &
		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
		(_PAGE_PRESENT | _PAGE_USER);
#endif
}
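/*
 * Illustrative sketch, not kernel code: pte_looks_normal() exists to gate
 * lazy i/d cache maintenance, so a caller would typically look roughly like
 * the function below. demo_set_pte_filter() and
 * demo_flush_dcache_icache_page() are hypothetical names invented for this
 * example; pte_looks_normal(), pte_exec() and pte_page() are real helpers.
 */
static pte_t demo_set_pte_filter(pte_t pte)
{
	/* Only "normal" executable user PTEs need i/d cache coherency work */
	if (!pte_looks_normal(pte) || !pte_exec(pte))
		return pte;

	/* Hypothetical: make the backing page coherent before it executes */
	demo_flush_dcache_icache_page(pte_page(pte));
	return pte;
}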
int __hash_page_64K(unsigned long ea, unsigned long access,
		    unsigned long vsid, pte_t *ptep, unsigned long trap,
		    unsigned long flags, int ssize)
{
	unsigned long hpte_group;
	unsigned long rflags, pa;
	unsigned long old_pte, new_pte;
	unsigned long vpn, hash, slot;
	unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;

	/*
	 * atomically mark the linux large page PTE busy and dirty
	 */
	do {
		pte_t pte = READ_ONCE(*ptep);

		old_pte = pte_val(pte);
		/* If PTE busy, retry the access */
		if (unlikely(old_pte & H_PAGE_BUSY))
			return 0;
		/* If PTE permissions don't match, take page fault */
		if (unlikely(!check_pte_access(access, old_pte)))
			return 1;
		/*
		 * Check if the PTE has the cache-inhibited bit set.
		 * If so, bail out and refault as a 4k page.
		 */
		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
		    unlikely(pte_ci(pte)))
			return 0;
		/*
		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
		 * a write access.
		 */
		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
		if (access & _PAGE_WRITE)
			new_pte |= _PAGE_DIRTY;
	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));

	rflags = htab_convert_pte_flags(new_pte);

	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);

	vpn = hpt_vpn(ea, vsid, ssize);
	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
		/*
		 * There MIGHT be an HPTE for this pte
		 */
		hash = hpt_hash(vpn, shift, ssize);
		if (old_pte & H_PAGE_F_SECOND)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;

		if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
					       MMU_PAGE_64K, ssize,
					       flags) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(old_pte & H_PAGE_HASHPTE))) {

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
		hash = hpt_hash(vpn, shift, ssize);

repeat:
		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;

		/* Insert into the hash table, primary slot */
		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
						MMU_PAGE_64K, MMU_PAGE_64K,
						ssize);
		/*
		 * Primary is full, try the secondary
		 */
		if (unlikely(slot == -1)) {
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
							rflags,
							HPTE_V_SECONDARY,
							MMU_PAGE_64K,
							MMU_PAGE_64K, ssize);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP) & ~0x7UL;
				mmu_hash_ops.hpte_remove(hpte_group);
				/*
				 * FIXME!! Should we retry the group from
				 * which we just removed an entry?
				 */
				goto repeat;
			}
		}
		/*
		 * Hypervisor failure. Restore the old pte and return -1,
		 * similar to __hash_page_*.
		 */
		if (unlikely(slot == -2)) {
			*ptep = __pte(old_pte);
			hash_failure_debug(ea, access, vsid, trap, ssize,
					   MMU_PAGE_64K, MMU_PAGE_64K,
					   old_pte);
			return -1;
		}

		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
		new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
			(H_PAGE_F_SECOND | H_PAGE_F_GIX);
	}
	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
	return 0;
}
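/*
 * Illustrative sketch, not kernel code: how the hash-table slot recorded in
 * the Linux PTE's software bits is decoded, mirroring the H_PAGE_F_SECOND /
 * H_PAGE_F_GIX handling in __hash_page_64K() above.
 * demo_pte_to_hpte_slot() is a hypothetical name; hpt_hash(),
 * htab_hash_mask, HPTES_PER_GROUP and the H_PAGE_F_* masks are the real
 * symbols used above.
 */
static unsigned long demo_pte_to_hpte_slot(unsigned long pte_v,
					   unsigned long vpn,
					   unsigned long shift, int ssize)
{
	unsigned long hash = hpt_hash(vpn, shift, ssize);
	unsigned long slot;

	/* H_PAGE_F_SECOND means the HPTE went into the secondary bucket */
	if (pte_v & H_PAGE_F_SECOND)
		hash = ~hash;
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	/* H_PAGE_F_GIX is the 3-bit index within the 8-entry group */
	slot += (pte_v & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
	return slot;
}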
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	bool is_ci;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits, irq_flags = 0;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	psize = kvmppc_actual_pgsz(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_ci = false;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	/*
	 * If we had a page table change after the lookup, we would
	 * retry via mmu_notifier_retry.
	 */
	if (!realmode)
		local_irq_save(irq_flags);
	/*
	 * If called in real mode we have MSR_EE = 0. Otherwise
	 * we disable irqs above.
	 */
	ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift);
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * We should always find the guest page size
		 * <= host page size, if the host is using hugepages.
		 */
		if (host_pte_size < psize) {
			if (!realmode)
				local_irq_restore(irq_flags);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !__pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_ci = pte_ci(pte);
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	if (!realmode)
		local_irq_restore(irq_flags);

	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize - 1);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else {
		pteh |= HPTE_V_ABSENT;
		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
	}

	/* If we had a host pte mapping then check WIMG */
	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
		if (is_ci)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		ptel &= ~(HPTE_R_W | HPTE_R_I | HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it. Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;

				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.hpt.rev[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	/* Convert to new format on P9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		ptel = hpte_old_to_new_r(pteh, ptel);
		pteh = hpte_old_to_new_v(pteh);
	}
	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}
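/*
 * Illustrative sketch, not kernel code: the HPTE addressing arithmetic used
 * by kvmppc_do_h_enter() above. Each HPTE is two big-endian dwords (16
 * bytes) and a PTEG holds 8 of them, which is why the code above uses
 * "pte_index << 4" as a byte offset and steps "hpte += 2" (two __be64s) when
 * scanning a group. demo_hpte_addr() is a hypothetical helper name.
 */
static __be64 *demo_hpte_addr(struct kvm *kvm, unsigned long pte_index)
{
	/* 16 bytes per HPTE: dword 0 (V word) followed by dword 1 (R word) */
	return (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
}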