int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { gfn_t gfn, end_gfn; kvm_pfn_t pfn; int r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; int flags; /* check if iommu exists and in use */ if (!domain) return 0; gfn = slot->base_gfn; end_gfn = gfn + slot->npages; flags = IOMMU_READ; if (!(slot->flags & KVM_MEM_READONLY)) flags |= IOMMU_WRITE; if (!kvm->arch.iommu_noncoherent) flags |= IOMMU_CACHE; while (gfn < end_gfn) { unsigned long page_size; /* Check if already mapped */ if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { gfn += 1; continue; } /* Get the page size we could use to map */ page_size = kvm_host_page_size(kvm, gfn); /* Make sure the page_size does not exceed the memslot */ while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) page_size >>= 1; /* Make sure gfn is aligned to the page size we want to map */ while ((gfn << PAGE_SHIFT) & (page_size - 1)) page_size >>= 1; /* Make sure hva is aligned to the page size we want to map */ while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) page_size >>= 1; /* * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. */ pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); if (is_error_noslot_pfn(pfn)) { gfn += 1; continue; } /* Map into IO address space */ r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), page_size, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%llx\n", pfn); kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); goto unmap_pages; } gfn += page_size >> PAGE_SHIFT; cond_resched(); } return 0; unmap_pages: kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); return r; }
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) { unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; __be64 *hpte; struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; unsigned hpage_shift; bool is_ci; unsigned long *rmap; pte_t *ptep; unsigned int writing; unsigned long mmu_seq; unsigned long rcbits, irq_flags = 0; if (kvm_is_radix(kvm)) return H_FUNCTION; psize = kvmppc_actual_pgsz(pteh, ptel); if (!psize) return H_PARAMETER; writing = hpte_is_writable(ptel); pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); ptel &= ~HPTE_GR_RESERVED; g_ptel = ptel; /* used later to detect if we might have been invalidated */ mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); /* Find the memslot (if any) for this address */ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); gfn = gpa >> PAGE_SHIFT; memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); pa = 0; is_ci = false; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { /* Emulated MMIO - mark this with key=31 */ pteh |= HPTE_V_ABSENT; ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; goto do_insert; } /* Check if the requested page fits entirely in the memslot. */ if (!slot_is_aligned(memslot, psize)) return H_PARAMETER; slot_fn = gfn - memslot->base_gfn; rmap = &memslot->arch.rmap[slot_fn]; /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); /* * If we had a page table table change after lookup, we would * retry via mmu_notifier_retry. */ if (!realmode) local_irq_save(irq_flags); /* * If called in real mode we have MSR_EE = 0. Otherwise * we disable irq above. */ ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; if (hpage_shift) host_pte_size = 1ul << hpage_shift; else host_pte_size = PAGE_SIZE; /* * We should always find the guest page size * to <= host page size, if host is using hugepage */ if (host_pte_size < psize) { if (!realmode) local_irq_restore(flags); return H_PARAMETER; } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); pa = pte_pfn(pte) << PAGE_SHIFT; pa |= hva & (host_pte_size - 1); pa |= gpa & ~PAGE_MASK; } } if (!realmode) local_irq_restore(irq_flags); ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; if (pa) pteh |= HPTE_V_VALID; else { pteh |= HPTE_V_ABSENT; ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); } /*If we had host pte mapping then Check WIMG */ if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { if (is_ci) return H_PARAMETER; /* * Allow guest to map emulated device memory as * uncacheable, but actually make it cacheable. */ ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); ptel |= HPTE_R_M; } /* Find and lock the HPTEG slot to use */ do_insert: if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) break; hpte += 2; } if (i == 8) { /* * Since try_lock_hpte doesn't retry (not even stdcx. * failures), it could be that there is a free slot * but we transiently failed to lock it. Try again, * actually locking each slot and checking it. */ hpte -= 16; for (i = 0; i < 8; ++i) { u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); pte = be64_to_cpu(hpte[0]); if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; __unlock_hpte(hpte, pte); hpte += 2; } if (i == 8) return H_PTEG_FULL; } pte_index += i; } else { hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); pte = be64_to_cpu(hpte[0]); if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { __unlock_hpte(hpte, pte); return H_PTEG_FULL; } } } /* Save away the guest's idea of the second HPTE dword */ rev = &kvm->arch.hpt.rev[pte_index]; if (realmode) rev = real_vmalloc_addr(rev); if (rev) { rev->guest_rpte = g_ptel; note_hpte_modification(kvm, rev); } /* Link HPTE into reverse-map chain */ if (pteh & HPTE_V_VALID) { if (realmode) rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ if (mmu_notifier_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); unlock_rmap(rmap); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, realmode); /* Only set R/C in real HPTE if already set in *rmap */ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); } } /* Convert to new format on P9 */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { ptel = hpte_old_to_new_r(pteh, ptel); pteh = hpte_old_to_new_v(pteh); } hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); __unlock_hpte(hpte, pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; return H_SUCCESS; }