static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
	if (pud_huge(*pud)) {
		pud_clear(pud);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		pmd_t *pmd_table = pmd_offset(pud, 0);
		pud_clear(pud);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
		pmd_free(NULL, pmd_table);
	}
	put_page(virt_to_page(pud));
}
static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	if (kvm_pmd_huge(*pmd)) {
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		pte_t *pte_table = pte_offset_kernel(pmd, 0);
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
		pte_free_kernel(NULL, pte_table);
	}
	put_page(virt_to_page(pmd));
}
static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			       phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	/*
	 * Mapping in huge pages should only happen through a fault. If a
	 * page is merged into a transparent huge page, the individual
	 * subpages of that huge page should be unmapped through MMU
	 * notifiers before we get here.
	 *
	 * Merging of CompoundPages is not supported; they should be split
	 * first, unmapped, merged, and mapped back in on demand.
	 */
	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

	old_pmd = *pmd;
	kvm_set_pmd(pmd, *new_pmd);
	if (pmd_present(old_pmd))
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	else
		get_page(virt_to_page(pmd));

	return 0;
}
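/*
 * stage2_get_pmd() is used above and in stage2_set_pte() below but is not
 * reproduced in this section. The following is a minimal sketch only,
 * assuming the same cache-backed table allocation pattern as the rest of
 * the code; it is not necessarily the exact in-tree implementation.
 */
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	pgd = kvm->arch.pgd + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return NULL;
		/* Take a pre-allocated PMD table from the memory cache */
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		get_page(virt_to_page(pud));
	}

	return pmd_offset(pud, addr);
}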
static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pud_t *pud, *start_pud;

	start_pud = pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			if (pud_huge(*pud)) {
				pud_t old_pud = *pud;

				pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				kvm_flush_dcache_pud(old_pud);
				put_page(virt_to_page(pud));
			} else {
				unmap_pmds(kvm, pud, addr, next);
			}
		}
	} while (pud++, addr = next, addr != end);

	if (kvm_pud_table_empty(kvm, start_pud))
		clear_pgd_entry(kvm, pgd, start_addr);
}
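/*
 * unmap_puds() delegates non-huge entries to unmap_pmds(), which is not
 * reproduced in this section. A sketch of that intermediate level,
 * mirroring the PUD walk above and the PTE walk below, is shown here;
 * kvm_pmd_addr_end(), kvm_flush_dcache_pmd() and kvm_pmd_table_empty()
 * are assumed to follow the same conventions as their PUD/PTE siblings.
 */
static void unmap_pmds(struct kvm *kvm, pud_t *pud,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = pmd_offset(pud, addr);
	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd)) {
				pmd_t old_pmd = *pmd;

				/* Break-before-make, then clean the dcache */
				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				kvm_flush_dcache_pmd(old_pmd);
				put_page(virt_to_page(pmd));
			} else {
				unmap_ptes(kvm, pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);

	/* Free the PMD table itself once it no longer maps anything */
	if (kvm_pmd_table_empty(kvm, start_pmd))
		clear_pud_entry(kvm, pud, start_addr);
}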
/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM. However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
 * the IO subsystem will never hit in the cache.
 */
static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			pte_t old_pte = *pte;

			kvm_set_pte(pte, __pte(0));
			kvm_tlb_flush_vmid_ipa(kvm, addr);

			/* No need to invalidate the cache for device mappings */
			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
				kvm_flush_dcache_pte(old_pte);

			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (kvm_pte_table_empty(kvm, start_pte))
		clear_pmd_entry(kvm, pmd, start_addr);
}
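/*
 * The kvm_flush_dcache_p*() helpers referenced in the comment above are
 * per-architecture and not part of this section. As an illustration only,
 * the PTE-level helper on arm64 is roughly a clean+invalidate of the
 * backing page to the point of coherency; the helper name
 * kvm_flush_dcache_to_poc() and this exact shape are assumptions based on
 * the arm64 headers of the same era, not code taken from this section.
 */
static inline void __kvm_flush_dcache_pte(pte_t pte)
{
	struct page *page = pte_page(pte);

	/* Clean+invalidate the whole page to the point of coherency */
	kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
}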
static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);

	pgd_clear(pgd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pud_free(NULL, pud_table);
	put_page(virt_to_page(pgd));
}
static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
{
	if (pte_present(*pte)) {
		kvm_set_pte(pte, __pte(0));
		put_page(virt_to_page(pte));
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	}
}
/**
 * stage2_dissolve_pmd() - clear and flush huge PMD entry
 * @kvm:	pointer to kvm structure.
 * @addr:	IPA
 * @pmd:	pmd pointer for IPA
 *
 * Clears a huge PMD entry and flushes the 1st and 2nd stage TLBs for @addr.
 * Marks all pages in the range dirty.
 */
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
	if (!kvm_pmd_huge(*pmd))
		return;

	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	put_page(virt_to_page(pmd));
}
/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1
 * table and setting the struct pointer to NULL.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	if (kvm->arch.pgd == NULL)
		return;

	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	kvm_tlb_flush_vmid_ipa(kvm, 0); /* Invalidate TLB ALL */
	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
	kvm->arch.pgd = NULL;
}
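/*
 * unmap_stage2_range() itself is not shown in this section. A minimal
 * sketch of that top-level walk, assuming the same pgd_index() and
 * kvm_pgd_addr_end() conventions as the lower-level walks above; not
 * necessarily identical to the in-tree implementation.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	pgd = kvm->arch.pgd + pgd_index(addr);
	do {
		next = kvm_pgd_addr_end(addr, end);
		if (!pgd_none(*pgd))
			unmap_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}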
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte,
			  unsigned long flags)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;
	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;

	VM_BUG_ON(logging_active && !cache);

	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/*
	 * While dirty page logging - dissolve huge PMD, then continue on to
	 * allocate page.
	 */
	if (logging_active)
		stage2_dissolve_pmd(kvm, addr, pmd);

	/* Create stage-2 page mappings - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		kvm_clean_pte(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	kvm_set_pte(pte, *new_pte);
	if (pte_present(old_pte))
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	else
		get_page(virt_to_page(pte));

	return 0;
}
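/*
 * Illustrative sketch only: how a fault-handling caller might build the
 * flags argument for the flags-based variant above. The wrapper name
 * user_mem_abort_sketch() and the parameters memcache, fault_ipa, device
 * and logging_active are assumptions for illustration, not code from this
 * section.
 */
static int user_mem_abort_sketch(struct kvm *kvm,
				 struct kvm_mmu_memory_cache *memcache,
				 phys_addr_t fault_ipa, pte_t new_pte,
				 bool device, bool logging_active)
{
	unsigned long flags = 0;

	if (device)			/* MMIO-backed page: no dcache flush */
		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
	if (logging_active)		/* dirty logging: dissolve huge PMDs */
		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;

	return stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
}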
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;

	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/* Create stage-2 page mappings - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		kvm_clean_pte(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	if (pte_present(old_pte)) {
		/* Skip page table update if there is no change */
		if (pte_val(old_pte) == pte_val(*new_pte))
			return 0;

		kvm_set_pte(pte, __pte(0));
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pte));
	}

	kvm_set_pte(pte, *new_pte);
	return 0;
}
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, old_pte;

	/* Create 2nd stage page table mapping - Level 1 */
	pgd = kvm->arch.pgd + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		get_page(virt_to_page(pud));
	}

	pmd = pmd_offset(pud, addr);

	/* Create 2nd stage page table mapping - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		kvm_clean_pte(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	kvm_set_pte(pte, *new_pte);
	if (pte_present(old_pte))
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	else
		get_page(virt_to_page(pte));

	return 0;
}
static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			       phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		/*
		 * Multiple vcpus faulting on the same PMD entry can
		 * lead to them sequentially updating the PMD with the
		 * same value. Following the break-before-make
		 * (pmd_clear() followed by tlb_flush()) process can
		 * hinder forward progress due to refaults generated
		 * on missing translations.
		 *
		 * Skip updating the page table if the entry is
		 * unchanged.
		 */
		if (pmd_val(old_pmd) == pmd_val(*new_pmd))
			return 0;

		/*
		 * Mapping in huge pages should only happen through a
		 * fault. If a page is merged into a transparent huge
		 * page, the individual subpages of that huge page
		 * should be unmapped through MMU notifiers before we
		 * get here.
		 *
		 * Merging of CompoundPages is not supported; they
		 * should be split first, unmapped, merged, and mapped
		 * back in on demand.
		 */
		VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));

		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}
static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			kvm_set_pte(pte, __pte(0));
			put_page(virt_to_page(pte));
			kvm_tlb_flush_vmid_ipa(kvm, addr);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (kvm_pte_table_empty(kvm, start_pte))
		clear_pmd_entry(kvm, pmd, start_addr);
}