int kvm_timer_hyp_init(void)
{
        struct device_node *np;
        unsigned int ppi;
        int err;

        timecounter = arch_timer_get_timecounter();
        if (!timecounter)
                return -ENODEV;

        np = of_find_matching_node(NULL, arch_timer_of_match);
        if (!np) {
                kvm_err("kvm_arch_timer: can't find DT node\n");
                return -ENODEV;
        }

        ppi = irq_of_parse_and_map(np, 2);
        if (!ppi) {
                kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
                err = -EINVAL;
                goto out;
        }

        err = request_percpu_irq(ppi, kvm_arch_timer_handler,
                                 "kvm guest timer", kvm_get_running_vcpus());
        if (err) {
                kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
                        ppi, err);
                goto out;
        }

        timer_irq.irq = ppi;

        err = register_cpu_notifier(&kvm_timer_cpu_nb);
        if (err) {
                kvm_err("Cannot register timer CPU notifier\n");
                goto out_free;
        }

        wqueue = create_singlethread_workqueue("kvm_arch_timer");
        if (!wqueue) {
                err = -ENOMEM;
                goto out_free;
        }

        kvm_info("%s IRQ%d\n", np->name, ppi);
        on_each_cpu(kvm_timer_init_interrupt, NULL, 1);

        goto out;

out_free:
        free_percpu_irq(ppi, kvm_get_running_vcpus());
out:
        of_node_put(np);
        return err;
}
/**
 * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
 * according to the host GIC model. Accordingly calls either
 * vgic_v2_probe() or vgic_v3_probe(), which registers the KVM_DEVICE
 * that can be instantiated by a guest later on.
 */
int kvm_vgic_hyp_init(void)
{
        const struct gic_kvm_info *gic_kvm_info;
        int ret;

        gic_kvm_info = gic_get_kvm_info();
        if (!gic_kvm_info)
                return -ENODEV;

        if (!gic_kvm_info->maint_irq) {
                kvm_err("No vgic maintenance irq\n");
                return -ENXIO;
        }

        switch (gic_kvm_info->type) {
        case GIC_V2:
                ret = vgic_v2_probe(gic_kvm_info);
                break;
        case GIC_V3:
                ret = vgic_v3_probe(gic_kvm_info);
                break;
        default:
                ret = -ENODEV;
        }

        if (ret)
                return ret;

        kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
        ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
                                 vgic_maintenance_handler,
                                 "vgic", kvm_get_running_vcpus());
        if (ret) {
                kvm_err("Cannot register interrupt %d\n",
                        kvm_vgic_global_state.maint_irq);
                return ret;
        }

        ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
                                "AP_KVM_ARM_VGIC_INIT_STARTING",
                                vgic_init_cpu_starting, vgic_init_cpu_dying);
        if (ret) {
                kvm_err("Cannot register vgic CPU notifier\n");
                goto out_free_irq;
        }

        kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
        return 0;

out_free_irq:
        free_percpu_irq(kvm_vgic_global_state.maint_irq,
                        kvm_get_running_vcpus());
        return ret;
}
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
                                   unsigned long end, unsigned long pfn,
                                   pgprot_t prot)
{
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, next;

        addr = start;
        do {
                pmd = pmd_offset(pud, addr);

                BUG_ON(pmd_sect(*pmd));

                if (pmd_none(*pmd)) {
                        pte = pte_alloc_one_kernel(NULL, addr);
                        if (!pte) {
                                kvm_err("Cannot allocate Hyp pte\n");
                                return -ENOMEM;
                        }
                        pmd_populate_kernel(NULL, pmd, pte);
                        get_page(virt_to_page(pmd));
                        kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
                }

                next = pmd_addr_end(addr, end);

                create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
                pfn += (next - addr) >> PAGE_SHIFT;
        } while (addr = next, addr != end);

        return 0;
}
/**
 * kvm_handle_guest_debug - handle a debug exception instruction
 *
 * @vcpu:       the vcpu pointer
 * @run:        access to the kvm_run structure for results
 *
 * We route all debug exceptions through the same handler. If both the
 * guest and host are using the same debug facilities it will be up to
 * userspace to re-inject the correct exception for guest delivery.
 *
 * @return: 0 (while setting run->exit_reason), -1 for error
 */
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
        u32 hsr = kvm_vcpu_get_hsr(vcpu);
        int ret = 0;

        run->exit_reason = KVM_EXIT_DEBUG;
        run->debug.arch.hsr = hsr;

        switch (hsr >> ESR_ELx_EC_SHIFT) {
        case ESR_ELx_EC_WATCHPT_LOW:
                run->debug.arch.far = vcpu->arch.fault.far_el2;
                /* fall through */
        case ESR_ELx_EC_SOFTSTP_LOW:
        case ESR_ELx_EC_BREAKPT_LOW:
        case ESR_ELx_EC_BKPT32:
        case ESR_ELx_EC_BRK64:
                break;
        default:
                kvm_err("%s: un-handled case hsr: %#08x\n",
                        __func__, (unsigned int) hsr);
                ret = -1;
                break;
        }

        return ret;
}
static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
                                   unsigned long end, unsigned long pfn,
                                   pgprot_t prot)
{
        pud_t *pud;
        pmd_t *pmd;
        unsigned long addr, next;
        int ret;

        addr = start;
        do {
                pud = pud_offset(pgd, addr);

                if (pud_none_or_clear_bad(pud)) {
                        pmd = pmd_alloc_one(NULL, addr);
                        if (!pmd) {
                                kvm_err("Cannot allocate Hyp pmd\n");
                                return -ENOMEM;
                        }
                        pud_populate(NULL, pud, pmd);
                        get_page(virt_to_page(pud));
                        kvm_flush_dcache_to_poc(pud, sizeof(*pud));
                }

                next = pud_addr_end(addr, end);
                ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
                if (ret)
                        return ret;
                pfn += (next - addr) >> PAGE_SHIFT;
        } while (addr = next, addr != end);

        return 0;
}
/*
 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
 * proper exit to userspace.
 */
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
                int exception_index)
{
        exit_handle_fn exit_handler;

        switch (exception_index) {
        case ARM_EXCEPTION_IRQ:
                return 1;
        case ARM_EXCEPTION_UNDEFINED:
                kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
                        kvm_vcpu_get_hyp_pc(vcpu));
                BUG();
                panic("KVM: Hypervisor undefined exception!\n");
        case ARM_EXCEPTION_DATA_ABORT:
        case ARM_EXCEPTION_PREF_ABORT:
        case ARM_EXCEPTION_HVC:
                /*
                 * See ARM ARM B1.14.1: "Hyp traps on instructions
                 * that fail their condition code check"
                 */
                if (!kvm_condition_valid(vcpu)) {
                        kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
                        return 1;
                }

                exit_handler = kvm_get_exit_handler(vcpu);

                return exit_handler(vcpu, run);
        default:
                kvm_pr_unimpl("Unsupported exception type: %d",
                              exception_index);
                run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return 0;
        }
}
static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
        /* The hypervisor should never cause aborts */
        kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
                kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
        return -EFAULT;
}
static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
        /* This is either an error in the world-switch code or an external abort */
        kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
                kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
        return -EFAULT;
}
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa)
{
        unsigned long data;
        unsigned long rt;
        int ret;
        bool is_write;
        int len;
        u8 data_buf[8];

        /*
         * Prepare MMIO operation. First decode the syndrome data we get
         * from the CPU. Then try if some in-kernel emulation feels
         * responsible, otherwise let user space do its magic.
         */
        if (kvm_vcpu_dabt_isvalid(vcpu)) {
                ret = decode_hsr(vcpu, &is_write, &len);
                if (ret)
                        return ret;
        } else {
                kvm_err("load/store instruction decoding not implemented\n");
                return -ENOSYS;
        }

        rt = vcpu->arch.mmio_decode.rt;

        if (is_write) {
                data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
                                               len);

                trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
                mmio_write_buf(data_buf, len, data);

                ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                       data_buf);
        } else {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
                               fault_ipa, 0);

                ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                      data_buf);
        }

        /* Now prepare kvm_run for the potential return to userland. */
        run->mmio.is_write = is_write;
        run->mmio.phys_addr = fault_ipa;
        run->mmio.len = len;
        memcpy(run->mmio.data, data_buf, len);

        if (!ret) {
                /* We handled the access successfully in the kernel. */
                kvm_handle_mmio_return(vcpu, run);
                return 1;
        }

        run->exit_reason = KVM_EXIT_MMIO;
        return 0;
}
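/*
 * Aside (not from the kernel source): the write path above packs the guest
 * register value into a byte buffer of the access width via mmio_write_buf()
 * before handing it to kvm_io_bus_write(). The following is a minimal,
 * standalone sketch of that packing step; mmio_pack() and the example value
 * are hypothetical stand-ins, not the kernel's helper.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy the low 'len' bytes of 'data' into 'buf' in host byte order. */
static void mmio_pack(uint8_t *buf, unsigned int len, uint64_t data)
{
        uint8_t b;
        uint16_t h;
        uint32_t w;
        uint64_t d;

        switch (len) {
        case 1: b = (uint8_t)data;  memcpy(buf, &b, 1); break;
        case 2: h = (uint16_t)data; memcpy(buf, &h, 2); break;
        case 4: w = (uint32_t)data; memcpy(buf, &w, 4); break;
        case 8: d = data;           memcpy(buf, &d, 8); break;
        }
}

int main(void)
{
        uint8_t buf[8] = { 0 };
        unsigned int i;

        mmio_pack(buf, 4, 0x11223344);          /* a 32-bit guest store */
        for (i = 0; i < 4; i++)
                printf("%02x ", buf[i]);        /* "44 33 22 11" on a little-endian host */
        printf("\n");
        return 0;
}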
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
{
        int ret;
        bool write_fault, writable, hugetlb = false, force_pte = false;
        unsigned long mmu_seq;
        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        struct vm_area_struct *vma;
        pfn_t pfn;
        pgprot_t mem_type = PAGE_S2;
        bool fault_ipa_uncached;

        write_fault = kvm_is_write_fault(vcpu);
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
        }

        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
        if (unlikely(!vma)) {
                kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
                up_read(&current->mm->mmap_sem);
                return -EFAULT;
        }

        if (is_vm_hugetlb_page(vma)) {
                hugetlb = true;
                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
        } else {
                /*
                 * Pages belonging to memslots that don't have the same
                 * alignment for userspace and IPA cannot be mapped using
                 * block descriptors even if the pages belong to a THP for
                 * the process, because the stage-2 block descriptor will
                 * cover more than a single THP and we lose atomicity for
                 * unmapping, updates, and splits of the THP or other pages
                 * in the stage-2 block range.
                 */
                if ((memslot->userspace_addr & ~PMD_MASK) !=
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          gfn_t gfn, struct kvm_memory_slot *memslot,
                          unsigned long fault_status)
{
        pte_t new_pte;
        pfn_t pfn;
        int ret;
        bool write_fault, writable;
        unsigned long mmu_seq;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

        write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
        }

        /* We need minimum second+third level pages */
        ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
        if (ret)
                return ret;

        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        /*
         * Ensure the read of mmu_notifier_seq happens before we call
         * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
         * the page we just got a reference to gets unmapped before we have a
         * chance to grab the mmu_lock, which ensures that if the page gets
         * unmapped afterwards, the call to kvm_unmap_hva will take it away
         * from us again properly. This smp_rmb() interacts with the smp_wmb()
         * in kvm_mmu_notifier_invalidate_<page|range_end>.
         */
        smp_rmb();

        pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
        if (is_error_pfn(pfn))
                return -EFAULT;

        new_pte = pfn_pte(pfn, PAGE_S2);
        coherent_icache_guest_page(vcpu->kvm, gfn);

        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                goto out_unlock;
        if (writable) {
                kvm_set_s2pte_writable(&new_pte);
                kvm_set_pfn_dirty(pfn);
        }
        stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        return 0;
}
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
        u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);

        if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
            !arm_exit_handlers[hsr_ec]) {
                kvm_err("Unknown exception class: hsr: %#08x\n",
                        (unsigned int)kvm_vcpu_get_hsr(vcpu));
                BUG();
        }

        return arm_exit_handlers[hsr_ec];
}
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
        u32 hsr = kvm_vcpu_get_hsr(vcpu);
        u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT;

        if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
            !arm_exit_handlers[hsr_ec]) {
                kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
                        hsr, esr_get_class_string(hsr));
                BUG();
        }

        return arm_exit_handlers[hsr_ec];
}
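/*
 * Aside (not from the kernel source): both lookup variants above index
 * arm_exit_handlers[] by the exception class (EC) field of the HSR/ESR_ELx
 * syndrome value, which sits in bits [31:26]. Below is a small standalone
 * illustration of that field extraction; EC_SHIFT, EC_MASK and the sample
 * syndrome value are assumptions made for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define EC_SHIFT        26
#define EC_MASK         0x3f

static unsigned int esr_to_ec(uint32_t esr)
{
        return (esr >> EC_SHIFT) & EC_MASK;
}

int main(void)
{
        uint32_t esr = 0x93c08007;      /* sample syndrome; top 6 bits give EC = 0x24 */

        printf("exception class = %#x\n", esr_to_ec(esr));
        return 0;
}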
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
{
        int ret;
        bool write_fault, writable, hugetlb = false, force_pte = false;
        unsigned long mmu_seq;
        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        struct vm_area_struct *vma;
        pfn_t pfn;
        pgprot_t mem_type = PAGE_S2;
        bool fault_ipa_uncached;
        bool logging_active = memslot_is_logging(memslot);
        unsigned long flags = 0;

        write_fault = kvm_is_write_fault(vcpu);
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
        }

        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
        if (unlikely(!vma)) {
                kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
                up_read(&current->mm->mmap_sem);
                return -EFAULT;
        }

        if (is_vm_hugetlb_page(vma) && !logging_active) {
                hugetlb = true;
                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
        } else {
/** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can * support either full 40-bit input addresses or limited to 32-bit input * addresses). Clears the allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { pgd_t *pgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); if (!pgd) return -ENOMEM; memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; }
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa)
{
        struct kvm_exit_mmio mmio;
        unsigned long data;
        unsigned long rt;
        int ret;

        /*
         * Prepare MMIO operation. First stash it in a private
         * structure that we can use for in-kernel emulation. If the
         * kernel can't handle it, copy it into run->mmio and let user
         * space do its magic.
         */
        if (kvm_vcpu_dabt_isvalid(vcpu)) {
                ret = decode_hsr(vcpu, fault_ipa, &mmio);
                if (ret)
                        return ret;
        } else {
                kvm_err("load/store instruction decoding not implemented\n");
                return -ENOSYS;
        }

        rt = vcpu->arch.mmio_decode.rt;
        data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);

        trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
                                         KVM_TRACE_MMIO_READ_UNSATISFIED,
                        mmio.len, fault_ipa,
                        (mmio.is_write) ? data : 0);

        if (mmio.is_write)
                mmio_write_buf(mmio.data, mmio.len, data);

        if (vgic_handle_mmio(vcpu, run, &mmio))
                return 1;

        kvm_prepare_mmio(run, &mmio);
        return 0;
}
/** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can * support either full 40-bit input addresses or limited to 32-bit input * addresses). Clears the allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { pgd_t *pgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); if (!pgd) return -ENOMEM; /* stage-2 pgd must be aligned to its size */ VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; }
static int __create_hyp_mappings(pgd_t *pgdp,
                                 unsigned long start, unsigned long end,
                                 unsigned long pfn, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        unsigned long addr, next;
        int err = 0;

        mutex_lock(&kvm_hyp_pgd_mutex);
        addr = start & PAGE_MASK;
        end = PAGE_ALIGN(end);
        do {
                pgd = pgdp + pgd_index(addr);
                pud = pud_offset(pgd, addr);

                if (pud_none_or_clear_bad(pud)) {
                        pmd = pmd_alloc_one(NULL, addr);
                        if (!pmd) {
                                kvm_err("Cannot allocate Hyp pmd\n");
                                err = -ENOMEM;
                                goto out;
                        }
                        pud_populate(NULL, pud, pmd);
                        get_page(virt_to_page(pud));
                        kvm_flush_dcache_to_poc(pud, sizeof(*pud));
                }

                next = pgd_addr_end(addr, end);
                err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
                if (err)
                        goto out;
                pfn += (next - addr) >> PAGE_SHIFT;
        } while (addr = next, addr != end);
out:
        mutex_unlock(&kvm_hyp_pgd_mutex);
        return err;
}
/**
 * kvm_mips_trans_replace() - Replace trapping instruction in guest memory.
 * @vcpu:       Virtual CPU.
 * @opc:        PC of instruction to replace.
 * @replace:    Instruction to write
 */
static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc,
                                  union mips_instruction replace)
{
        unsigned long kseg0_opc, flags;

        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
                kseg0_opc =
                    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
                               (vcpu, (unsigned long) opc));
                memcpy((void *)kseg0_opc, (void *)&replace, sizeof(u32));
                local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
        } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
                local_irq_save(flags);
                memcpy((void *)opc, (void *)&replace, sizeof(u32));
                local_flush_icache_range((unsigned long)opc,
                                         (unsigned long)opc + 32);
                local_irq_restore(flags);
        } else {
                kvm_err("%s: Invalid address: %p\n", __func__, opc);
                return -EFAULT;
        }

        return 0;
}
/* Deliver the interrupt of the corresponding priority, if possible. */
int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                            uint32_t cause)
{
        int allowed = 0;
        uint32_t exccode;
        struct kvm_vcpu_arch *arch = &vcpu->arch;
        struct kvm_mips_vcpu_te *vcpu_te = vcpu->arch.impl;
        struct mips_coproc *cop0 = vcpu_te->cop0;

        switch (priority) {
        case MIPS_EXC_INT_TIMER:
                if ((kvm_read_c0_guest_status(cop0) & ST0_IE) &&
                    (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) &&
                    (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
                        allowed = 1;
                        exccode = T_INT;
                }
                break;

        case MIPS_EXC_INT_IO:
                if ((kvm_read_c0_guest_status(cop0) & ST0_IE) &&
                    (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) &&
                    (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
                        allowed = 1;
                        exccode = T_INT;
                }
                break;

        case MIPS_EXC_INT_IPI_1:
                if ((kvm_read_c0_guest_status(cop0) & ST0_IE) &&
                    (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) &&
                    (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
                        allowed = 1;
                        exccode = T_INT;
                }
                break;

        case MIPS_EXC_INT_IPI_2:
                if ((kvm_read_c0_guest_status(cop0) & ST0_IE) &&
                    (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) &&
                    (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
                        allowed = 1;
                        exccode = T_INT;
                }
                break;

        default:
                break;
        }

        /* Are we allowed to deliver the interrupt? */
        if (allowed) {
                if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
                        /* save old pc */
                        kvm_write_c0_guest_epc(cop0, arch->epc);
                        kvm_set_c0_guest_status(cop0, ST0_EXL);

                        if (cause & CAUSEF_BD)
                                kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
                        else
                                kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);

                        kvm_debug("Delivering INT @ pc %#lx\n", arch->epc);
                } else
                        kvm_err("Trying to deliver interrupt when EXL is already set\n");

                kvm_change_c0_guest_cause(cop0, CAUSEF_EXCCODE,
                                          (exccode << CAUSEB_EXCCODE));

                /* XXXSL Set PC to the interrupt exception entry point */
                if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV)
                        arch->epc = KVM_GUEST_KSEG0 + 0x200;
                else
                        arch->epc = KVM_GUEST_KSEG0 + 0x180;

                clear_bit(priority, &vcpu_te->pending_exceptions);
        }

        return allowed;
}
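/*
 * Aside (not from the kernel source): the deliverability test repeated in
 * each case above is "interrupts globally enabled (Status.IE), not already
 * in exception/error mode (EXL/ERL clear), and the relevant interrupt-mask
 * bit set". A minimal standalone sketch of that predicate; the #defines
 * mirror the usual MIPS Status bit layout and the sample Status value is
 * made up for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define ST0_IE   0x00000001
#define ST0_EXL  0x00000002
#define ST0_ERL  0x00000004
#define IE_IRQ5  0x00008000     /* Status.IM bit for hardware interrupt 5 (timer) */

static int int_deliverable(uint32_t status, uint32_t im_bit)
{
        return (status & ST0_IE) &&
               !(status & (ST0_EXL | ST0_ERL)) &&
               (status & im_bit);
}

int main(void)
{
        uint32_t status = ST0_IE | IE_IRQ5;     /* sample guest Status value */

        printf("timer interrupt deliverable: %d\n",
               int_deliverable(status, IE_IRQ5));
        return 0;
}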
/**
 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
 * @vgic_node:  pointer to the DT node
 * @ops:        address of a pointer to the GICv3 operations
 * @params:     address of a pointer to HW-specific parameters
 *
 * Returns 0 if a GICv3 has been found, with the low level operations
 * in *ops and the HW parameters in *params. Returns an error code
 * otherwise.
 */
int vgic_v3_probe(struct device_node *vgic_node,
                  const struct vgic_ops **ops,
                  const struct vgic_params **params)
{
        int ret = 0;
        u32 gicv_idx;
        struct resource vcpu_res;
        struct vgic_params *vgic = &vgic_v3_params;

        vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
        if (!vgic->maint_irq) {
                kvm_err("error getting vgic maintenance irq from DT\n");
                ret = -ENXIO;
                goto out;
        }

        ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);

        /*
         * The ListRegs field is 5 bits, but there is an architectural
         * maximum of 16 list registers. Just ignore bit 4...
         */
        vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
        vgic->can_emulate_gicv2 = false;

        if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
                gicv_idx = 1;

        gicv_idx += 3; /* Also skip GICD, GICC, GICH */
        if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
                kvm_info("GICv3: no GICV resource entry\n");
                vgic->vcpu_base = 0;
        } else if (!PAGE_ALIGNED(vcpu_res.start)) {
                pr_warn("GICV physical address 0x%llx not page aligned\n",
                        (unsigned long long)vcpu_res.start);
                vgic->vcpu_base = 0;
        } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
                pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
                        (unsigned long long)resource_size(&vcpu_res),
                        PAGE_SIZE);
                vgic->vcpu_base = 0;
        } else {
                vgic->vcpu_base = vcpu_res.start;
                vgic->can_emulate_gicv2 = true;
                kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
                                        KVM_DEV_TYPE_ARM_VGIC_V2);
        }
        if (vgic->vcpu_base == 0)
                kvm_info("disabling GICv2 emulation\n");
        kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);

        vgic->vctrl_base = NULL;
        vgic->type = VGIC_V3;
        vgic->max_gic_vcpus = KVM_MAX_VCPUS;

        kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
                 vcpu_res.start, vgic->maint_irq);

        *ops = &vgic_v3_ops;
        *params = vgic;

out:
        of_node_put(vgic_node);
        return ret;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot,
                          unsigned long fault_status)
{
        int ret;
        bool write_fault, writable, hugetlb = false;
        unsigned long mmu_seq;
        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
        unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        struct vm_area_struct *vma;
        pfn_t pfn;

        write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
        }

        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
        if (is_vm_hugetlb_page(vma)) {
                hugetlb = true;
                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
        }
        up_read(&current->mm->mmap_sem);

        /* We need minimum second+third level pages */
        ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
        if (ret)
                return ret;

        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        /*
         * Ensure the read of mmu_notifier_seq happens before we call
         * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
         * the page we just got a reference to gets unmapped before we have a
         * chance to grab the mmu_lock, which ensures that if the page gets
         * unmapped afterwards, the call to kvm_unmap_hva will take it away
         * from us again properly. This smp_rmb() interacts with the smp_wmb()
         * in kvm_mmu_notifier_invalidate_<page|range_end>.
         */
        smp_rmb();

        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
        if (is_error_pfn(pfn))
                return -EFAULT;

        spin_lock(&kvm->mmu_lock);
        if (mmu_notifier_retry(kvm, mmu_seq))
                goto out_unlock;

        if (hugetlb) {
                pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
                new_pmd = pmd_mkhuge(new_pmd);
                if (writable) {
                        kvm_set_s2pmd_writable(&new_pmd);
                        kvm_set_pfn_dirty(pfn);
                }
                coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
        } else {
                pte_t new_pte = pfn_pte(pfn, PAGE_S2);
                if (writable) {
                        kvm_set_s2pte_writable(&new_pte);
                        kvm_set_pfn_dirty(pfn);
                }
                coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
        }

out_unlock:
        spin_unlock(&kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        return ret;
}
/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:       the VCPU pointer
 * @run:        the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either
 * the guest simply needs more memory and we must allocate an appropriate
 * page, or the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether
 * this memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
        unsigned long fault_status;
        phys_addr_t fault_ipa;
        struct kvm_memory_slot *memslot;
        bool is_iabt;
        gfn_t gfn;
        int ret, idx;

        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
        fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

        trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
                              kvm_vcpu_get_hfar(vcpu), fault_ipa);

        /* Check that the stage-2 fault is a translation or permission fault */
        fault_status = kvm_vcpu_trap_get_fault(vcpu);
        if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
                kvm_err("Unsupported fault status: EC=%#x DFSC=%#lx\n",
                        kvm_vcpu_trap_get_class(vcpu), fault_status);
                return -EFAULT;
        }

        idx = srcu_read_lock(&vcpu->kvm->srcu);

        gfn = fault_ipa >> PAGE_SHIFT;
        if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
                if (is_iabt) {
                        /* Prefetch Abort on I/O address */
                        kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
                        ret = 1;
                        goto out_unlock;
                }

                if (fault_status != FSC_FAULT) {
                        kvm_err("Unsupported fault status on io memory: %#lx\n",
                                fault_status);
                        ret = -EFAULT;
                        goto out_unlock;
                }

                /*
                 * The IPA is reported as [MAX:12], so we need to
                 * complement it with the bottom 12 bits from the
                 * faulting VA. This is always 12 bits, irrespective
                 * of the page size.
                 */
                fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
                ret = io_mem_abort(vcpu, run, fault_ipa);
                goto out_unlock;
        }

        memslot = gfn_to_memslot(vcpu->kvm, gfn);

        ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
        if (ret == 0)
                ret = 1;
out_unlock:
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        return ret;
}
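/*
 * Aside (not from the kernel source): on the MMIO path above, the reported
 * IPA only covers bits [MAX:12], so the bottom 12 bits are taken from the
 * faulting virtual address. A standalone illustration of that arithmetic
 * with made-up example values:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t ipa_page = 0x8f000ULL << 12;           /* IPA as reported, low 12 bits clear */
        uint64_t hfar     = 0x0000000080013a28ULL;      /* faulting VA */
        uint64_t fault_ipa = ipa_page | (hfar & ((1 << 12) - 1));

        printf("fault_ipa = %#llx\n",
               (unsigned long long)fault_ipa);          /* prints 0x8f000a28 */
        return 0;
}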
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:        The KVM struct pointer for the VM.
 *
 * Allocates only the stage-2 HW PGD level table (can support either full
 * 40-bit input addresses or limited to 32-bit input addresses). Clears the
 * allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
        pgd_t *pgd;
        void *hwpgd;

        if (kvm->arch.pgd != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }

        hwpgd = kvm_alloc_hwpgd();
        if (!hwpgd)
                return -ENOMEM;

        /*
         * When the kernel uses more levels of page tables than the
         * guest, we allocate a fake PGD and pre-populate it to point
         * to the next-level page table, which will be the real
         * initial page table pointed to by the VTTBR.
         *
         * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
         * the PMD and the kernel will use folded pud.
         * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
         * pages.
         */
        if (KVM_PREALLOC_LEVEL > 0) {
                int i;

                /*
                 * Allocate fake pgd for the page table manipulation macros to
                 * work. This is not used by the hardware and we have no
                 * alignment requirement for this allocation.
                 */
                pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
                                       GFP_KERNEL | __GFP_ZERO);
                if (!pgd) {
                        kvm_free_hwpgd(hwpgd);
                        return -ENOMEM;
                }

                /* Plug the HW PGD into the fake one. */
                for (i = 0; i < PTRS_PER_S2_PGD; i++) {
                        if (KVM_PREALLOC_LEVEL == 1)
                                pgd_populate(NULL, pgd + i,
                                             (pud_t *)hwpgd + i * PTRS_PER_PUD);
                        else if (KVM_PREALLOC_LEVEL == 2)
                                pud_populate(NULL, pud_offset(pgd, 0) + i,
                                             (pmd_t *)hwpgd + i * PTRS_PER_PMD);
                }
        } else {
                /*
                 * Allocate actual first-level Stage-2 page table used by the
                 * hardware for Stage-2 page table walks.
                 */
                pgd = (pgd_t *)hwpgd;
        }

        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
        return 0;
}