Пример #1
0
int kvm_timer_hyp_init(void)
{
	struct device_node *np;
	unsigned int ppi;
	int err;

	timecounter = arch_timer_get_timecounter();
	if (!timecounter)
		return -ENODEV;

	np = of_find_matching_node(NULL, arch_timer_of_match);
	if (!np) {
		kvm_err("kvm_arch_timer: can't find DT node\n");
		return -ENODEV;
	}

	ppi = irq_of_parse_and_map(np, 2);
	if (!ppi) {
		kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
		err = -EINVAL;
		goto out;
	}

	err = request_percpu_irq(ppi, kvm_arch_timer_handler,
				 "kvm guest timer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
			ppi, err);
		goto out;
	}

	timer_irq.irq = ppi;

	err = register_cpu_notifier(&kvm_timer_cpu_nb);
	if (err) {
		kvm_err("Cannot register timer CPU notifier\n");
		goto out_free;
	}

	wqueue = create_singlethread_workqueue("kvm_arch_timer");
	if (!wqueue) {
		err = -ENOMEM;
		goto out_free;
	}

	kvm_info("%s IRQ%d\n", np->name, ppi);
	on_each_cpu(kvm_timer_init_interrupt, NULL, 1);

	goto out;
out_free:
	free_percpu_irq(ppi, kvm_get_running_vcpus());
out:
	of_node_put(np);
	return err;
}
Пример #2
0
/**
 * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
 * according to the host GIC model. Accordingly calls either
 * vgic_v2/v3_probe which registers the KVM_DEVICE that can be
 * instantiated by a guest later on .
 */
int kvm_vgic_hyp_init(void)
{
	const struct gic_kvm_info *gic_kvm_info;
	int ret;

	gic_kvm_info = gic_get_kvm_info();
	if (!gic_kvm_info)
		return -ENODEV;

	if (!gic_kvm_info->maint_irq) {
		kvm_err("No vgic maintenance irq\n");
		return -ENXIO;
	}

	switch (gic_kvm_info->type) {
	case GIC_V2:
		ret = vgic_v2_probe(gic_kvm_info);
		break;
	case GIC_V3:
		ret = vgic_v3_probe(gic_kvm_info);
		break;
	default:
		ret = -ENODEV;
	};

	if (ret)
		return ret;

	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
	ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
				 vgic_maintenance_handler,
				 "vgic", kvm_get_running_vcpus());
	if (ret) {
		kvm_err("Cannot register interrupt %d\n",
			kvm_vgic_global_state.maint_irq);
		return ret;
	}

	ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
				"AP_KVM_ARM_VGIC_INIT_STARTING",
				vgic_init_cpu_starting, vgic_init_cpu_dying);
	if (ret) {
		kvm_err("Cannot register vgic CPU notifier\n");
		goto out_free_irq;
	}

	kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
	return 0;

out_free_irq:
	free_percpu_irq(kvm_vgic_global_state.maint_irq,
			kvm_get_running_vcpus());
	return ret;
}
Пример #3
0
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}
Пример #4
0
/**
 * kvm_handle_guest_debug - handle a debug exception instruction
 *
 * @vcpu:	the vcpu pointer
 * @run:	access to the kvm_run structure for results
 *
 * We route all debug exceptions through the same handler. If both the
 * guest and host are using the same debug facilities it will be up to
 * userspace to re-inject the correct exception for guest delivery.
 *
 * @return: 0 (while setting run->exit_reason), -1 for error
 */
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	u32 hsr = kvm_vcpu_get_hsr(vcpu);
	int ret = 0;

	run->exit_reason = KVM_EXIT_DEBUG;
	run->debug.arch.hsr = hsr;

	switch (hsr >> ESR_ELx_EC_SHIFT) {
	case ESR_ELx_EC_WATCHPT_LOW:
		run->debug.arch.far = vcpu->arch.fault.far_el2;
		/* fall through */
	case ESR_ELx_EC_SOFTSTP_LOW:
	case ESR_ELx_EC_BREAKPT_LOW:
	case ESR_ELx_EC_BKPT32:
	case ESR_ELx_EC_BRK64:
		break;
	default:
		kvm_err("%s: un-handled case hsr: %#08x\n",
			__func__, (unsigned int) hsr);
		ret = -1;
		break;
	}

	return ret;
}
Пример #5
0
static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}
Пример #6
0
/*
 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
 * proper exit to userspace.
 */
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
		       int exception_index)
{
	exit_handle_fn exit_handler;

	switch (exception_index) {
	case ARM_EXCEPTION_IRQ:
		return 1;
	case ARM_EXCEPTION_UNDEFINED:
		kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
			kvm_vcpu_get_hyp_pc(vcpu));
		BUG();
		panic("KVM: Hypervisor undefined exception!\n");
	case ARM_EXCEPTION_DATA_ABORT:
	case ARM_EXCEPTION_PREF_ABORT:
	case ARM_EXCEPTION_HVC:
		/*
		 * See ARM ARM B1.14.1: "Hyp traps on instructions
		 * that fail their condition code check"
		 */
		if (!kvm_condition_valid(vcpu)) {
			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
			return 1;
		}

		exit_handler = kvm_get_exit_handler(vcpu);

		return exit_handler(vcpu, run);
	default:
		kvm_pr_unimpl("Unsupported exception type: %d",
			      exception_index);
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return 0;
	}
}
Пример #7
0
static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	/* The hypervisor should never cause aborts */
	kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
		kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
	return -EFAULT;
}
Пример #8
0
static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	/* This is either an error in the ws. code or an external abort */
	kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
		kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
	return -EFAULT;
}
Пример #9
0
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
		 phys_addr_t fault_ipa)
{
	unsigned long data;
	unsigned long rt;
	int ret;
	bool is_write;
	int len;
	u8 data_buf[8];

	/*
	 * Prepare MMIO operation. First decode the syndrome data we get
	 * from the CPU. Then try if some in-kernel emulation feels
	 * responsible, otherwise let user space do its magic.
	 */
	if (kvm_vcpu_dabt_isvalid(vcpu)) {
		ret = decode_hsr(vcpu, &is_write, &len);
		if (ret)
			return ret;
	} else {
		kvm_err("load/store instruction decoding not implemented\n");
		return -ENOSYS;
	}

	rt = vcpu->arch.mmio_decode.rt;

	if (is_write) {
		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
					       len);

		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
		mmio_write_buf(data_buf, len, data);

		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
				       data_buf);
	} else {
		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
			       fault_ipa, 0);

		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
				      data_buf);
	}

	/* Now prepare kvm_run for the potential return to userland. */
	run->mmio.is_write	= is_write;
	run->mmio.phys_addr	= fault_ipa;
	run->mmio.len		= len;
	memcpy(run->mmio.data, data_buf, len);

	if (!ret) {
		/* We handled the access successfully in the kernel. */
		kvm_handle_mmio_return(vcpu, run);
		return 1;
	}

	run->exit_reason	= KVM_EXIT_MMIO;
	return 0;
}
Пример #10
0
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false, force_pte = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	pfn_t pfn;
	pgprot_t mem_type = PAGE_S2;
	bool fault_ipa_uncached;

	write_fault = kvm_is_write_fault(vcpu);
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma)) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	} else {
		/*
		 * Pages belonging to memslots that don't have the same
		 * alignment for userspace and IPA cannot be mapped using
		 * block descriptors even if the pages belong to a THP for
		 * the process, because the stage-2 block descriptor will
		 * cover more than a single THP and we loose atomicity for
		 * unmapping, updates, and splits of the THP or other pages
		 * in the stage-2 block range.
		 */
		if ((memslot->userspace_addr & ~PMD_MASK) !=
Пример #11
0
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  gfn_t gfn, struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	pte_t new_pte;
	pfn_t pfn;
	int ret;
	bool write_fault, writable;
	unsigned long mmu_seq;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensure that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	new_pte = pfn_pte(pfn, PAGE_S2);
	coherent_icache_guest_page(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
		goto out_unlock;
	if (writable) {
		kvm_set_s2pte_writable(&new_pte);
		kvm_set_pfn_dirty(pfn);
	}
	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}
Пример #12
0
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);

	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
	    !arm_exit_handlers[hsr_ec]) {
		kvm_err("Unknown exception class: hsr: %#08x\n",
			(unsigned int)kvm_vcpu_get_hsr(vcpu));
		BUG();
	}

	return arm_exit_handlers[hsr_ec];
}
Пример #13
0
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
	u32 hsr = kvm_vcpu_get_hsr(vcpu);
	u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT;

	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
	    !arm_exit_handlers[hsr_ec]) {
		kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
			hsr, esr_get_class_string(hsr));
		BUG();
	}

	return arm_exit_handlers[hsr_ec];
}
Пример #14
0
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false, force_pte = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	pfn_t pfn;
	pgprot_t mem_type = PAGE_S2;
	bool fault_ipa_uncached;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long flags = 0;

	write_fault = kvm_is_write_fault(vcpu);
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma) && !logging_active) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	} else {
Пример #15
0
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (!pgd)
		return -ENOMEM;

	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;

	return 0;
}
Пример #16
0
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
		 phys_addr_t fault_ipa)
{
	struct kvm_exit_mmio mmio;
	unsigned long data;
	unsigned long rt;
	int ret;

	/*
	 * Prepare MMIO operation. First stash it in a private
	 * structure that we can use for in-kernel emulation. If the
	 * kernel can't handle it, copy it into run->mmio and let user
	 * space do its magic.
	 */

	if (kvm_vcpu_dabt_isvalid(vcpu)) {
		ret = decode_hsr(vcpu, fault_ipa, &mmio);
		if (ret)
			return ret;
	} else {
		kvm_err("load/store instruction decoding not implemented\n");
		return -ENOSYS;
	}

	rt = vcpu->arch.mmio_decode.rt;
	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);

	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
					 KVM_TRACE_MMIO_READ_UNSATISFIED,
			mmio.len, fault_ipa,
			(mmio.is_write) ? data : 0);

	if (mmio.is_write)
		mmio_write_buf(mmio.data, mmio.len, data);

	if (vgic_handle_mmio(vcpu, run, &mmio))
		return 1;

	kvm_prepare_mmio(run, &mmio);
	return 0;
}
Пример #17
0
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (!pgd)
		return -ENOMEM;

	/* stage-2 pgd must be aligned to its size */
	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));

	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;

	return 0;
}
Пример #18
0
static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				err = -ENOMEM;
				goto out;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}
Пример #19
0
/**
 * kvm_mips_trans_replace() - Replace trapping instruction in guest memory.
 * @vcpu:	Virtual CPU.
 * @opc:	PC of instruction to replace.
 * @replace:	Instruction to write
 */
static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc,
				  union mips_instruction replace)
{
	unsigned long kseg0_opc, flags;

	if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
		kseg0_opc =
		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
			       (vcpu, (unsigned long) opc));
		memcpy((void *)kseg0_opc, (void *)&replace, sizeof(u32));
		local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
		local_irq_save(flags);
		memcpy((void *)opc, (void *)&replace, sizeof(u32));
		local_flush_icache_range((unsigned long)opc,
					 (unsigned long)opc + 32);
		local_irq_restore(flags);
	} else {
		kvm_err("%s: Invalid address: %p\n", __func__, opc);
		return -EFAULT;
	}

	return 0;
}
Пример #20
0
/* Deliver the interrupt of the corresponding priority, if possible. */
int
kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
			uint32_t cause)
{
	int allowed = 0;
	uint32_t exccode;

	struct kvm_vcpu_arch *arch = &vcpu->arch;
	struct kvm_mips_vcpu_te *vcpu_te = vcpu->arch.impl;
	struct mips_coproc *cop0 = vcpu_te->cop0;

	switch (priority) {
	case MIPS_EXC_INT_TIMER:
		if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
		    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
		    && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
			allowed = 1;
			exccode = T_INT;
		}
		break;

	case MIPS_EXC_INT_IO:
		if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
		    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
		    && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
			allowed = 1;
			exccode = T_INT;
		}
		break;

	case MIPS_EXC_INT_IPI_1:
		if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
		    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
		    && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
			allowed = 1;
			exccode = T_INT;
		}
		break;

	case MIPS_EXC_INT_IPI_2:
		if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
		    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
		    && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
			allowed = 1;
			exccode = T_INT;
		}
		break;

	default:
		break;
	}

	/* Are we allowed to deliver the interrupt ??? */
	if (allowed) {

		if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
			/* save old pc */
			kvm_write_c0_guest_epc(cop0, arch->epc);
			kvm_set_c0_guest_status(cop0, ST0_EXL);

			if (cause & CAUSEF_BD)
				kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
			else
				kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);

			kvm_debug("Delivering INT @ pc %#lx\n", arch->epc);

		} else
			kvm_err("Trying to deliver interrupt when EXL is already set\n");

		kvm_change_c0_guest_cause(cop0, CAUSEF_EXCCODE,
					  (exccode << CAUSEB_EXCCODE));

		/* XXXSL Set PC to the interrupt exception entry point */
		if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV)
			arch->epc = KVM_GUEST_KSEG0 + 0x200;
		else
			arch->epc = KVM_GUEST_KSEG0 + 0x180;

		clear_bit(priority, &vcpu_te->pending_exceptions);
	}

	return allowed;
}
Пример #21
0
/**
 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
 * @node:	pointer to the DT node
 * @ops: 	address of a pointer to the GICv3 operations
 * @params:	address of a pointer to HW-specific parameters
 *
 * Returns 0 if a GICv3 has been found, with the low level operations
 * in *ops and the HW parameters in *params. Returns an error code
 * otherwise.
 */
int vgic_v3_probe(struct device_node *vgic_node,
		  const struct vgic_ops **ops,
		  const struct vgic_params **params)
{
	int ret = 0;
	u32 gicv_idx;
	struct resource vcpu_res;
	struct vgic_params *vgic = &vgic_v3_params;

	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
	if (!vgic->maint_irq) {
		kvm_err("error getting vgic maintenance irq from DT\n");
		ret = -ENXIO;
		goto out;
	}

	ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);

	/*
	 * The ListRegs field is 5 bits, but there is a architectural
	 * maximum of 16 list registers. Just ignore bit 4...
	 */
	vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
	vgic->can_emulate_gicv2 = false;

	if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
		gicv_idx = 1;

	gicv_idx += 3; /* Also skip GICD, GICC, GICH */
	if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
		kvm_info("GICv3: no GICV resource entry\n");
		vgic->vcpu_base = 0;
	} else if (!PAGE_ALIGNED(vcpu_res.start)) {
		pr_warn("GICV physical address 0x%llx not page aligned\n",
			(unsigned long long)vcpu_res.start);
		vgic->vcpu_base = 0;
	} else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
		pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
			(unsigned long long)resource_size(&vcpu_res),
			PAGE_SIZE);
		vgic->vcpu_base = 0;
	} else {
		vgic->vcpu_base = vcpu_res.start;
		vgic->can_emulate_gicv2 = true;
		kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
					KVM_DEV_TYPE_ARM_VGIC_V2);
	}
	if (vgic->vcpu_base == 0)
		kvm_info("disabling GICv2 emulation\n");
	kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);

	vgic->vctrl_base = NULL;
	vgic->type = VGIC_V3;
	vgic->max_gic_vcpus = KVM_MAX_VCPUS;

	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
		 vcpu_res.start, vgic->maint_irq);

	*ops = &vgic_v3_ops;
	*params = vgic;

out:
	of_node_put(vgic_node);
	return ret;
}
Пример #22
0
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	pfn_t pfn;

	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (is_vm_hugetlb_page(vma)) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	}
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensure that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
			kvm_set_s2pmd_writable(&new_pmd);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
		if (writable) {
			kvm_set_s2pte_writable(&new_pte);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
	}


out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return ret;
}
Пример #23
0
/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	bool is_iabt;
	gfn_t gfn;
	int ret, idx;

	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = kvm_vcpu_trap_get_fault(vcpu);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu), fault_status);
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		if (fault_status != FSC_FAULT) {
			kvm_err("Unsupported fault status on io memory: %#lx\n",
				fault_status);
			ret = -EFAULT;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	memslot = gfn_to_memslot(vcpu->kvm, gfn);

	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}
Пример #24
0
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;
	void *hwpgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	hwpgd = kvm_alloc_hwpgd();
	if (!hwpgd)
		return -ENOMEM;

	/* When the kernel uses more levels of page tables than the
	 * guest, we allocate a fake PGD and pre-populate it to point
	 * to the next-level page table, which will be the real
	 * initial page table pointed to by the VTTBR.
	 *
	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
	 * the PMD and the kernel will use folded pud.
	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
	 * pages.
	 */
	if (KVM_PREALLOC_LEVEL > 0) {
		int i;

		/*
		 * Allocate fake pgd for the page table manipulation macros to
		 * work.  This is not used by the hardware and we have no
		 * alignment requirement for this allocation.
		 */
		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
				       GFP_KERNEL | __GFP_ZERO);

		if (!pgd) {
			kvm_free_hwpgd(hwpgd);
			return -ENOMEM;
		}

		/* Plug the HW PGD into the fake one. */
		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
			if (KVM_PREALLOC_LEVEL == 1)
				pgd_populate(NULL, pgd + i,
					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
			else if (KVM_PREALLOC_LEVEL == 2)
				pud_populate(NULL, pud_offset(pgd, 0) + i,
					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
		}
	} else {
		/*
		 * Allocate actual first-level Stage-2 page table used by the
		 * hardware for Stage-2 page table walks.
		 */
		pgd = (pgd_t *)hwpgd;
	}

	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;
	return 0;
}