Beispiel #1
0
static struct mcinfo_bank *mca_init_bank(enum mca_source who,
                                         struct mc_info *mi, int bank)
{
    struct mcinfo_bank *mib;
    uint64_t addr=0, misc = 0;

    if (!mi)
        return NULL;

    mib = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_bank));
    if (!mib)
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return NULL;
    }

    memset(mib, 0, sizeof (struct mcinfo_bank));
    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));

    mib->common.type = MC_TYPE_BANK;
    mib->common.size = sizeof (struct mcinfo_bank);
    mib->mc_bank = bank;

    addr = misc = 0;
    if (mib->mc_status & MCi_STATUS_MISCV)
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));

    if (mib->mc_status & MCi_STATUS_ADDRV)
    {
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));

        if (mfn_valid(paddr_to_pfn(mib->mc_addr))) {
            struct domain *d;

            d = maddr_get_owner(mib->mc_addr);
            if (d != NULL && (who == MCA_POLLER ||
                              who == MCA_CMCI_HANDLER))
                mib->mc_domid = d->domain_id;
        }
    }

    if (who == MCA_CMCI_HANDLER) {
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
        rdtscll(mib->mc_tsc);
    }

    return mib;
}
Beispiel #2
0
static void mca_init_bank(enum mca_source who,
    struct mc_info *mi, int bank)
{
    struct mcinfo_bank *mib;

    if (!mi)
        return;

    mib = x86_mcinfo_reserve(mi, sizeof(*mib));
    if (!mib)
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return;
    }

    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));

    mib->common.type = MC_TYPE_BANK;
    mib->common.size = sizeof (struct mcinfo_bank);
    mib->mc_bank = bank;

    if (mib->mc_status & MCi_STATUS_MISCV)
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));

    if (mib->mc_status & MCi_STATUS_ADDRV)
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));

    if ((mib->mc_status & MCi_STATUS_MISCV) &&
        (mib->mc_status & MCi_STATUS_ADDRV) &&
        (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) &&
        (who == MCA_POLLER || who == MCA_CMCI_HANDLER) &&
        (mfn_valid(paddr_to_pfn(mib->mc_addr))))
    {
        struct domain *d;

        d = maddr_get_owner(mib->mc_addr);
        if (d)
            mib->mc_domid = d->domain_id;
    }

    if (who == MCA_CMCI_HANDLER) {
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
        rdtscll(mib->mc_tsc);
    }
}
Beispiel #3
0
/* Machine Check Handler for AMD K8 family series */
void k8_machine_check(struct cpu_user_regs *regs, long error_code)
{
	struct vcpu *vcpu = current;
	struct domain *curdom;
	struct mc_info *mc_data;
	struct mcinfo_global mc_global;
	struct mcinfo_bank mc_info;
	uint64_t status, addrv, miscv, uc;
	uint32_t i;
	unsigned int cpu_nr;
	uint32_t xen_impacted = 0;
#define DOM_NORMAL	0
#define DOM0_TRAP	1
#define DOMU_TRAP	2
#define DOMU_KILLED	4
	uint32_t dom_state = DOM_NORMAL;

	/* This handler runs as interrupt gate. So IPIs from the
	 * polling service routine are defered until we finished.
	 */

        /* Disable interrupts for the _vcpu_. It may not re-scheduled to
	 * an other physical CPU or the impacted process in the guest
	 * continues running with corrupted data, otherwise. */
        vcpu_schedule_lock_irq(vcpu);

	mc_data = x86_mcinfo_getptr();
	cpu_nr = smp_processor_id();
	curdom = vcpu->domain;

	memset(&mc_global, 0, sizeof(mc_global));
	mc_global.common.type = MC_TYPE_GLOBAL;
	mc_global.common.size = sizeof(mc_global);

	mc_global.mc_domid = curdom->domain_id; /* impacted domain */
	mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
	BUG_ON(cpu_nr != vcpu->processor);
	mc_global.mc_core_threadid = 0;
	mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
#if 0 /* TODO: on which socket is this physical core?
         It's not clear to me how to figure this out. */
	mc_global.mc_socketid = ???;
#endif
	mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
	rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);

	/* Quick check, who is impacted */
	xen_impacted = is_idle_domain(curdom);

	/* Dom0 */
	x86_mcinfo_clear(mc_data);
	x86_mcinfo_add(mc_data, &mc_global);

	for (i = 0; i < nr_mce_banks; i++) {
		struct domain *d;

		rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);

		if (!(status & MCi_STATUS_VAL))
			continue;

		/* An error happened in this bank.
		 * This is expected to be an uncorrectable error,
		 * since correctable errors get polled.
		 */
		uc = status & MCi_STATUS_UC;

		memset(&mc_info, 0, sizeof(mc_info));
		mc_info.common.type = MC_TYPE_BANK;
		mc_info.common.size = sizeof(mc_info);
		mc_info.mc_bank = i;
		mc_info.mc_status = status;

		addrv = 0;
		if (status & MCi_STATUS_ADDRV) {
			rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv);
			
			d = maddr_get_owner(addrv);
			if (d != NULL)
				mc_info.mc_domid = d->domain_id;
		}

		miscv = 0;
		if (status & MCi_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv);

		mc_info.mc_addr = addrv;
		mc_info.mc_misc = miscv;

		x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */

		if (mc_callback_bank_extended)
			mc_callback_bank_extended(mc_data, i, status);

		/* clear status */
		wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
		wmb();
		add_taint(TAINT_MACHINE_CHECK);
	}

	status = mc_global.mc_gstatus;

	/* clear MCIP or cpu enters shutdown state
	 * in case another MCE occurs. */
	status &= ~MCG_STATUS_MCIP;
	wrmsrl(MSR_IA32_MCG_STATUS, status);
	wmb();

	/* For the details see the discussion "MCE/MCA concept" on xen-devel.
	 * The thread started here:
	 * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html
	 */

	/* MCG_STATUS_RIPV: 
	 * When this bit is not set, then the instruction pointer onto the stack
	 * to resume at is not valid. If xen is interrupted, then we panic anyway
	 * right below. Otherwise it is up to the guest to figure out if 
	 * guest kernel or guest userland is affected and should kill either
	 * itself or the affected process.
	 */

	/* MCG_STATUS_EIPV:
	 * Evaluation of EIPV is the job of the guest.
	 */

	if (xen_impacted) {
		/* Now we are going to panic anyway. Allow interrupts, so that
		 * printk on serial console can work. */
		vcpu_schedule_unlock_irq(vcpu);

		/* Uh, that means, machine check exception
		 * inside Xen occured. */
		printk("Machine check exception occured in Xen.\n");

		/* if MCG_STATUS_EIPV indicates, the IP on the stack is related
		 * to the error then it makes sense to print a stack trace.
		 * That can be useful for more detailed error analysis and/or
		 * error case studies to figure out, if we can clear
		 * xen_impacted and kill a DomU instead
		 * (i.e. if a guest only control structure is affected, but then
		 * we must ensure the bad pages are not re-used again).
		 */
		if (status & MCG_STATUS_EIPV) {
			printk("MCE: Instruction Pointer is related to the error. "
				"Therefore, print the execution state.\n");
			show_execution_state(regs);
		}
		x86_mcinfo_dump(mc_data);
		mc_panic("End of MCE. Use mcelog to decode above error codes.\n");
	}

	/* If Dom0 registered a machine check handler, which is only possible
	 * with a PV MCA driver, then ... */
	if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) {
		dom_state = DOM0_TRAP;

		/* ... deliver machine check trap to Dom0. */
		send_guest_trap(dom0, 0, TRAP_machine_check);

		/* Xen may tell Dom0 now to notify the DomU.
		 * But this will happen through a hypercall. */
	} else
		/* Dom0 did not register a machine check handler, but if DomU
		 * did so, then... */
                if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) {
			dom_state = DOMU_TRAP;

			/* ... deliver machine check trap to DomU */
			send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check);
	} else {
		/* hmm... noone feels responsible to handle the error.
		 * So, do a quick check if a DomU is impacted or not.
		 */
		if (curdom == dom0) {
			/* Dom0 is impacted. Since noone can't handle
			 * this error, panic! */
			x86_mcinfo_dump(mc_data);
			mc_panic("MCE occured in Dom0, which it can't handle\n");

			/* UNREACHED */
		} else {
			dom_state = DOMU_KILLED;

			/* Enable interrupts. This basically results in
			 * calling sti on the *physical* cpu. But after
			 * domain_crash() the vcpu pointer is invalid.
			 * Therefore, we must unlock the irqs before killing
			 * it. */
			vcpu_schedule_unlock_irq(vcpu);

			/* DomU is impacted. Kill it and continue. */
			domain_crash(curdom);
		}
	}


	switch (dom_state) {
	case DOM0_TRAP:
	case DOMU_TRAP:
		/* Enable interrupts. */
		vcpu_schedule_unlock_irq(vcpu);

		/* guest softirqs and event callbacks are scheduled
		 * immediately after this handler exits. */
		break;
	case DOMU_KILLED:
		/* Nothing to do here. */
		break;
	default:
		BUG();
	}
}