Example #1
void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
{
	if (c->x86_vendor != X86_VENDOR_INTEL)
		return;
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;

	/* Unmask CPUID levels if masked: */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		u64 misc_enable;

		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
			misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
			c->cpuid_level = cpuid_eax(0);
			printk("revised cpuid_level = %d\n", c->cpuid_level);
		}
	}
}
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
	/* Unmask CPUID levels if masked: */
	if (c->x86 == 6 && c->x86_model >= 15) {
		u64 misc_enable;

		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
			misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
			c->cpuid_level = cpuid_eax(0);
		}
	}

	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;
	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
		(c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
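
Example #1 clears the Limit-CPUID bit in IA32_MISC_ENABLE so that CPUID reports its full level range. As a rough companion sketch only (not part of the kernel code above), the following user-space program reads the same MSR through Linux's /dev/cpu/*/msr interface; it assumes root privileges and a loaded msr module, and the register number 0x1a0 and bit position 22 are taken from the Intel SDM.

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t misc_enable;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* The msr device returns the MSR selected by the read offset. */
	if (pread(fd, &misc_enable, sizeof(misc_enable), 0x1a0) != sizeof(misc_enable)) {
		perror("rdmsr IA32_MISC_ENABLE");
		close(fd);
		return 1;
	}
	close(fd);
	printf("IA32_MISC_ENABLE = %#" PRIx64 "\n", misc_enable);
	printf("Limit-CPUID-maxval (bit 22): %s\n",
	       (misc_enable & (1ULL << 22)) ? "set (CPUID levels masked)" : "clear");
	return 0;
}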
Example #3
static void do_drv_write(void *drvcmd)
{
    struct drv_cmd *cmd;
    uint64_t msr_content;

    cmd = (struct drv_cmd *)drvcmd;

    switch (cmd->type) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        rdmsrl(cmd->addr.msr.reg, msr_content);
        msr_content = (msr_content & ~INTEL_MSR_RANGE)
            | (cmd->val & INTEL_MSR_RANGE);
        wrmsrl(cmd->addr.msr.reg, msr_content);
        break;
    case SYSTEM_IO_CAPABLE:
        acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
            cmd->val, (u32)cmd->addr.io.bit_width);
        break;
    default:
        break;
    }
}
static void intel_thermal_interrupt(struct cpu_user_regs *regs)
{
    uint64_t msr_content;
    unsigned int cpu = smp_processor_id();
    static DEFINE_PER_CPU(s_time_t, next);

    ack_APIC_irq();

    if (NOW() < per_cpu(next, cpu))
        return;

    per_cpu(next, cpu) = NOW() + MILLISECS(5000);
    rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
    if (msr_content & 0x1) {
        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
                cpu);
        add_taint(TAINT_MACHINE_CHECK);
    } else {
        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
    }
}
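
The thermal interrupt handler above tests bit 0 of MSR_IA32_THERM_STATUS. Below is a minimal user-space sketch of the same check, assuming the msr module is loaded and root privileges; the helper name rdmsr_on_cpu is made up for this sketch, and 0x19c is the SDM-documented register number.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Read one MSR on one CPU via the Linux msr character device. */
static int rdmsr_on_cpu(int cpu, uint32_t reg, uint64_t *val)
{
	char path[64];
	int fd;
	ssize_t n;

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	n = pread(fd, val, sizeof(*val), reg);
	close(fd);
	return n == (ssize_t)sizeof(*val) ? 0 : -1;
}

int main(void)
{
	uint64_t therm;

	if (rdmsr_on_cpu(0, 0x19c, &therm)) {	/* IA32_THERM_STATUS */
		perror("rdmsr IA32_THERM_STATUS");
		return 1;
	}
	/* Bit 0: thermal status; bit 1: sticky thermal status log. */
	printf("CPU0 thermal status: %s (log bit %s)\n",
	       (therm & 0x1) ? "above threshold" : "normal",
	       (therm & 0x2) ? "set" : "clear");
	return 0;
}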
Example #5
static int apply_microcode(int cpu)
{
    unsigned long flags;
    struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
    uint32_t rev;
    struct microcode_amd *mc_amd = uci->mc.mc_amd;

    /* We should bind the task to the CPU */
    BUG_ON(raw_smp_processor_id() != cpu);

    if ( mc_amd == NULL )
        return -EINVAL;

    spin_lock_irqsave(&microcode_update_lock, flags);

    wrmsrl(MSR_AMD_PATCHLOADER, (unsigned long)&mc_amd->hdr.data_code);

    /* get patch id after patching */
    rdmsrl(MSR_AMD_PATCHLEVEL, rev);

    spin_unlock_irqrestore(&microcode_update_lock, flags);

    /* check current patch id and patch's id for match */
    if ( rev != mc_amd->hdr.patch_id )
    {
        printk(KERN_ERR "microcode: CPU%d update from revision "
               "0x%x to 0x%x failed\n", cpu,
               mc_amd->hdr.patch_id, rev);
        return -EIO;
    }

    printk(KERN_INFO "microcode: CPU%d updated from revision %#x to %#x\n",
           cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);

    uci->cpu_sig.rev = rev;

    return 0;
}
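
apply_microcode() verifies the load by reading the patch-level MSR back. For a quick check from user space there is no need to touch MSRs at all: the kernel's microcode driver exports the active revision through sysfs. A small sketch, assuming the standard sysfs path is present on the system.

#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/microcode/version", "r");

	if (!f) {
		perror("open microcode/version");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("CPU0 microcode revision: %s", buf);	/* value is printed in hex */
	fclose(f);
	return 0;
}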
Example #6
static int __pminit setup_p4_watchdog(void)
{
    uint64_t misc_enable;

    rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
    if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
        return 0;

    nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
    nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
    if ( boot_cpu_data.x86_num_siblings == 2 )
        nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;

    if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
        clear_msr_range(0x3F1, 2);
    /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
       docs don't fully define it, so leave it alone for now. */
    if (boot_cpu_data.x86_model >= 0x3) {
        /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
        clear_msr_range(0x3A0, 26);
        clear_msr_range(0x3BC, 3);
    } else {
        clear_msr_range(0x3A0, 31);
    }
    clear_msr_range(0x3C0, 6);
    clear_msr_range(0x3C8, 6);
    clear_msr_range(0x3E0, 2);
    clear_msr_range(MSR_P4_BPU_CCCR0, 18);
    clear_msr_range(MSR_P4_BPU_PERFCTR0, 18);
        
    wrmsrl(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0);
    wrmsrl(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE);
    write_watchdog_counter("P4_IQ_COUNTER0");
    apic_write(APIC_LVTPC, APIC_DM_NMI);
    wrmsrl(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val);
    return 1;
}
Example #7
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
	/* Unmask CPUID levels if masked: */
	if (c->x86 == 6 && c->x86_model >= 15) {
		u64 misc_enable;

		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
			misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
			c->cpuid_level = cpuid_eax(0);
		}
	}

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
		(c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;
#endif

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
	}

}
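
The CONSTANT_TSC/NONSTOP_TSC logic above keys off c->x86_power, which is CPUID leaf 0x80000007 EDX. A small sketch of the same invariant-TSC check (EDX bit 8) from user space, under the assumption that the compiler provides <cpuid.h>.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0x80000007, EDX bit 8: invariant TSC. */
	if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x80000007 not supported");
		return 1;
	}
	printf("Invariant TSC: %s\n", (edx & (1u << 8)) ? "yes" : "no");
	return 0;
}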
Example #8
static int ppro_check_ctrs(struct pt_regs * const regs,
			   struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * This can happen if perf counters are in use when
	 * we steal the die notifier NMI.
	 */
	if (unlikely(!reset_value))
		goto out;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		if (val & (1ULL << (counter_width - 1)))
			continue;
		oprofile_add_sample(regs, i);
		wrmsrl(msrs->counters[i].addr, -reset_value[i]);
	}

out:
	/* Only P6-based Pentium M needs to re-unmask the APIC vector, but it
	 * doesn't hurt other P6 variants. */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* We can't work out if we really handled an interrupt. We
	 * might have caught a *second* counter just after it overflowed;
	 * the interrupt for this counter then arrives and we don't find
	 * a counter that has overflowed, so we would return 0 and get
	 * dazed + confused. Instead we always assume we found an
	 * overflow. This sucks.
	 */
	return 1;
}
Example #9
static void intel_epb_restore(void)
{
	u64 val = this_cpu_read(saved_epb);
	u64 epb;

	rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
	if (val) {
		val &= EPB_MASK;
	} else {
		/*
		 * Because intel_epb_save() has not run for the current CPU yet,
		 * it is going online for the first time, so if its EPB value is
		 * 0 ('performance') at this point, assume that it has not been
		 * initialized by the platform firmware and set it to 6
		 * ('normal').
		 */
		val = epb & EPB_MASK;
		if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
			val = ENERGY_PERF_BIAS_NORMAL;
			pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
		}
	}
	wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
}
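
intel_epb_restore() only touches the low four bits of MSR_IA32_ENERGY_PERF_BIAS (EPB_MASK). Below is a hedged user-space sketch that reads the current hint on CPU 0 and maps the well-known values 0/6/15 to performance/normal/powersave; register number 0x1b0 comes from the SDM, and the program assumes the msr module plus root privileges.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t epb;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &epb, sizeof(epb), 0x1b0) != sizeof(epb)) {	/* IA32_ENERGY_PERF_BIAS */
		perror("rdmsr IA32_ENERGY_PERF_BIAS");
		close(fd);
		return 1;
	}
	close(fd);
	epb &= 0xf;	/* EPB_MASK: only the low 4 bits carry the hint */
	printf("EPB hint = %llu (%s)\n", (unsigned long long)epb,
	       epb == 0 ? "performance" : epb == 6 ? "normal" :
	       epb == 15 ? "powersave" : "other");
	return 0;
}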
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
		cpuc->lbr_entries[i].flags = 0;
	}
	cpuc->lbr_stack.nr = i;
}
Example #11
/* AMD K8 machine check */
void amd_k8_mcheck_init(struct cpuinfo_x86 *c)
{
	uint64_t value;
	uint32_t i;
	int cpu_nr;

	machine_check_vector = k8_machine_check;
	cpu_nr = smp_processor_id();
	wmb();

	rdmsrl(MSR_IA32_MCG_CAP, value);
	if (value & MCG_CTL_P)	/* Control register present ? */
		wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
	nr_mce_banks = value & MCG_CAP_COUNT;

	for (i = 0; i < nr_mce_banks; i++) {
		switch (i) {
		case 4: /* Northbridge */
			/* Enable error reporting of all errors,
			 * enable error checking and
			 * disable sync flooding */
			wrmsrl(MSR_IA32_MC4_CTL, 0x02c3c008ffffffffULL);
			wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
			break;

		default:
			/* Enable error reporting of all errors */
			wrmsrl(MSR_IA32_MC0_CTL + 4 * i, 0xffffffffffffffffULL);
			wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
			break;
		}
	}

	set_in_cr4(X86_CR4_MCE);
	printk("CPU%i: AMD K8 machine check reporting enabled.\n", cpu_nr);
}
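
amd_k8_mcheck_init() derives the bank count and MCG_CTL presence from MSR_IA32_MCG_CAP. The following sketch decodes the same capability bits from user space (count in bits 7:0, MCG_CTL_P bit 8, CMCI bit 10, per the SDM); it is an illustration only and assumes /dev/cpu/0/msr is available.

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t cap;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &cap, sizeof(cap), 0x179) != sizeof(cap)) {	/* IA32_MCG_CAP */
		perror("rdmsr IA32_MCG_CAP");
		close(fd);
		return 1;
	}
	close(fd);
	printf("MCA banks:        %" PRIu64 "\n", cap & 0xff);	/* MCG_CAP[7:0] */
	printf("MCG_CTL present:  %s\n", (cap & (1ULL << 8)) ? "yes" : "no");
	printf("CMCI supported:   %s\n", (cap & (1ULL << 10)) ? "yes" : "no");
	return 0;
}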
Example #12
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could be still incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all these NMIs as handled.
		 */
		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
	}

	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		return 0;

	config = &ibs_data.regs[0];
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 2;
	else
		offset_max = 1;
	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		/*
		 * Read IbsBrTarget and IbsOpData4 separately
		 * depending on their availability.
		 * Can't add to offset_max as they are staggered
		 */
		if (ibs_caps & IBS_CAPS_BRNTRGT) {
			rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
			size++;
		}
		if (ibs_caps & IBS_CAPS_OPDATA4) {
			rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
			size++;
		}
	}
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		set_linear_ip(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = sizeof(u32) + ibs_data.size;
		raw.data = ibs_data.data;
		data.raw = &raw;
	}

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle)
		perf_ibs_disable_event(perf_ibs, hwc, *config);
	else
		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);

	return 1;
}
Example #13
static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
    unsigned int i;
    unsigned int valid_states = 0;
    unsigned int cpu = policy->cpu;
    struct acpi_cpufreq_data *data;
    unsigned int result = 0;
    struct processor_performance *perf;
    u32 max_hw_pstate;
    uint64_t msr_content;
    struct cpuinfo_x86 *c = &cpu_data[policy->cpu];

    data = xzalloc(struct acpi_cpufreq_data);
    if (!data)
        return -ENOMEM;

    cpufreq_drv_data[cpu] = data;

    data->acpi_data = &processor_pminfo[cpu]->perf;

    perf = data->acpi_data;
    policy->shared_type = perf->shared_type;

    if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
        policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
        cpumask_set_cpu(cpu, policy->cpus);
        if (cpumask_weight(policy->cpus) != 1) {
            printk(XENLOG_WARNING "Unsupported sharing type %d (%u CPUs)\n",
                   policy->shared_type, cpumask_weight(policy->cpus));
            result = -ENODEV;
            goto err_unreg;
        }
    } else {
        cpumask_copy(policy->cpus, cpumask_of(cpu));
    }

    /* capability check */
    if (perf->state_count <= 1) {
        printk("No P-States\n");
        result = -ENODEV;
        goto err_unreg;
    }
    rdmsrl(MSR_PSTATE_CUR_LIMIT, msr_content);
    max_hw_pstate = (msr_content & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;

    if (perf->control_register.space_id != perf->status_register.space_id) {
        result = -ENODEV;
        goto err_unreg;
    }

    data->freq_table = xmalloc_array(struct cpufreq_frequency_table, 
                                    (perf->state_count+1));
    if (!data->freq_table) {
        result = -ENOMEM;
        goto err_unreg;
    }

    /* detect transition latency */
    policy->cpuinfo.transition_latency = 0;
    for (i=0; i<perf->state_count; i++) {
        if ((perf->states[i].transition_latency * 1000) >
            policy->cpuinfo.transition_latency)
            policy->cpuinfo.transition_latency =
                perf->states[i].transition_latency * 1000;
    }

    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;

    /* table init */
    for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
        if (i > 0 && perf->states[i].core_frequency >=
            data->freq_table[valid_states-1].frequency / 1000)
            continue;

        data->freq_table[valid_states].index = perf->states[i].control & HW_PSTATE_MASK;
        data->freq_table[valid_states].frequency =
            perf->states[i].core_frequency * 1000;
        valid_states++;
    }
    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
    perf->state = 0;

    result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
    if (result)
        goto err_freqfree;

    if (c->cpuid_level >= 6)
        on_selected_cpus(cpumask_of(cpu), feature_detect, policy, 1);
      
    /*
     * the first call to ->target() should result in us actually
     * writing something to the appropriate registers.
     */
    data->arch_cpu_flags |= ARCH_CPU_FLAG_RESUME;

    policy->cur = data->freq_table[i].frequency;
    return result;

err_freqfree:
    xfree(data->freq_table);
err_unreg:
    xfree(data);
    cpufreq_drv_data[cpu] = NULL;

    return result;
}
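
powernow_cpufreq_cpu_init() builds the frequency table that the cpufreq governors later act on. As a side note rather than part of the example, the resulting per-CPU state is visible through the standard cpufreq sysfs interface; here is a minimal sketch that prints the current frequency of CPU 0, assuming a cpufreq driver is loaded.

#include <stdio.h>

int main(void)
{
	unsigned long khz;
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", "r");

	if (!f) {
		perror("open scaling_cur_freq");
		return 1;
	}
	if (fscanf(f, "%lu", &khz) == 1)	/* value is reported in kHz */
		printf("CPU0 current frequency: %lu.%03lu MHz\n", khz / 1000, khz % 1000);
	fclose(f);
	return 0;
}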
Example #14
/* P4/Xeon Thermal regulation detect and init */
static void intel_init_thermal(struct cpuinfo_x86 *c)
{
    uint64_t msr_content;
    uint32_t val;
    int tm2 = 0;
    unsigned int cpu = smp_processor_id();
    static uint8_t thermal_apic_vector;

    if (!intel_thermal_supported(c))
        return; /* -ENODEV */

    /* First check if it's enabled already, in which case there might
     * be some SMM goo which handles it, so we can't even install a handler
     * since it might be delivered via SMI already. -zwanem
     */
    rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
    val = lvtthmr_init;
    /*
     * The initial value of thermal LVT entries on all APs always reads
     * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
     * sequence to them and LVT registers are reset to 0s except for
     * the mask bits which are set to 1s when APs receive INIT IPI.
     * If BIOS takes over the thermal interrupt and sets its interrupt
     * delivery mode to SMI (not fixed), it restores the value that the
     * BIOS has programmed on AP based on BSP's info we saved (since BIOS
     * is required to set the same value for all threads/cores).
     */
    if ((val & APIC_MODE_MASK) != APIC_DM_FIXED
        || (val & APIC_VECTOR_MASK) > 0xf)
        apic_write(APIC_LVTTHMR, val);

    if ((msr_content & (1ULL<<3))
        && (val & APIC_MODE_MASK) == APIC_DM_SMI) {
        if (c == &boot_cpu_data)
            printk(KERN_DEBUG "Thermal monitoring handled by SMI\n");
        return; /* -EBUSY */
    }

    if (cpu_has(c, X86_FEATURE_TM2) && (msr_content & (1ULL << 13)))
        tm2 = 1;

    /* check whether a vector already exists, temporarily masked? */
    if (val & APIC_VECTOR_MASK) {
        if (c == &boot_cpu_data)
            printk(KERN_DEBUG "Thermal LVT vector (%#x) already installed\n",
                   val & APIC_VECTOR_MASK);
        return; /* -EBUSY */
    }

    alloc_direct_apic_vector(&thermal_apic_vector, intel_thermal_interrupt);

    /* The temperature transition interrupt handler setup */
    val = thermal_apic_vector;    /* our delivery vector */
    val |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
    apic_write_around(APIC_LVTTHMR, val);

    rdmsrl(MSR_IA32_THERM_INTERRUPT, msr_content);
    wrmsrl(MSR_IA32_THERM_INTERRUPT, msr_content | 0x03);

    rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
    wrmsrl(MSR_IA32_MISC_ENABLE, msr_content | (1ULL<<3));

    apic_write_around(APIC_LVTTHMR, val & ~APIC_LVT_MASKED);
    if (opt_cpu_info)
        printk(KERN_INFO "CPU%u: Thermal monitoring enabled (%s)\n",
                cpu, tm2 ? "TM2" : "TM1");
    return;
}
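
intel_init_thermal() programs the thermal LVT and the interrupt-enable bits; the actual die temperature can be derived from two other MSRs. A hedged sketch, assuming /dev/cpu/0/msr is usable: TjMax comes from IA32_TEMPERATURE_TARGET[23:16] (0x1a2) and the digital readout from IA32_THERM_STATUS[22:16] (0x19c), both per the SDM; the helper name rdmsr0 is made up for this sketch.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static int rdmsr0(uint32_t reg, uint64_t *val)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = pread(fd, val, sizeof(*val), reg);
	close(fd);
	return n == (ssize_t)sizeof(*val) ? 0 : -1;
}

int main(void)
{
	uint64_t target, status;

	if (rdmsr0(0x1a2, &target) || rdmsr0(0x19c, &status)) {
		perror("rdmsr");
		return 1;
	}
	unsigned int tjmax = (target >> 16) & 0xff;	/* IA32_TEMPERATURE_TARGET[23:16] */
	unsigned int readout = (status >> 16) & 0x7f;	/* distance below TjMax */

	if (status & (1ULL << 31))	/* reading-valid bit */
		printf("CPU0: about %u C (TjMax %u C)\n", tjmax - readout, tjmax);
	else
		printf("CPU0: digital readout not valid\n");
	return 0;
}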
Example #15
/* Machine Check Handler for AMD K8 family series */
void k8_machine_check(struct cpu_user_regs *regs, long error_code)
{
	struct vcpu *vcpu = current;
	struct domain *curdom;
	struct mc_info *mc_data;
	struct mcinfo_global mc_global;
	struct mcinfo_bank mc_info;
	uint64_t status, addrv, miscv, uc;
	uint32_t i;
	unsigned int cpu_nr;
	uint32_t xen_impacted = 0;
#define DOM_NORMAL	0
#define DOM0_TRAP	1
#define DOMU_TRAP	2
#define DOMU_KILLED	4
	uint32_t dom_state = DOM_NORMAL;

	/* This handler runs as an interrupt gate. So IPIs from the
	 * polling service routine are deferred until we have finished.
	 */

	/* Disable interrupts for the _vcpu_. Otherwise it may be re-scheduled
	 * to another physical CPU, or the impacted process in the guest
	 * would continue running with corrupted data. */
	vcpu_schedule_lock_irq(vcpu);

	mc_data = x86_mcinfo_getptr();
	cpu_nr = smp_processor_id();
	curdom = vcpu->domain;

	memset(&mc_global, 0, sizeof(mc_global));
	mc_global.common.type = MC_TYPE_GLOBAL;
	mc_global.common.size = sizeof(mc_global);

	mc_global.mc_domid = curdom->domain_id; /* impacted domain */
	mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
	BUG_ON(cpu_nr != vcpu->processor);
	mc_global.mc_core_threadid = 0;
	mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
#if 0 /* TODO: on which socket is this physical core?
         It's not clear to me how to figure this out. */
	mc_global.mc_socketid = ???;
#endif
	mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
	rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);

	/* Quick check, who is impacted */
	xen_impacted = is_idle_domain(curdom);

	/* Dom0 */
	x86_mcinfo_clear(mc_data);
	x86_mcinfo_add(mc_data, &mc_global);

	for (i = 0; i < nr_mce_banks; i++) {
		struct domain *d;

		rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);

		if (!(status & MCi_STATUS_VAL))
			continue;

		/* An error happened in this bank.
		 * This is expected to be an uncorrectable error,
		 * since correctable errors get polled.
		 */
		uc = status & MCi_STATUS_UC;

		memset(&mc_info, 0, sizeof(mc_info));
		mc_info.common.type = MC_TYPE_BANK;
		mc_info.common.size = sizeof(mc_info);
		mc_info.mc_bank = i;
		mc_info.mc_status = status;

		addrv = 0;
		if (status & MCi_STATUS_ADDRV) {
			rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv);
			
			d = maddr_get_owner(addrv);
			if (d != NULL)
				mc_info.mc_domid = d->domain_id;
		}

		miscv = 0;
		if (status & MCi_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv);

		mc_info.mc_addr = addrv;
		mc_info.mc_misc = miscv;

		x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */

		if (mc_callback_bank_extended)
			mc_callback_bank_extended(mc_data, i, status);

		/* clear status */
		wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
		wmb();
		add_taint(TAINT_MACHINE_CHECK);
	}

	status = mc_global.mc_gstatus;

	/* clear MCIP or cpu enters shutdown state
	 * in case another MCE occurs. */
	status &= ~MCG_STATUS_MCIP;
	wrmsrl(MSR_IA32_MCG_STATUS, status);
	wmb();

	/* For the details see the discussion "MCE/MCA concept" on xen-devel.
	 * The thread started here:
	 * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html
	 */

	/* MCG_STATUS_RIPV: 
	 * When this bit is not set, then the instruction pointer onto the stack
	 * to resume at is not valid. If xen is interrupted, then we panic anyway
	 * right below. Otherwise it is up to the guest to figure out if 
	 * guest kernel or guest userland is affected and should kill either
	 * itself or the affected process.
	 */

	/* MCG_STATUS_EIPV:
	 * Evaluation of EIPV is the job of the guest.
	 */

	if (xen_impacted) {
		/* Now we are going to panic anyway. Allow interrupts, so that
		 * printk on serial console can work. */
		vcpu_schedule_unlock_irq(vcpu);

		/* Uh, that means a machine check exception
		 * occurred inside Xen. */
		printk("Machine check exception occurred in Xen.\n");

		/* if MCG_STATUS_EIPV indicates, the IP on the stack is related
		 * to the error then it makes sense to print a stack trace.
		 * That can be useful for more detailed error analysis and/or
		 * error case studies to figure out, if we can clear
		 * xen_impacted and kill a DomU instead
		 * (i.e. if a guest only control structure is affected, but then
		 * we must ensure the bad pages are not re-used again).
		 */
		if (status & MCG_STATUS_EIPV) {
			printk("MCE: Instruction Pointer is related to the error. "
				"Therefore, print the execution state.\n");
			show_execution_state(regs);
		}
		x86_mcinfo_dump(mc_data);
		mc_panic("End of MCE. Use mcelog to decode above error codes.\n");
	}

	/* If Dom0 registered a machine check handler, which is only possible
	 * with a PV MCA driver, then ... */
	if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) {
		dom_state = DOM0_TRAP;

		/* ... deliver machine check trap to Dom0. */
		send_guest_trap(dom0, 0, TRAP_machine_check);

		/* Xen may tell Dom0 now to notify the DomU.
		 * But this will happen through a hypercall. */
	} else if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) {
		/* Dom0 did not register a machine check handler, but DomU
		 * did, so ... */
		dom_state = DOMU_TRAP;

		/* ... deliver machine check trap to DomU. */
		send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check);
	} else {
		/* Hmm... no one feels responsible for handling the error.
		 * So, do a quick check whether a DomU is impacted or not.
		 */
		if (curdom == dom0) {
			/* Dom0 is impacted. Since no one can handle
			 * this error, panic! */
			x86_mcinfo_dump(mc_data);
			mc_panic("MCE occurred in Dom0, which it can't handle\n");

			/* UNREACHED */
		} else {
			dom_state = DOMU_KILLED;

			/* Enable interrupts. This basically results in
			 * calling sti on the *physical* cpu. But after
			 * domain_crash() the vcpu pointer is invalid.
			 * Therefore, we must unlock the irqs before killing
			 * it. */
			vcpu_schedule_unlock_irq(vcpu);

			/* DomU is impacted. Kill it and continue. */
			domain_crash(curdom);
		}
	}


	switch (dom_state) {
	case DOM0_TRAP:
	case DOMU_TRAP:
		/* Enable interrupts. */
		vcpu_schedule_unlock_irq(vcpu);

		/* guest softirqs and event callbacks are scheduled
		 * immediately after this handler exits. */
		break;
	case DOMU_KILLED:
		/* Nothing to do here. */
		break;
	default:
		BUG();
	}
}
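
k8_machine_check() inspects the architectural top bits of each IA32_MCi_STATUS value (VAL, UC, ADDRV, MISCV, ...). The small stand-alone decoder below shows those bit positions on a made-up sample value; it performs no MSR access and is only an illustration of the status-word layout documented for MCA banks.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Decode the architectural top bits of an IA32_MCi_STATUS value. */
static void decode_mci_status(uint64_t status)
{
	printf("VAL=%d OVER=%d UC=%d EN=%d MISCV=%d ADDRV=%d PCC=%d mca-code=%#" PRIx64 "\n",
	       (int)(status >> 63 & 1),	/* valid */
	       (int)(status >> 62 & 1),	/* overflow */
	       (int)(status >> 61 & 1),	/* uncorrected */
	       (int)(status >> 60 & 1),	/* error reporting enabled */
	       (int)(status >> 59 & 1),	/* MISC register valid */
	       (int)(status >> 58 & 1),	/* ADDR register valid */
	       (int)(status >> 57 & 1),	/* processor context corrupt */
	       status & 0xffff);	/* MCA error code */
}

int main(void)
{
	/* Made-up sample value, just to exercise the decoder. */
	decode_mci_status(0xb600000000000185ULL);
	return 0;
}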
Example #16
static inline s64 read_mperf(void)
{
	u64 u;
	rdmsrl(MSR_IA32_MPERF,u);
	return u;
}
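
read_mperf() returns IA32_MPERF; paired with IA32_APERF (0xe8), the delta ratio over an interval gives the average effective-to-base clock ratio. Below is a minimal user-space sketch of that calculation, assuming the msr module and root; the helper rdmsr0 is invented for this sketch and deliberately treats read errors as zero for brevity.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t rdmsr0(uint32_t reg)
{
	uint64_t val = 0;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd >= 0) {
		if (pread(fd, &val, sizeof(val), reg) != sizeof(val))
			val = 0;
		close(fd);
	}
	return val;
}

int main(void)
{
	uint64_t m1 = rdmsr0(0xe7), a1 = rdmsr0(0xe8);	/* IA32_MPERF, IA32_APERF */

	usleep(100 * 1000);	/* sample again 100 ms later */
	uint64_t m2 = rdmsr0(0xe7), a2 = rdmsr0(0xe8);

	if (m2 == m1) {
		puts("MPERF did not advance (MSR read failed?)");
		return 1;
	}
	/* Average effective vs. base clock over the interval. */
	printf("APERF/MPERF ratio: %.3f\n", (double)(a2 - a1) / (double)(m2 - m1));
	return 0;
}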
Example #17
int vmx_cpu_up(void)
{
    u32 eax, edx;
    int bios_locked, cpu = smp_processor_id();
    u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;

    BUG_ON(!(read_cr4() & X86_CR4_VMXE));

    /*
     * Ensure the current processor operating mode meets
     * the required CR0 fixed bits for VMX operation.
     */
    cr0 = read_cr0();
    rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
    rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
    if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
    {
        printk("CPU%d: some settings of host CR0 are " 
               "not allowed in VMX operation.\n", cpu);
        return 0;
    }

    rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);

    bios_locked = !!(eax & IA32_FEATURE_CONTROL_MSR_LOCK);
    if ( bios_locked )
    {
        if ( !(eax & (IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX |
                      IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
        {
            printk("CPU%d: VMX disabled by BIOS.\n", cpu);
            return 0;
        }
    }
    else
    {
        eax  = IA32_FEATURE_CONTROL_MSR_LOCK;
        eax |= IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX;
        if ( test_bit(X86_FEATURE_SMXE, &boot_cpu_data.x86_capability) )
            eax |= IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX;
        wrmsr(IA32_FEATURE_CONTROL_MSR, eax, 0);
    }

    vmx_init_vmcs_config();

    INIT_LIST_HEAD(&this_cpu(active_vmcs_list));

    if ( this_cpu(host_vmcs) == NULL )
    {
        this_cpu(host_vmcs) = vmx_alloc_vmcs();
        if ( this_cpu(host_vmcs) == NULL )
        {
            printk("CPU%d: Could not allocate host VMCS\n", cpu);
            return 0;
        }
    }

    switch ( __vmxon(virt_to_maddr(this_cpu(host_vmcs))) )
    {
    case -2: /* #UD or #GP */
        if ( bios_locked &&
             test_bit(X86_FEATURE_SMXE, &boot_cpu_data.x86_capability) &&
             (!(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX) ||
              !(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
        {
            printk("CPU%d: VMXON failed: perhaps because of TXT settings "
                   "in your BIOS configuration?\n", cpu);
            printk(" --> Disable TXT in your BIOS unless using a secure "
                   "bootloader.\n");
            return 0;
        }
        /* fall through */
    case -1: /* CF==1 or ZF==1 */
        printk("CPU%d: unexpected VMXON failure\n", cpu);
        return 0;
    case 0: /* success */
        break;
    default:
        BUG();
    }

    return 1;
}
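
vmx_cpu_up() checks and, if still unlocked, programs IA32_FEATURE_CONTROL before VMXON. The sketch below only reads that MSR (0x3a) and reports the lock and VMXON-enable bits, assuming /dev/cpu/0/msr is accessible; the bit positions are taken from the SDM.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t fc;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &fc, sizeof(fc), 0x3a) != sizeof(fc)) {	/* IA32_FEATURE_CONTROL */
		perror("rdmsr IA32_FEATURE_CONTROL");
		close(fd);
		return 1;
	}
	close(fd);
	printf("lock bit:           %s\n", (fc & (1ULL << 0)) ? "set" : "clear");
	printf("VMXON inside SMX:   %s\n", (fc & (1ULL << 1)) ? "allowed" : "blocked");
	printf("VMXON outside SMX:  %s\n", (fc & (1ULL << 2)) ? "allowed" : "blocked");
	return 0;
}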
static void __init setup_real_mode(void)
{
	u16 real_mode_seg;
	const u32 *rel;
	u32 count;
	unsigned char *base;
	unsigned long phys_base;
	struct trampoline_header *trampoline_header;
	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
#ifdef CONFIG_X86_64
	u64 *trampoline_pgd;
	u64 efer;
#endif

	base = (unsigned char *)real_mode_header;

	memcpy(base, real_mode_blob, size);

	phys_base = __pa(base);
	real_mode_seg = phys_base >> 4;

	rel = (u32 *) real_mode_relocs;

	/* 16-bit segment relocations. */
	count = *rel++;
	while (count--) {
		u16 *seg = (u16 *) (base + *rel++);
		*seg = real_mode_seg;
	}

	/* 32-bit linear relocations. */
	count = *rel++;
	while (count--) {
		u32 *ptr = (u32 *) (base + *rel++);
		*ptr += phys_base;
	}

	/* Must be performed *after* relocation. */
	trampoline_header = (struct trampoline_header *)
		__va(real_mode_header->trampoline_header);

#ifdef CONFIG_X86_32
	trampoline_header->start = __pa_symbol(startup_32_smp);
	trampoline_header->gdt_limit = __BOOT_DS + 7;
	trampoline_header->gdt_base = __pa_symbol(boot_gdt);
#else
	/*
	 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
	 * so we need to mask it out.
	 */
	rdmsrl(MSR_EFER, efer);
	trampoline_header->efer = efer & ~EFER_LMA;

	trampoline_header->start = (u64) secondary_startup_64;
	trampoline_cr4_features = &trampoline_header->cr4;
	*trampoline_cr4_features = mmu_cr4_features;

	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
	trampoline_pgd[511] = init_level4_pgt[511].pgd;
#endif
}
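
setup_real_mode() masks EFER.LMA out of the value handed to the trampoline because some AMD CPUs #GP when it is set in WRMSR. For reference, here is a read-only sketch that dumps the interesting EFER bits on CPU 0 (MSR 0xc0000080), again assuming the msr module and root privileges.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t efer;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &efer, sizeof(efer), 0xc0000080) != sizeof(efer)) {	/* MSR_EFER */
		perror("rdmsr EFER");
		close(fd);
		return 1;
	}
	close(fd);
	printf("SCE (syscall enable):   %d\n", (int)(efer >> 0 & 1));
	printf("LME (long mode enable): %d\n", (int)(efer >> 8 & 1));
	printf("LMA (long mode active): %d\n", (int)(efer >> 10 & 1));
	printf("NXE (no-execute):       %d\n", (int)(efer >> 11 & 1));
	return 0;
}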
Example #19
void __init check_bugs(void)
{
	identify_boot_cpu();

	/*
	 * identify_boot_cpu() initialized SMT support information, let the
	 * core code know.
	 */
	cpu_smt_check_topology();

	if (!IS_ENABLED(CONFIG_SMP)) {
		pr_info("CPU: ");
		print_cpu_info(&boot_cpu_data);
	}

	/*
	 * Read the SPEC_CTRL MSR to account for reserved bits which may
	 * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
	 * init code as it is not enumerated and depends on the family.
	 */
	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);

	/* Allow STIBP in MSR_SPEC_CTRL if supported */
	if (boot_cpu_has(X86_FEATURE_STIBP))
		x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;

	/* Select the proper spectre mitigation before patching alternatives */
	spectre_v2_select_mitigation();

	/*
	 * Select proper mitigation for any exposure to the Speculative Store
	 * Bypass vulnerability.
	 */
	ssb_select_mitigation();

	l1tf_select_mitigation();

#ifdef CONFIG_X86_32
	/*
	 * Check whether we are able to run this kernel safely on SMP.
	 *
	 * - i386 is no longer supported.
	 * - In order to run on anything without a TSC, we need to be
	 *   compiled for a i486.
	 */
	if (boot_cpu_data.x86 < 4)
		panic("Kernel requires i486+ for 'invlpg' and other features");

	init_utsname()->machine[1] =
		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
	alternative_instructions();

	fpu__init_check_bugs();
#else /* CONFIG_X86_64 */
	alternative_instructions();

	/*
	 * Make sure the first 2MB area is not mapped by huge pages
	 * There are typically fixed size MTRRs in there and overlapping
	 * MTRRs into large pages causes slow downs.
	 *
	 * Right now we don't do that with gbpages because there seems
	 * very little benefit for that case.
	 */
	if (!direct_gbpages)
		set_memory_4k((unsigned long)__va(0), 1);
#endif
}
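
check_bugs() caches MSR_IA32_SPEC_CTRL only when the CPU enumerates it. A hedged user-space equivalent follows: check CPUID.(EAX=7,ECX=0):EDX bit 26 first, then read MSR 0x48; the IBRS/STIBP/SSBD bit meanings come from Intel's speculation-control documentation, and the program assumes <cpuid.h> plus the msr module.

#include <cpuid.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint64_t spec_ctrl;
	int fd;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) || !(edx & (1u << 26))) {
		puts("IA32_SPEC_CTRL not enumerated (CPUID.7:EDX[26] clear)");
		return 0;
	}
	fd = open("/dev/cpu/0/msr", O_RDONLY);
	if (fd < 0 || pread(fd, &spec_ctrl, sizeof(spec_ctrl), 0x48) != sizeof(spec_ctrl)) {
		perror("rdmsr IA32_SPEC_CTRL");
		return 1;
	}
	close(fd);
	printf("IA32_SPEC_CTRL = %#" PRIx64 " (IBRS=%d STIBP=%d SSBD=%d)\n", spec_ctrl,
	       (int)(spec_ctrl & 1), (int)(spec_ctrl >> 1 & 1), (int)(spec_ctrl >> 2 & 1));
	return 0;
}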
Example #20
static inline s64 read_pctr0(void)
{
	s64 v;
	rdmsrl(MSR_P6_PERFCTR0, v);
	return v;
}
static void early_init_intel(struct cpuinfo_x86 *c)
{
	u64 misc_enable;

	/* Unmask CPUID levels if masked: */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
				  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
			c->cpuid_level = cpuid_eax(0);
			get_cpu_cap(c);
		}
	}

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
		(c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);

	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
		unsigned lower_word;

		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
		/* Required by the SDM */
		sync_core();
		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
	}

	/*
	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
	 *
	 * A race condition between speculative fetches and invalidating
	 * a large page.  This is worked around in microcode, but we
	 * need the microcode to have already been loaded... so if it is
	 * not, recommend a BIOS update and disable large pages.
	 */
	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
	    c->microcode < 0x20e) {
		pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
		clear_cpu_cap(c, X86_FEATURE_PSE);
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;
#endif

	/* CPUID workaround for 0F33/0F34 CPU */
	if (c->x86 == 0xF && c->x86_model == 0x3
	    && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
		c->x86_phys_bits = 36;

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states.
	 *
	 * It is also reliable across cores and sockets. (but not across
	 * cabinets - we turn it off in that case explicitly.)
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
		if (!check_tsc_unstable())
			set_sched_clock_stable();
	}

	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
	if (c->x86 == 6) {
		switch (c->x86_model) {
		case 0x27:	/* Penwell */
		case 0x35:	/* Cloverview */
		case 0x4a:	/* Merrifield */
			set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
			break;
		default:
			break;
		}
	}

	/*
	 * There is a known erratum on Pentium III and Core Solo
	 * and Core Duo CPUs.
	 * " Page with PAT set to WC while associated MTRR is UC
	 *   may consolidate to UC "
	 * Because of this erratum, it is better to stick with
	 * setting WC in MTRR rather than using PAT on these CPUs.
	 *
	 * Enable PAT WC only on P4, Core 2 or later CPUs.
	 */
	if (c->x86 == 6 && c->x86_model < 15)
		clear_cpu_cap(c, X86_FEATURE_PAT);

#ifdef CONFIG_KMEMCHECK
	/*
	 * P4s have a "fast strings" feature which causes single-
	 * stepping REP instructions to only generate a #DB on
	 * cache-line boundaries.
	 *
	 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
	 * (model 2) with the same problem.
	 */
	if (c->x86 == 15)
		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
				  MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
			pr_info("kmemcheck: Disabling fast string operations\n");
#endif

	/*
	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
	 * clear the fast string and enhanced fast string CPU capabilities.
	 */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
			pr_info("Disabled fast string operations\n");
			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
			setup_clear_cpu_cap(X86_FEATURE_ERMS);
		}
	}

	/*
	 * Intel Quark Core DevMan_001.pdf section 6.4.11
	 * "The operating system also is required to invalidate (i.e., flush)
	 *  the TLB when any changes are made to any of the page table entries.
	 *  The operating system must reload CR3 to cause the TLB to be flushed"
	 *
	 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
	 * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
	 * to be modified.
	 */
	if (c->x86 == 5 && c->x86_model == 9) {
		pr_info("Disabling PGE capability bit\n");
		setup_clear_cpu_cap(X86_FEATURE_PGE);
	}

	if (c->cpuid_level >= 0x00000001) {
		u32 eax, ebx, ecx, edx;

		cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
		/*
		 * If HTT (EDX[28]) is set EBX[16:23] contain the number of
		 * apicids which are reserved per package. Store the resulting
		 * shift value for the package management code.
		 */
		if (edx & (1U << 28))
			c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
	}
}
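
The tail of the function stores the number of reserved APIC IDs per package from CPUID leaf 1 when the HTT flag is set. The same query can be made from user space; a small sketch, assuming <cpuid.h> is available.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 1 not available");
		return 1;
	}
	if (edx & (1u << 28)) {	/* HTT: EBX[23:16] = addressable IDs per package */
		unsigned int ids = (ebx >> 16) & 0xff;
		printf("HTT set, %u logical processor IDs reserved per package\n", ids);
	} else {
		puts("HTT flag clear, one logical processor ID per package");
	}
	return 0;
}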
Example #22
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int mbytes = num_physpages >> (20-PAGE_SHIFT);
	int r;

#ifdef CONFIG_SMP
	unsigned long long value;

	/* Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_K7_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K7_HWCR, value);
	}
#endif

	early_init_amd(c);

	/*
	 *	FIXME: We should handle the K5 here. Set up the write
	 *	range and also turn on MSR 83 bits 4 and 31 (write alloc,
	 *	no bus pipeline)
	 */

	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
	clear_bit(0*32+31, c->x86_capability);
	
	r = get_model_name(c);

	switch(c->x86)
	{
		case 4:
		/*
		 * General Systems BIOSen alias the cpu frequency registers
		 * of the Elan at 0x000df000. Unfortunately, one of the Linux
		 * drivers subsequently pokes it, and changes the CPU speed.
		 * Workaround : Remove the unneeded alias.
		 */
#define CBAR		(0xfffc) /* Configuration Base Address  (32-bit) */
#define CBAR_ENB	(0x80000000)
#define CBAR_KEY	(0X000000CB)
			if (c->x86_model==9 || c->x86_model == 10) {
				if (inl (CBAR) & CBAR_ENB)
					outl (0 | CBAR_KEY, CBAR);
			}
			break;
		case 5:
			if( c->x86_model < 6 )
			{
				/* Based on AMD doc 20734R - June 2000 */
				if ( c->x86_model == 0 ) {
					clear_bit(X86_FEATURE_APIC, c->x86_capability);
					set_bit(X86_FEATURE_PGE, c->x86_capability);
				}
				break;
			}
			
			if ( c->x86_model == 6 && c->x86_mask == 1 ) {
				const int K6_BUG_LOOP = 1000000;
				int n;
				void (*f_vide)(void);
				unsigned long d, d2;
				
				printk(KERN_INFO "AMD K6 stepping B detected - ");
				
				/*
				 * It looks like AMD fixed the 2.6.2 bug and improved indirect 
				 * calls at the same time.
				 */

				n = K6_BUG_LOOP;
				f_vide = vide;
				rdtscl(d);
				while (n--) 
					f_vide();
				rdtscl(d2);
				d = d2-d;

				if (d > 20*K6_BUG_LOOP) 
					printk("system stability may be impaired when more than 32 MB are used.\n");
				else 
					printk("probably OK (after B9730xxxx).\n");
				printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
			}

			/* K6 with old style WHCR */
			if (c->x86_model < 8 ||
			   (c->x86_model== 8 && c->x86_mask < 8)) {
				/* We can only write allocate on the low 508Mb */
				if(mbytes>508)
					mbytes=508;

				rdmsr(MSR_K6_WHCR, l, h);
				if ((l&0x0000FFFF)==0) {
					unsigned long flags;
					l=(1<<0)|((mbytes/4)<<1);
					local_irq_save(flags);
					wbinvd();
					wrmsr(MSR_K6_WHCR, l, h);
					local_irq_restore(flags);
					printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
						mbytes);
				}
				break;
			}

			if ((c->x86_model == 8 && c->x86_mask >7) ||
			     c->x86_model == 9 || c->x86_model == 13) {
				/* The more serious chips .. */

				if(mbytes>4092)
					mbytes=4092;

				rdmsr(MSR_K6_WHCR, l, h);
				if ((l&0xFFFF0000)==0) {
					unsigned long flags;
					l=((mbytes>>2)<<22)|(1<<16);
					local_irq_save(flags);
					wbinvd();
					wrmsr(MSR_K6_WHCR, l, h);
					local_irq_restore(flags);
					printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
						mbytes);
				}

				/*  Set MTRR capability flag if appropriate */
				if (c->x86_model == 13 || c->x86_model == 9 ||
				   (c->x86_model == 8 && c->x86_mask >= 8))
					set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
				break;
			}

			if (c->x86_model == 10) {
				/* AMD Geode LX is model 10 */
				/* placeholder for any needed mods */
				break;
			}
			break;
		case 6: /* An Athlon/Duron */
 
			/* Bit 15 of Athlon-specific MSR 15 needs to be 0
			 * to enable SSE on Palomino/Morgan/Barton CPUs.
			 * If the BIOS didn't enable it already, enable it here.
			 */
			if (c->x86_model >= 6 && c->x86_model <= 10) {
				if (!cpu_has(c, X86_FEATURE_XMM)) {
					printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
					rdmsr(MSR_K7_HWCR, l, h);
					l &= ~0x00008000;
					wrmsr(MSR_K7_HWCR, l, h);
					set_bit(X86_FEATURE_XMM, c->x86_capability);
				}
			}

			/* It's been determined by AMD that Athlons since model 8 stepping 1
			 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
			 * As per AMD technical note 27212 0.2
			 */
			if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
				rdmsr(MSR_K7_CLK_CTL, l, h);
				if ((l & 0xfff00000) != 0x20000000) {
					printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
						((l & 0x000fffff)|0x20000000));
					wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
				}
			}
			break;
	}
}
Example #23
static int __init detect_init_APIC (void)
{
    uint64_t msr_content;
    u32 features;

    /* Disabled by kernel option? */
    if (enable_local_apic < 0)
        return -1;

    switch (boot_cpu_data.x86_vendor) {
    case X86_VENDOR_AMD:
        if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
            (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x17))
            break;
        goto no_apic;
    case X86_VENDOR_INTEL:
        if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
            (boot_cpu_data.x86 == 5 && cpu_has_apic))
            break;
        goto no_apic;
    default:
        goto no_apic;
    }

    if (!cpu_has_apic) {
        /*
         * Over-ride BIOS and try to enable the local
         * APIC only if "lapic" specified.
         */
        if (enable_local_apic <= 0) {
            printk("Local APIC disabled by BIOS -- "
                   "you can enable it with \"lapic\"\n");
            return -1;
        }
        /*
         * Some BIOSes disable the local APIC in the
         * APIC_BASE MSR. This can only be done in
         * software for Intel P6 or later and AMD K7
         * (Model > 1) or later.
         */
        rdmsrl(MSR_IA32_APICBASE, msr_content);
        if (!(msr_content & MSR_IA32_APICBASE_ENABLE)) {
            printk("Local APIC disabled by BIOS -- reenabling.\n");
            msr_content &= ~MSR_IA32_APICBASE_BASE;
            msr_content |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
            wrmsrl(MSR_IA32_APICBASE,
                msr_content | MSR_IA32_APICBASE_ENABLE
                | APIC_DEFAULT_PHYS_BASE);
            enabled_via_apicbase = 1;
        }
    }
    /*
     * The APIC feature bit should now be enabled
     * in `cpuid'
     */
    features = cpuid_edx(1);
    if (!(features & (1 << X86_FEATURE_APIC))) {
        printk("Could not enable APIC!\n");
        return -1;
    }

    set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

    /* The BIOS may have set up the APIC at some other address */
    rdmsrl(MSR_IA32_APICBASE, msr_content);
    if (msr_content & MSR_IA32_APICBASE_ENABLE)
        mp_lapic_addr = msr_content & MSR_IA32_APICBASE_BASE;

    if (nmi_watchdog != NMI_NONE)
        nmi_watchdog = NMI_LOCAL_APIC;

    printk("Found and enabled local APIC!\n");

    apic_pm_activate();

    return 0;

no_apic:
    printk("No local APIC present or hardware disabled\n");
    return -1;
}
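
detect_init_APIC() re-enables and relocates the local APIC through MSR_IA32_APICBASE. Here is a read-only sketch that decodes the base address and the BSP/x2APIC/enable flags of that MSR (0x1b) from user space, assuming the msr module and root; bit positions are from the SDM.

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t apicbase;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &apicbase, sizeof(apicbase), 0x1b) != sizeof(apicbase)) {	/* IA32_APIC_BASE */
		perror("rdmsr IA32_APIC_BASE");
		close(fd);
		return 1;
	}
	close(fd);
	/* Low 12 bits hold flags/reserved; the rest is the APIC base address. */
	printf("APIC base address: %#" PRIx64 "\n", apicbase & ~0xfffULL);
	printf("BSP: %d, x2APIC: %d, globally enabled: %d\n",
	       (int)(apicbase >> 8 & 1), (int)(apicbase >> 10 & 1), (int)(apicbase >> 11 & 1));
	return 0;
}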
Example #24
/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}
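
cmci_discover() probes each bank's IA32_MCx_CTL2 register and checks whether the CMCI enable bit sticks. The sketch below merely reads bank 0's CTL2 (MSR 0x280) and reports the enable bit and threshold field; bank numbering and bit positions follow the SDM, and the program assumes /dev/cpu/0/msr is usable.

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t ctl2;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* IA32_MC0_CTL2 is MSR 0x280; bank i lives at 0x280 + i. */
	if (pread(fd, &ctl2, sizeof(ctl2), 0x280) != sizeof(ctl2)) {
		perror("rdmsr IA32_MC0_CTL2 (bank may not exist)");
		close(fd);
		return 1;
	}
	close(fd);
	printf("bank 0: CMCI %s, threshold %" PRIu64 "\n",
	       (ctl2 & (1ULL << 30)) ? "enabled" : "disabled",
	       ctl2 & 0x7fff);
	return 0;
}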
Example #25
static int construct_vmcs(struct vcpu *v)
{
    uint16_t sysenter_cs;
    unsigned long sysenter_eip;

    vmx_vmcs_enter(v);

    /* VMCS controls. */
    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
    if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);

    /* MSR access bitmap. */
    if ( cpu_has_vmx_msr_bitmap )
    {
        char *msr_bitmap = alloc_xenheap_page();

        if ( msr_bitmap == NULL )
            return -ENOMEM;

        memset(msr_bitmap, ~0, PAGE_SIZE);
        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));

        vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
        vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
    }

    /* I/O access bitmap. */
    __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
    __vmwrite(IO_BITMAP_B, virt_to_maddr(hvm_io_bitmap + PAGE_SIZE));

    /* Host GDTR base. */
    __vmwrite(HOST_GDTR_BASE, GDT_VIRT_START(v));

    /* Host data selectors. */
    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_FS_SELECTOR, 0);
    __vmwrite(HOST_GS_SELECTOR, 0);
    __vmwrite(HOST_FS_BASE, 0);
    __vmwrite(HOST_GS_BASE, 0);

    /* Host control registers. */
    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
    __vmwrite(HOST_CR4, mmu_cr4_features);

    /* Host CS:RIP. */
    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);

    /* Host SYSENTER CS:RIP. */
    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);

    /* MSR intercepts. */
    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);

    __vmwrite(VM_ENTRY_INTR_INFO, 0);

    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);

    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    __vmwrite(CR3_TARGET_COUNT, 0);

    __vmwrite(GUEST_ACTIVITY_STATE, 0);

    /* Guest segment bases. */
    __vmwrite(GUEST_ES_BASE, 0);
    __vmwrite(GUEST_SS_BASE, 0);
    __vmwrite(GUEST_DS_BASE, 0);
    __vmwrite(GUEST_FS_BASE, 0);
    __vmwrite(GUEST_GS_BASE, 0);
    __vmwrite(GUEST_CS_BASE, 0);

    /* Guest segment limits. */
    __vmwrite(GUEST_ES_LIMIT, ~0u);
    __vmwrite(GUEST_SS_LIMIT, ~0u);
    __vmwrite(GUEST_DS_LIMIT, ~0u);
    __vmwrite(GUEST_FS_LIMIT, ~0u);
    __vmwrite(GUEST_GS_LIMIT, ~0u);
    __vmwrite(GUEST_CS_LIMIT, ~0u);

    /* Guest segment AR bytes. */
    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
    __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
    __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */

    /* Guest IDT. */
    __vmwrite(GUEST_IDTR_BASE, 0);
    __vmwrite(GUEST_IDTR_LIMIT, 0);

    /* Guest GDT. */
    __vmwrite(GUEST_GDTR_BASE, 0);
    __vmwrite(GUEST_GDTR_LIMIT, 0);

    /* Guest LDT. */
    __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
    __vmwrite(GUEST_LDTR_SELECTOR, 0);
    __vmwrite(GUEST_LDTR_BASE, 0);
    __vmwrite(GUEST_LDTR_LIMIT, 0);

    /* Guest TSS. */
    __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
    __vmwrite(GUEST_TR_BASE, 0);
    __vmwrite(GUEST_TR_LIMIT, 0xff);

    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __vmwrite(GUEST_DR7, 0);
    __vmwrite(VMCS_LINK_POINTER, ~0UL);
#if defined(__i386__)
    __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif

    __vmwrite(EXCEPTION_BITMAP, (HVM_TRAP_MASK |
                                 (1U << TRAP_page_fault) |
                                 (1U << TRAP_no_device)));

    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
    hvm_update_guest_cr(v, 0);

    v->arch.hvm_vcpu.guest_cr[4] = 0;
    hvm_update_guest_cr(v, 4);

    if ( cpu_has_vmx_tpr_shadow )
    {
        __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
                  page_to_maddr(vcpu_vlapic(v)->regs_page));
        __vmwrite(TPR_THRESHOLD, 0);
    }

    vmx_vmcs_exit(v);

    paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */

    vmx_vlapic_msr_changed(v);

    return 0;
}
static int __init extlog_init(void)
{
	struct extlog_l1_head *l1_head;
	void __iomem *extlog_l1_hdr;
	size_t l1_hdr_size;
	struct resource *r;
	u64 cap;
	int rc;

	rc = -ENODEV;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (!(cap & MCG_ELOG_P))
		return rc;

	if (!extlog_get_l1addr())
		return rc;

	rc = -EINVAL;
	/* get L1 header to fetch necessary information */
	l1_hdr_size = sizeof(struct extlog_l1_head);
	r = request_mem_region(l1_dirbase, l1_hdr_size, "L1 DIR HDR");
	if (!r) {
		pr_warn(FW_BUG EMCA_BUG,
			(unsigned long long)l1_dirbase,
			(unsigned long long)l1_dirbase + l1_hdr_size);
		goto err;
	}

	extlog_l1_hdr = acpi_os_map_memory(l1_dirbase, l1_hdr_size);
	l1_head = (struct extlog_l1_head *)extlog_l1_hdr;
	l1_size = l1_head->total_len;
	l1_percpu_entry = l1_head->entries;
	elog_base = l1_head->elog_base;
	elog_size = l1_head->elog_len;
	acpi_os_unmap_memory(extlog_l1_hdr, l1_hdr_size);
	release_mem_region(l1_dirbase, l1_hdr_size);

	/* remap L1 header again based on completed information */
	r = request_mem_region(l1_dirbase, l1_size, "L1 Table");
	if (!r) {
		pr_warn(FW_BUG EMCA_BUG,
			(unsigned long long)l1_dirbase,
			(unsigned long long)l1_dirbase + l1_size);
		goto err;
	}
	extlog_l1_addr = acpi_os_map_memory(l1_dirbase, l1_size);
	l1_entry_base = (u64 *)((u8 *)extlog_l1_addr + l1_hdr_size);

	/* remap elog table */
	r = request_mem_region(elog_base, elog_size, "Elog Table");
	if (!r) {
		pr_warn(FW_BUG EMCA_BUG,
			(unsigned long long)elog_base,
			(unsigned long long)elog_base + elog_size);
		goto err_release_l1_dir;
	}
	elog_addr = acpi_os_map_memory(elog_base, elog_size);

	rc = -ENOMEM;
	/* allocate buffer to save elog record */
	elog_buf = kmalloc(ELOG_ENTRY_LEN, GFP_KERNEL);
	if (elog_buf == NULL)
		goto err_release_elog;

	mce_register_decode_chain(&extlog_mce_dec);
	/* enable OS to be involved to take over management from BIOS */
	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;

	return 0;

err_release_elog:
	if (elog_addr)
		acpi_os_unmap_memory(elog_addr, elog_size);
	release_mem_region(elog_base, elog_size);
err_release_l1_dir:
	if (extlog_l1_addr)
		acpi_os_unmap_memory(extlog_l1_addr, l1_size);
	release_mem_region(l1_dirbase, l1_size);
err:
	pr_warn(FW_BUG "Extended error log disabled because of problems parsing f/w tables\n");
	return rc;
}
Example #27
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
	u64 misc_enable;

	/* Unmask CPUID levels if masked: */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
			misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
			c->cpuid_level = cpuid_eax(0);
			get_cpu_cap(c);
		}
	}

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
		(c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);

	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
		unsigned lower_word;

		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
		/* Required by the SDM */
		sync_core();
		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
	}

	/*
	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
	 *
	 * A race condition between speculative fetches and invalidating
	 * a large page.  This is worked around in microcode, but we
	 * need the microcode to have already been loaded... so if it is
	 * not, recommend a BIOS update and disable large pages.
	 */
	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
	    c->microcode < 0x20e) {
		printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
		clear_cpu_cap(c, X86_FEATURE_PSE);
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;
#endif

	/* CPUID workaround for 0F33/0F34 CPU */
	if (c->x86 == 0xF && c->x86_model == 0x3
	    && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
		c->x86_phys_bits = 36;

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states.
	 *
	 * It is also reliable across cores and sockets. (but not across
	 * cabinets - we turn it off in that case explicitly.)
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
		if (!check_tsc_unstable())
			sched_clock_stable = 1;
	}

	/*
	 * There is a known erratum on Pentium III and Core Solo
	 * and Core Duo CPUs.
	 * " Page with PAT set to WC while associated MTRR is UC
	 *   may consolidate to UC "
	 * Because of this erratum, it is better to stick with
	 * setting WC in MTRR rather than using PAT on these CPUs.
	 *
	 * Enable PAT WC only on P4, Core 2 or later CPUs.
	 */
	if (c->x86 == 6 && c->x86_model < 15)
		clear_cpu_cap(c, X86_FEATURE_PAT);

#ifdef CONFIG_KMEMCHECK
	/*
	 * P4s have a "fast strings" feature which causes single-
	 * stepping REP instructions to only generate a #DB on
	 * cache-line boundaries.
	 *
	 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
	 * (model 2) with the same problem.
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
			printk(KERN_INFO "kmemcheck: Disabling fast string operations\n");

			misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
		}
	}
#endif

	/*
	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
	 * clear the fast string and enhanced fast string CPU capabilities.
	 */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
			printk(KERN_INFO "Disabled fast string operations\n");
			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
			setup_clear_cpu_cap(X86_FEATURE_ERMS);
		}
	}
}
static void vtss_dsa_on_each_cpu_init(void* ctx)
{
    if (hardcfg.family == 0x06 || hardcfg.family == 0x0f) {
        rdmsrl(DS_AREA_MSR, __get_cpu_var(vtss_dsa_cpu_msr));
    }
}
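
vtss_dsa_on_each_cpu_init() reads the per-CPU DS-area pointer, which is only meaningful when the CPU supports the Debug Store facility. As a final small sketch, assuming <cpuid.h>, this checks the relevant CPUID leaf-1 feature bits before anything would touch IA32_DS_AREA.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 1 not available");
		return 1;
	}
	/* EDX bit 21: Debug Store (DS), prerequisite for using IA32_DS_AREA.
	 * ECX bit 4: CPL-qualified debug store. */
	printf("Debug Store supported: %s\n", (edx & (1u << 21)) ? "yes" : "no");
	printf("DS-CPL qualified:      %s\n", (ecx & (1u << 4)) ? "yes" : "no");
	return 0;
}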