Esempio n. 1
0
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void __cpuinit check_tsc_sync_source(int cpu)
{
	unsigned long flags;
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 */
	if (unsynchronized_tsc())
		return;

	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
			  smp_processor_id(), cpu);

	/*
	 * Reset it - in case this is a second bootup:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Wait for the target to arrive:
	 */
	local_save_flags(flags);
	local_irq_enable();
	while (atomic_read(&start_count) != cpus-1)
		cpu_relax();
	local_irq_restore(flags);
	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp();

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);

	if (nr_warps) {
		printk("\n");
		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
				    " turning off TSC clock.\n", max_warp);
		mark_tsc_unstable();
		nr_warps = 0;
		max_warp = 0;
		last_tsc = 0;
	} else {
		printk(" passed.\n");
	}

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);
}
Esempio n. 2
0
/* Hook from generic ACPI tables.c */
static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	if (!strncmp(oem_id, "IBM", 3) &&
	    (!strncmp(oem_table_id, "SERVIGIL", 8)
	     || !strncmp(oem_table_id, "EXA", 3))){
		mark_tsc_unstable("Summit based system");
		use_cyclone = 1; /*enable cyclone-timer*/
		setup_summit();
		return 1;
	}
	return 0;
}
Esempio n. 3
0
static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
		char *productid)
{
	if (!strncmp(oem, "IBM ENSW", 8) &&
			(!strncmp(productid, "VIGIL SMP", 9)
			 || !strncmp(productid, "EXA", 3)
			 || !strncmp(productid, "RUTHLESS SMP", 12))){
		mark_tsc_unstable("Summit based system");
		use_cyclone = 1; /*enable cyclone-timer*/
		setup_summit();
		return 1;
	}
	return 0;
}
Esempio n. 4
0
static void __init ms_hyperv_init_platform(void)
{
	/*
	 * Extract the features and hints
	 */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	pr_info("HyperV: features 0x%x, hints 0x%x\n",
		ms_hyperv.features, ms_hyperv.hints);

#ifdef CONFIG_X86_LOCAL_APIC
	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
		/*
		 * Get the APIC frequency.
		 */
		u64	hv_lapic_frequency;

		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
		lapic_timer_frequency = hv_lapic_frequency;
		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
			lapic_timer_frequency);
	}

	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
			     "hv_nmi_unknown");
#endif

	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
		clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
	machine_ops.shutdown = hv_machine_shutdown;
	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
	mark_tsc_unstable("running on Hyper-V");

	/*
	 * Generation 2 instances don't support reading the NMI status from
	 * 0x61 port.
	 */
	if (efi_enabled(EFI_BOOT))
		x86_platform.get_nmi_reason = hv_get_nmi_reason;
}
Esempio n. 5
0
static void power_saving_mwait_init(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (!boot_cpu_has(X86_FEATURE_MWAIT))
		return;
	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	power_saving_mwait_eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

	for_each_online_cpu(i)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &i);

#if defined(CONFIG_GENERIC_TIME) && defined(CONFIG_X86)
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		/*FALL THROUGH*/
	default:
		/* TSC could halt in idle, so notify users */
		mark_tsc_unstable("TSC halts in idle");
	}
#endif
}
static void tsc_check_state(int state)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		
	default:
		
		if (state > ACPI_STATE_C1)
			mark_tsc_unstable("TSC halts in idle");
	}
}
Esempio n. 7
0
static void tsc_check_state(int state)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		/*FALL THROUGH*/
	default:
		/* TSC could halt in idle, so notify users */
		if (state > ACPI_STATE_C1)
			mark_tsc_unstable("TSC halts in idle");
	}
}
static int __init kretprobe_init(void)
{
	int ret;

	my_kretprobe.kp.symbol_name = func_name;
	ret = register_kretprobe(&my_kretprobe);
	if (ret < 0) {
		printk(KERN_INFO "register_kretprobe failed, returned %d\n",
				ret);
		return -1;
	}

	mark_tsc_unstable("kernel uptime bug");

	printk(KERN_INFO "Planted return probe at %s: %p\n",
			my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);

	return 0;
}
Esempio n. 9
0
static void __init ms_hyperv_init_platform(void)
{
	/*
	 * Extract the features and hints
	 */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	pr_info("HyperV: features 0x%x, hints 0x%x\n",
		ms_hyperv.features, ms_hyperv.hints);

#ifdef CONFIG_X86_LOCAL_APIC
	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
		/*
		 * Get the APIC frequency.
		 */
		u64	hv_lapic_frequency;

		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
		lapic_timer_frequency = hv_lapic_frequency;
		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
			lapic_timer_frequency);
	}
#endif

	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
		clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
	machine_ops.shutdown = hv_machine_shutdown;
	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
	mark_tsc_unstable("running on Hyper-V");
}
Esempio n. 10
0
void __init arch_post_acpi_subsys_init(void)
{
	u32 lo, hi;

	if (!boot_cpu_has_bug(X86_BUG_AMD_E400))
		return;

	/*
	 * AMD E400 detection needs to happen after ACPI has been enabled. If
	 * the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in
	 * MSR_K8_INT_PENDING_MSG.
	 */
	rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
	if (!(lo & K8_INTP_C1E_ACTIVE_MASK))
		return;

	boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E);

	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halt in AMD C1E");
	pr_info("System has AMD C1E enabled\n");
}
Esempio n. 11
0
/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
 * pending message MSR. If we detect C1E, then we handle it the same
 * way as C3 power states (local apic timer and TSC stop)
 */
static void amd_e400_idle(void)
{
	if (!amd_e400_c1e_detected) {
		u32 lo, hi;

		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
			amd_e400_c1e_detected = true;
			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
				mark_tsc_unstable("TSC halt in AMD C1E");
			pr_info("System has AMD C1E enabled\n");
		}
	}

	if (amd_e400_c1e_detected) {
		int cpu = smp_processor_id();

		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
			/* Force broadcast so ACPI can not interfere. */
			tick_broadcast_force();
			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
		}
		tick_broadcast_enter();

		default_idle();

		/*
		 * The switch back from broadcast mode needs to be
		 * called with interrupts disabled.
		 */
		local_irq_disable();
		tick_broadcast_exit();
		local_irq_enable();
	} else
		default_idle();
}
Esempio n. 12
0
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void __cpuinit check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 */
	if (unsynchronized_tsc())
		return;

	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		printk(KERN_INFO
		       "Skipping synchronization checks as TSC is reliable.\n");
		return;
	}

	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
			  smp_processor_id(), cpu);

	/*
	 * Reset it - in case this is a second bootup:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Wait for the target to arrive:
	 */
	while (atomic_read(&start_count) != cpus-1)
		cpu_relax();
	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp();

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	if (nr_warps) {
		printk("\n");
		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
				    " turning off TSC clock.\n", max_warp);
		mark_tsc_unstable("check_tsc_sync_source failed");
	} else {
		printk(" passed.\n");
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);
}
Esempio n. 13
0
/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @state: the state data
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
			      struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	u32 t1, t2;
	int sleep_ticks = 0;

	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	if (acpi_idle_bm_check()) {
		if (dev->safe_state) {
			dev->last_state = dev->safe_state;
			return dev->safe_state->enter(dev, dev->safe_state);
		} else {
			local_irq_disable();
			acpi_safe_halt();
			local_irq_enable();
			return 0;
		}
	}

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	acpi_unlazy_tlb(smp_processor_id());

	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	/*
	 * disable bus master
	 * bm_check implies we need ARB_DIS
	 * !bm_check implies we need cache flush
	 * bm_control implies whether we can do ARB_DIS
	 *
	 * That leaves a case where bm_check is set and bm_control is
	 * not set. In that case we cannot do much, we enter C3
	 * without doing anything.
	 */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
		spin_unlock(&c3_lock);
	} else if (!pr->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	}

	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
	acpi_idle_do_entry(cx);
	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

	/* Re-enable bus master arbitration */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		spin_lock(&c3_lock);
		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		spin_unlock(&c3_lock);
	}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
	/* TSC could halt in idle, so notify users */
	if (tsc_halts_in_c(ACPI_STATE_C3))
		mark_tsc_unstable("TSC halts in idle");
#endif
	sleep_ticks = ticks_elapsed(t1, t2);
	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += sleep_ticks;
	return ticks_elapsed_in_us(t1, t2);
}
Esempio n. 14
0
/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @state: the state data
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
				  struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	u32 t1, t2;
	int sleep_ticks = 0;

	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	acpi_idle_do_entry(cx);
	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
	/* TSC could halt in idle, so notify users */
	if (tsc_halts_in_c(cx->type))
		mark_tsc_unstable("TSC halts in idle");;
#endif
	sleep_ticks = ticks_elapsed(t1, t2);

	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += sleep_ticks;
	return ticks_elapsed_in_us(t1, t2);
}
Esempio n. 15
0
	/* avoid HT sibilings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_setscheduler(current, SCHED_RR, &param);

	while (!kthread_should_stop()) {
		unsigned long expire_time;

		try_to_freeze();

		/* round robin to cpus */
		expire_time = last_jiffies + round_robin_time * HZ;
		if (time_before(expire_time, jiffies)) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			local_irq_disable();
			tick_broadcast_enable();
			tick_broadcast_enter();
			stop_critical_timings();

			mwait_idle_with_hints(power_saving_mwait_eax, 1);

			start_critical_timings();
			tick_broadcast_exit();
			local_irq_enable();

			if (time_before(expire_time, jiffies)) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (unlikely(do_sleep))
			schedule_timeout_killable(HZ * idle_pct / 100);

		/* If an external event has set the need_resched flag, then
		 * we need to deal with it, or this loop will continue to
		 * spin without calling __mwait().
		 */
		if (unlikely(need_resched()))
			schedule();
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
static int create_power_saving_task(void)
{
	int rc;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
		(void *)(unsigned long)ps_tsk_num,
		"acpi_pad/%d", ps_tsk_num);

	if (IS_ERR(ps_tsks[ps_tsk_num])) {
		rc = PTR_ERR(ps_tsks[ps_tsk_num]);
		ps_tsks[ps_tsk_num] = NULL;
	} else {
		rc = 0;
		ps_tsk_num++;
	}

	return rc;
}
Esempio n. 16
0
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized or if we have no TSC.
	 */
	if (unsynchronized_tsc())
		return;

	/*
	 * Set the maximum number of test runs to
	 *  1 if the CPU does not provide the TSC_ADJUST MSR
	 *  3 if the MSR is available, so the target can try to adjust
	 */
	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		atomic_set(&test_runs, 1);
	else
		atomic_set(&test_runs, 3);
retry:
	/*
	 * Wait for the target to start or to skip the test:
	 */
	while (atomic_read(&start_count) != cpus - 1) {
		if (atomic_read(&skip_test) > 0) {
			atomic_set(&skip_test, 0);
			return;
		}
		cpu_relax();
	}

	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp(loop_timeout(cpu));

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	/*
	 * If the test was successful set the number of runs to zero and
	 * stop. If not, decrement the number of runs an check if we can
	 * retry. In case of random warps no retry is attempted.
	 */
	if (!nr_warps) {
		atomic_set(&test_runs, 0);

		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
			smp_processor_id(), cpu);

	} else if (atomic_dec_and_test(&test_runs) || random_warps) {
		/* Force it to 0 if random warps brought us here */
		atomic_set(&test_runs, 0);

		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
			smp_processor_id(), cpu);
		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
			   "turning off TSC clock.\n", max_warp);
		if (random_warps)
			pr_warning("TSC warped randomly between CPUs\n");
		mark_tsc_unstable("check_tsc_sync_source failed");
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	random_warps = 0;
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);

	/*
	 * Retry, if there is a chance to do so.
	 */
	if (atomic_read(&test_runs) > 0)
		goto retry;
}
Esempio n. 17
0
	/* avoid HT sibilings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_setscheduler(current, SCHED_RR, &param);
	set_freezable();

	while (!kthread_should_stop()) {
		int cpu;
		u64 expire_time;

		try_to_freeze();

		/* round robin to cpus */
		if (last_jiffies + round_robin_time * HZ < jiffies) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			if (lapic_detected_unstable && !lapic_marked_unstable) {
				int i;
				/* LAPIC could halt in idle, so notify users */
				for_each_online_cpu(i)
					clockevents_notify(
						CLOCK_EVT_NOTIFY_BROADCAST_ON,
						&i);
				lapic_marked_unstable = 1;
			}
			local_irq_disable();
			cpu = smp_processor_id();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
			stop_critical_timings();

			__monitor((void *)&current_thread_info()->flags, 0, 0);
			smp_mb();
			if (!need_resched())
				__mwait(power_saving_mwait_eax, 1);

			start_critical_timings();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
			local_irq_enable();

			if (jiffies > expire_time) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * current sched_rt has threshold for rt task running time.
		 * When a rt task uses 95% CPU time, the rt thread will be
		 * scheduled out for 5% CPU time to not starve other tasks. But
		 * the mechanism only works when all CPUs have RT task running,
		 * as if one CPU hasn't RT task, RT task from other CPUs will
		 * borrow CPU time from this CPU and cause RT task use > 95%
		 * CPU time. To make 'avoid starvation' work, takes a nap here.
		 */
		if (do_sleep)
			schedule_timeout_killable(HZ * idle_pct / 100);
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
static int create_power_saving_task(void)
{
	int rc = -ENOMEM;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
		(void *)(unsigned long)ps_tsk_num,
		"acpi_pad/%d", ps_tsk_num);
	rc = PTR_RET(ps_tsks[ps_tsk_num]);
	if (!rc)
		ps_tsk_num++;
	else
		ps_tsks[ps_tsk_num] = NULL;

	return rc;
}
Esempio n. 18
0
/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void __cpuinit check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 */
	if (unsynchronized_tsc())
		return;

	if (tsc_clocksource_reliable) {
		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
			pr_info(
			"Skipped synchronization checks as TSC is reliable.\n");
		return;
	}

	/*
	 * Reset it - in case this is a second bootup:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Wait for the target to arrive:
	 */
	while (atomic_read(&start_count) != cpus-1)
		cpu_relax();
	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp(loop_timeout(cpu));

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	if (nr_warps) {
		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
			smp_processor_id(), cpu);
		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
			   "turning off TSC clock.\n", max_warp);
		mark_tsc_unstable("check_tsc_sync_source failed");
	} else {
		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
			smp_processor_id(), cpu);
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);
}